diff --git a/.gitignore b/.gitignore index 86ecd07f5d5..d278c3de9c1 100644 --- a/.gitignore +++ b/.gitignore @@ -62,4 +62,4 @@ docker/test/report_*.html kafka.Kafka __pycache__ -_data/ \ No newline at end of file +_data/ diff --git a/LICENSE-binary b/LICENSE-binary index 030f62b9675..b07b5e0472e 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -206,13 +206,12 @@ This project bundles some components that are also licensed under the Apache License Version 2.0: - caffeine-3.1.1 -- commons-beanutils-1.9.4 +- commons-beanutils-1.11.0 - commons-collections-3.2.2 - commons-digester-2.1 -- commons-lang3-3.12.0 -- commons-logging-1.3.2 +- commons-lang3-3.18.0 +- commons-logging-1.3.5 - commons-validator-1.9.0 -- error_prone_annotations-2.14.0 - jackson-annotations-2.16.2 - jackson-core-2.16.2 - jackson-databind-2.16.2 diff --git a/bin/kafka-run-class.sh b/bin/kafka-run-class.sh index 8bd1b17623b..3463389d3c0 100755 --- a/bin/kafka-run-class.sh +++ b/bin/kafka-run-class.sh @@ -225,7 +225,7 @@ if [ -z "$KAFKA_LOG4J_OPTS" ]; then (( WINDOWS_OS_FORMAT )) && LOG4J_DIR=$(cygpath --path --mixed "${LOG4J_DIR}") KAFKA_LOG4J_OPTS="-Dlog4j2.configurationFile=${LOG4J_DIR}" else - if echo "$KAFKA_LOG4J_OPTS" | grep -E "log4j\.[^[:space:]]+(\.properties|\.xml)$"; then + if echo "$KAFKA_LOG4J_OPTS" | grep -E "log4j\.[^[:space:]]+(\.properties|\.xml)$" >/dev/null; then # Enable Log4j 1.x configuration compatibility mode for Log4j 2 export LOG4J_COMPATIBILITY=true echo DEPRECATED: A Log4j 1.x configuration file has been detected, which is no longer recommended. >&2 diff --git a/build.gradle b/build.gradle index 0ec70f4987d..aa0d51a8020 100644 --- a/build.gradle +++ b/build.gradle @@ -200,7 +200,10 @@ allprojects { // ensure we have a single version in the classpath despite transitive dependencies libs.scalaLibrary, libs.scalaReflect, - libs.jacksonAnnotations + // Workaround before `commons-validator` has new release. See KAFKA-19359. + libs.commonsBeanutils, + libs.jacksonAnnotations, + libs.commonsLang ) } } @@ -1130,6 +1133,7 @@ project(':core') { testImplementation project(':test-common:test-common-util') testImplementation libs.bcpkix testImplementation libs.mockitoCore + testImplementation libs.jqwik testImplementation(libs.apacheda) { exclude group: 'xml-apis', module: 'xml-apis' // `mina-core` is a transitive dependency for `apacheds` and `apacheda`. 
@@ -1336,6 +1340,12 @@ project(':core') { ) } + test { + useJUnitPlatform { + includeEngines 'jqwik', 'junit-jupiter' + } + } + tasks.create(name: "copyDependantTestLibs", type: Copy) { from (configurations.testRuntimeClasspath) { include('*.jar') @@ -1903,6 +1913,7 @@ project(':clients') { testImplementation libs.jacksonJakartarsJsonProvider testImplementation libs.jose4j testImplementation libs.junitJupiter + testImplementation libs.jqwik testImplementation libs.spotbugs testImplementation libs.mockitoCore testImplementation libs.mockitoJunitJupiter // supports MockitoExtension @@ -2291,6 +2302,7 @@ project(':storage') { implementation project(':transaction-coordinator') implementation(libs.caffeine) { exclude group: 'org.checkerframework', module: 'checker-qual' + exclude group: 'com.google.errorprone', module: 'error_prone_annotations' } implementation libs.slf4jApi implementation libs.jacksonDatabind diff --git a/checkstyle/import-control.xml b/checkstyle/import-control.xml index dc674ab997a..ab6ebff7433 100644 --- a/checkstyle/import-control.xml +++ b/checkstyle/import-control.xml @@ -194,6 +194,8 @@ + + diff --git a/checkstyle/suppressions.xml b/checkstyle/suppressions.xml index dcc4c0813f8..f848a0ab5e8 100644 --- a/checkstyle/suppressions.xml +++ b/checkstyle/suppressions.xml @@ -105,7 +105,7 @@ files="(AbstractRequest|AbstractResponse|KerberosLogin|WorkerSinkTaskTest|TransactionManagerTest|SenderTest|KafkaAdminClient|ConsumerCoordinatorTest|KafkaAdminClientTest).java"/> + files="(AbstractMembershipManager|ConsumerCoordinator|BufferPool|MetricName|Node|ConfigDef|RecordBatch|SslFactory|SslTransportLayer|MetadataResponse|KerberosLogin|Selector|Sender|Serdes|TokenInformation|Agent|PluginUtils|MiniTrogdorCluster|TasksRequest|KafkaProducer|AbstractStickyAssignor|Authorizer|FetchSessionHandler|RecordAccumulator|Shell|MockConsumer).java"/> diff --git a/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java b/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java index dc3164993b8..12e122b123a 100644 --- a/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java +++ b/clients/src/main/java/org/apache/kafka/clients/admin/KafkaAdminClient.java @@ -577,10 +577,12 @@ static KafkaAdminClient createInternal(AdminClientConfig config, Time time) { Metrics metrics = null; String clientId = generateClientId(config); + List reporters = CommonClientConfigs.metricsReporters(clientId, config); Optional clientTelemetryReporter = CommonClientConfigs.telemetryReporter(clientId, config); + clientTelemetryReporter.ifPresent(reporters::add); try { - metrics = new Metrics(new MetricConfig(), new LinkedList<>(), time); + metrics = new Metrics(new MetricConfig(), reporters, time); LogContext logContext = createLogContext(clientId); return new KafkaAdminClient(config, clientId, time, metadataManager, metrics, client, null, logContext, clientTelemetryReporter); @@ -625,9 +627,7 @@ private KafkaAdminClient(AdminClientConfig config, CommonClientConfigs.RETRY_BACKOFF_EXP_BASE, retryBackoffMaxMs, CommonClientConfigs.RETRY_BACKOFF_JITTER); - List reporters = CommonClientConfigs.metricsReporters(this.clientId, config); this.clientTelemetryReporter = clientTelemetryReporter; - this.clientTelemetryReporter.ifPresent(reporters::add); this.metadataRecoveryStrategy = MetadataRecoveryStrategy.forName(config.getString(AdminClientConfig.METADATA_RECOVERY_STRATEGY_CONFIG)); this.partitionLeaderCache = new HashMap<>(); this.adminFetchMetricsManager = new 
AdminFetchMetricsManager(metrics); diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java index b9e69806694..c92ce6ec19d 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/MockConsumer.java @@ -36,6 +36,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -79,6 +80,8 @@ public class MockConsumer implements Consumer { private Uuid clientInstanceId; private int injectTimeoutExceptionCounter; + private long maxPollRecords = Long.MAX_VALUE; + private final List addedMetrics = new ArrayList<>(); /** @@ -275,14 +278,22 @@ public synchronized ConsumerRecords poll(final Duration timeout) { // update the consumed offset final Map>> results = new HashMap<>(); final Map nextOffsetAndMetadata = new HashMap<>(); - final List toClear = new ArrayList<>(); + long numPollRecords = 0L; + + final Iterator>>> partitionsIter = this.records.entrySet().iterator(); + while (partitionsIter.hasNext() && numPollRecords < this.maxPollRecords) { + Map.Entry>> entry = partitionsIter.next(); - for (Map.Entry>> entry : this.records.entrySet()) { if (!subscriptions.isPaused(entry.getKey())) { - final List> recs = entry.getValue(); - for (final ConsumerRecord rec : recs) { + final Iterator> recIterator = entry.getValue().iterator(); + while (recIterator.hasNext()) { + if (numPollRecords >= this.maxPollRecords) { + break; + } long position = subscriptions.position(entry.getKey()).offset; + final ConsumerRecord rec = recIterator.next(); + if (beginningOffsets.get(entry.getKey()) != null && beginningOffsets.get(entry.getKey()) > position) { throw new OffsetOutOfRangeException(Collections.singletonMap(entry.getKey(), position)); } @@ -294,13 +305,17 @@ public synchronized ConsumerRecords poll(final Duration timeout) { rec.offset() + 1, rec.leaderEpoch(), leaderAndEpoch); subscriptions.position(entry.getKey(), newPosition); nextOffsetAndMetadata.put(entry.getKey(), new OffsetAndMetadata(rec.offset() + 1, rec.leaderEpoch(), "")); + numPollRecords++; + recIterator.remove(); } } - toClear.add(entry.getKey()); + + if (entry.getValue().isEmpty()) { + partitionsIter.remove(); + } } } - toClear.forEach(records::remove); return new ConsumerRecords<>(results, nextOffsetAndMetadata); } @@ -314,6 +329,18 @@ public synchronized void addRecord(ConsumerRecord record) { recs.add(record); } + /** + * Sets the maximum number of records returned in a single call to {@link #poll(Duration)}. + * + * @param maxPollRecords the max.poll.records. 
+ */ + public synchronized void setMaxPollRecords(long maxPollRecords) { + if (maxPollRecords < 1) { + throw new IllegalArgumentException("maxPollRecords must be greater than 0"); + } + this.maxPollRecords = maxPollRecords; + } + public synchronized void setPollException(KafkaException exception) { this.pollException = exception; } diff --git a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java index 584a03736f9..8d54c871bb9 100644 --- a/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java +++ b/clients/src/main/java/org/apache/kafka/clients/consumer/internals/ConsumerCoordinator.java @@ -1263,23 +1263,25 @@ RequestFuture sendOffsetCommitRequest(final Mapremote.log.delete.on.disable to true."; public static final String LOCAL_LOG_RETENTION_MS_CONFIG = "local.retention.ms"; public static final String LOCAL_LOG_RETENTION_MS_DOC = "The number of milliseconds to keep the local log segment before it gets deleted. " + diff --git a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java index 237948f61c9..c23aa1782d6 100644 --- a/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java +++ b/clients/src/main/java/org/apache/kafka/common/protocol/Protocol.java @@ -208,7 +208,7 @@ public static String toHtml() { // Responses b.append("Responses:
\n"); Schema[] responses = key.messageType.responseSchemas(); - for (int version = key.oldestVersion(); version < key.latestVersion(); version++) { + for (int version = key.oldestVersion(); version <= key.latestVersion(); version++) { Schema schema = responses[version]; if (schema == null) throw new IllegalStateException("Unexpected null schema for " + key + " with version " + version); diff --git a/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java b/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java index 912c3490f43..d6e9cc6bd7f 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java +++ b/clients/src/main/java/org/apache/kafka/common/record/DefaultRecordBatch.java @@ -159,7 +159,7 @@ public void ensureValid() { /** * Gets the base timestamp of the batch which is used to calculate the record timestamps from the deltas. - * + * * @return The base timestamp */ public long baseTimestamp() { @@ -502,6 +502,7 @@ public static void writeHeader(ByteBuffer buffer, public String toString() { return "RecordBatch(magic=" + magic() + ", offsets=[" + baseOffset() + ", " + lastOffset() + "], " + "sequence=[" + baseSequence() + ", " + lastSequence() + "], " + + "partitionLeaderEpoch=" + partitionLeaderEpoch() + ", " + "isTransactional=" + isTransactional() + ", isControlBatch=" + isControlBatch() + ", " + "compression=" + compressionType() + ", timestampType=" + timestampType() + ", crc=" + checksum() + ")"; } diff --git a/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java b/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java index 64dd73de412..ba5cb556cef 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java +++ b/clients/src/main/java/org/apache/kafka/common/record/FileRecords.java @@ -201,6 +201,10 @@ public void flush() throws IOException { * Close this record set */ public void close() throws IOException { + if (!channel.isOpen()) { + return; + } + flush(); trim(); channel.close(); diff --git a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java index 3aee889aded..17334f89c2a 100644 --- a/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java +++ b/clients/src/main/java/org/apache/kafka/common/record/MemoryRecords.java @@ -33,9 +33,6 @@ import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.GatheringByteChannel; @@ -50,7 +47,6 @@ * or one of the {@link #builder(ByteBuffer, byte, Compression, TimestampType, long)} variants. */ public class MemoryRecords extends AbstractRecords { - private static final Logger log = LoggerFactory.getLogger(MemoryRecords.class); public static final MemoryRecords EMPTY = MemoryRecords.readableRecords(ByteBuffer.allocate(0)); private final ByteBuffer buffer; @@ -602,7 +598,7 @@ public static MemoryRecords withRecords(byte magic, long initialOffset, Compress return withRecords(magic, initialOffset, compression, TimestampType.CREATE_TIME, records); } - public static MemoryRecords withRecords(long initialOffset, Compression compression, Integer partitionLeaderEpoch, SimpleRecord... records) { + public static MemoryRecords withRecords(long initialOffset, Compression compression, int partitionLeaderEpoch, SimpleRecord... 
records) { return withRecords(RecordBatch.CURRENT_MAGIC_VALUE, initialOffset, compression, TimestampType.CREATE_TIME, RecordBatch.NO_PRODUCER_ID, RecordBatch.NO_PRODUCER_EPOCH, RecordBatch.NO_SEQUENCE, partitionLeaderEpoch, false, records); } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java index 38da78efb1b..4ae02c5145c 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/AbstractRequest.java @@ -136,7 +136,7 @@ public String toString(boolean verbose) { } @Override - public final String toString() { + public String toString() { return toString(true); } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/AlterUserScramCredentialsRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/AlterUserScramCredentialsRequest.java index 1ca7ea77aa4..c779f6d9a84 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/AlterUserScramCredentialsRequest.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/AlterUserScramCredentialsRequest.java @@ -17,10 +17,14 @@ package org.apache.kafka.common.requests; import org.apache.kafka.common.message.AlterUserScramCredentialsRequestData; +import org.apache.kafka.common.message.AlterUserScramCredentialsRequestDataJsonConverter; import org.apache.kafka.common.message.AlterUserScramCredentialsResponseData; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.ByteBufferAccessor; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + import java.nio.ByteBuffer; import java.util.List; import java.util.Set; @@ -82,4 +86,16 @@ public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) { .collect(Collectors.toList()); return new AlterUserScramCredentialsResponse(new AlterUserScramCredentialsResponseData().setResults(results)); } + + // Do not print salt or saltedPassword + @Override + public String toString() { + JsonNode json = AlterUserScramCredentialsRequestDataJsonConverter.write(data, version()).deepCopy(); + + for (JsonNode upsertion : json.get("upsertions")) { + ((ObjectNode) upsertion).put("salt", ""); + ((ObjectNode) upsertion).put("saltedPassword", ""); + } + return AlterUserScramCredentialsRequestDataJsonConverter.read(json, version()).toString(); + } } diff --git a/clients/src/main/java/org/apache/kafka/common/requests/IncrementalAlterConfigsRequest.java b/clients/src/main/java/org/apache/kafka/common/requests/IncrementalAlterConfigsRequest.java index 222097502b2..54540837756 100644 --- a/clients/src/main/java/org/apache/kafka/common/requests/IncrementalAlterConfigsRequest.java +++ b/clients/src/main/java/org/apache/kafka/common/requests/IncrementalAlterConfigsRequest.java @@ -21,11 +21,15 @@ import org.apache.kafka.common.config.ConfigResource; import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData; import org.apache.kafka.common.message.IncrementalAlterConfigsRequestData.AlterConfigsResource; +import org.apache.kafka.common.message.IncrementalAlterConfigsRequestDataJsonConverter; import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData; import org.apache.kafka.common.message.IncrementalAlterConfigsResponseData.AlterConfigsResourceResponse; import org.apache.kafka.common.protocol.ApiKeys; import org.apache.kafka.common.protocol.ByteBufferAccessor; +import 
com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; @@ -107,4 +111,16 @@ public AbstractResponse getErrorResponse(final int throttleTimeMs, final Throwab } return new IncrementalAlterConfigsResponse(response); } + + // It is not safe to print all config values + @Override + public String toString() { + JsonNode json = IncrementalAlterConfigsRequestDataJsonConverter.write(data, version()).deepCopy(); + for (JsonNode resource : json.get("resources")) { + for (JsonNode config : resource.get("configs")) { + ((ObjectNode) config).put("value", "REDACTED"); + } + } + return IncrementalAlterConfigsRequestDataJsonConverter.read(json, version()).toString(); + } } diff --git a/clients/src/main/java/org/apache/kafka/common/telemetry/internals/ClientTelemetryReporter.java b/clients/src/main/java/org/apache/kafka/common/telemetry/internals/ClientTelemetryReporter.java index 705aafaaa70..e0491943fef 100644 --- a/clients/src/main/java/org/apache/kafka/common/telemetry/internals/ClientTelemetryReporter.java +++ b/clients/src/main/java/org/apache/kafka/common/telemetry/internals/ClientTelemetryReporter.java @@ -41,7 +41,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.nio.ByteBuffer; import java.time.Duration; import java.util.Collections; @@ -718,8 +717,8 @@ private Optional> createPushRequest(ClientTelemetrySubscription local ByteBuffer compressedPayload; try { compressedPayload = ClientTelemetryUtils.compress(payload, compressionType); - } catch (IOException e) { - log.info("Failed to compress telemetry payload for compression: {}, sending uncompressed data", compressionType); + } catch (Throwable e) { + log.debug("Failed to compress telemetry payload for compression: {}, sending uncompressed data", compressionType); compressedPayload = ByteBuffer.wrap(payload.toByteArray()); compressionType = CompressionType.NONE; } diff --git a/clients/src/main/resources/common/message/JoinGroupRequest.json b/clients/src/main/resources/common/message/JoinGroupRequest.json index 41d7c1acbae..31afdb1a32a 100644 --- a/clients/src/main/resources/common/message/JoinGroupRequest.json +++ b/clients/src/main/resources/common/message/JoinGroupRequest.json @@ -18,8 +18,6 @@ "type": "request", "listeners": ["broker"], "name": "JoinGroupRequest", - // Versions 0-1 were removed in Apache Kafka 4.0, Version 2 is the new baseline. - // // Version 1 adds RebalanceTimeoutMs. Version 2 and 3 are the same as version 1. // // Starting from version 4, the client needs to issue a second request to join group @@ -34,7 +32,7 @@ // Version 8 adds the Reason field (KIP-800). // // Version 9 is the same as version 8. - "validVersions": "2-9", + "validVersions": "0-9", "flexibleVersions": "6+", "fields": [ { "name": "GroupId", "type": "string", "versions": "0+", "entityType": "groupId", diff --git a/clients/src/main/resources/common/message/JoinGroupResponse.json b/clients/src/main/resources/common/message/JoinGroupResponse.json index 364309596eb..d2f016f62f6 100644 --- a/clients/src/main/resources/common/message/JoinGroupResponse.json +++ b/clients/src/main/resources/common/message/JoinGroupResponse.json @@ -17,8 +17,6 @@ "apiKey": 11, "type": "response", "name": "JoinGroupResponse", - // Versions 0-1 were removed in Apache Kafka 4.0, Version 2 is the new baseline. - // // Version 1 is the same as version 0. // // Version 2 adds throttle time. 
@@ -37,7 +35,7 @@ // Version 8 is the same as version 7. // // Version 9 adds the SkipAssignment field. - "validVersions": "2-9", + "validVersions": "0-9", "flexibleVersions": "6+", "fields": [ { "name": "ThrottleTimeMs", "type": "int32", "versions": "2+", "ignorable": true, diff --git a/clients/src/test/java/org/apache/kafka/clients/consumer/MockConsumerTest.java b/clients/src/test/java/org/apache/kafka/clients/consumer/MockConsumerTest.java index 21cee3183bc..647976b1d1d 100644 --- a/clients/src/test/java/org/apache/kafka/clients/consumer/MockConsumerTest.java +++ b/clients/src/test/java/org/apache/kafka/clients/consumer/MockConsumerTest.java @@ -32,6 +32,7 @@ import java.util.Iterator; import java.util.List; import java.util.Optional; +import java.util.stream.IntStream; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -202,4 +203,33 @@ public void testRe2JPatternSubscription() { assertThrows(IllegalStateException.class, () -> consumer.subscribe(List.of("topic1"))); } + @Test + public void shouldReturnMaxPollRecords() { + TopicPartition partition = new TopicPartition("test", 0); + consumer.assign(Collections.singleton(partition)); + consumer.updateBeginningOffsets(Collections.singletonMap(partition, 0L)); + + IntStream.range(0, 10).forEach(offset -> { + consumer.addRecord(new ConsumerRecord<>("test", 0, offset, null, null)); + }); + + consumer.setMaxPollRecords(2L); + + ConsumerRecords records; + + records = consumer.poll(Duration.ofMillis(1)); + assertEquals(2, records.count()); + + records = consumer.poll(Duration.ofMillis(1)); + assertEquals(2, records.count()); + + consumer.setMaxPollRecords(Long.MAX_VALUE); + + records = consumer.poll(Duration.ofMillis(1)); + assertEquals(6, records.count()); + + records = consumer.poll(Duration.ofMillis(1)); + assertTrue(records.isEmpty()); + } + } diff --git a/clients/src/test/java/org/apache/kafka/common/record/ArbitraryMemoryRecords.java b/clients/src/test/java/org/apache/kafka/common/record/ArbitraryMemoryRecords.java new file mode 100644 index 00000000000..30eec866a6c --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/record/ArbitraryMemoryRecords.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.kafka.common.record; + +import net.jqwik.api.Arbitraries; +import net.jqwik.api.Arbitrary; +import net.jqwik.api.ArbitrarySupplier; + +import java.nio.ByteBuffer; +import java.util.Random; + +public final class ArbitraryMemoryRecords implements ArbitrarySupplier { + @Override + public Arbitrary get() { + return Arbitraries.randomValue(ArbitraryMemoryRecords::buildRandomRecords); + } + + private static MemoryRecords buildRandomRecords(Random random) { + int size = random.nextInt(128) + 1; + byte[] bytes = new byte[size]; + random.nextBytes(bytes); + + return MemoryRecords.readableRecords(ByteBuffer.wrap(bytes)); + } +} diff --git a/clients/src/test/java/org/apache/kafka/common/record/InvalidMemoryRecordsProvider.java b/clients/src/test/java/org/apache/kafka/common/record/InvalidMemoryRecordsProvider.java new file mode 100644 index 00000000000..0f9446a6391 --- /dev/null +++ b/clients/src/test/java/org/apache/kafka/common/record/InvalidMemoryRecordsProvider.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.kafka.common.record; + +import org.apache.kafka.common.errors.CorruptRecordException; + +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; + +import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.stream.Stream; + +public final class InvalidMemoryRecordsProvider implements ArgumentsProvider { + // Use a baseOffset that's not zero so that it is less likely to match the LEO + private static final long BASE_OFFSET = 1234; + private static final int EPOCH = 4321; + + /** + * Returns a stream of arguments for invalid memory records and the expected exception. + * + * The first object in the {@code Arguments} is a {@code MemoryRecords}. + * + * The second object in the {@code Arguments} is an {@code Optional>} which is + * the expected exception from the log layer. 
+ */ + @Override + public Stream provideArguments(ExtensionContext context) { + return Stream.of( + Arguments.of(MemoryRecords.readableRecords(notEnoughBytes()), Optional.empty()), + Arguments.of(MemoryRecords.readableRecords(recordsSizeTooSmall()), Optional.of(CorruptRecordException.class)), + Arguments.of(MemoryRecords.readableRecords(notEnoughBytesToMagic()), Optional.empty()), + Arguments.of(MemoryRecords.readableRecords(negativeMagic()), Optional.of(CorruptRecordException.class)), + Arguments.of(MemoryRecords.readableRecords(largeMagic()), Optional.of(CorruptRecordException.class)), + Arguments.of(MemoryRecords.readableRecords(lessBytesThanRecordSize()), Optional.empty()) + ); + } + + private static ByteBuffer notEnoughBytes() { + var buffer = ByteBuffer.allocate(Records.LOG_OVERHEAD - 1); + buffer.limit(buffer.capacity()); + + return buffer; + } + + private static ByteBuffer recordsSizeTooSmall() { + var buffer = ByteBuffer.allocate(256); + // Write the base offset + buffer.putLong(BASE_OFFSET); + // Write record size + buffer.putInt(LegacyRecord.RECORD_OVERHEAD_V0 - 1); + buffer.position(0); + buffer.limit(buffer.capacity()); + + return buffer; + } + + private static ByteBuffer notEnoughBytesToMagic() { + var buffer = ByteBuffer.allocate(256); + // Write the base offset + buffer.putLong(BASE_OFFSET); + // Write record size + buffer.putInt(buffer.capacity() - Records.LOG_OVERHEAD); + buffer.position(0); + buffer.limit(Records.HEADER_SIZE_UP_TO_MAGIC - 1); + + return buffer; + } + + private static ByteBuffer negativeMagic() { + var buffer = ByteBuffer.allocate(256); + // Write the base offset + buffer.putLong(BASE_OFFSET); + // Write record size + buffer.putInt(buffer.capacity() - Records.LOG_OVERHEAD); + // Write the epoch + buffer.putInt(EPOCH); + // Write magic + buffer.put((byte) -1); + buffer.position(0); + buffer.limit(buffer.capacity()); + + return buffer; + } + + private static ByteBuffer largeMagic() { + var buffer = ByteBuffer.allocate(256); + // Write the base offset + buffer.putLong(BASE_OFFSET); + // Write record size + buffer.putInt(buffer.capacity() - Records.LOG_OVERHEAD); + // Write the epoch + buffer.putInt(EPOCH); + // Write magic + buffer.put((byte) (RecordBatch.CURRENT_MAGIC_VALUE + 1)); + buffer.position(0); + buffer.limit(buffer.capacity()); + + return buffer; + } + + private static ByteBuffer lessBytesThanRecordSize() { + var buffer = ByteBuffer.allocate(256); + // Write the base offset + buffer.putLong(BASE_OFFSET); + // Write record size + buffer.putInt(buffer.capacity() - Records.LOG_OVERHEAD); + // Write the epoch + buffer.putInt(EPOCH); + // Write magic + buffer.put(RecordBatch.CURRENT_MAGIC_VALUE); + buffer.position(0); + buffer.limit(buffer.capacity() - Records.LOG_OVERHEAD - 1); + + return buffer; + } +} diff --git a/clients/src/test/java/org/apache/kafka/common/requests/JoinGroupRequestTest.java b/clients/src/test/java/org/apache/kafka/common/requests/JoinGroupRequestTest.java index 60d10a68939..a3301908957 100644 --- a/clients/src/test/java/org/apache/kafka/common/requests/JoinGroupRequestTest.java +++ b/clients/src/test/java/org/apache/kafka/common/requests/JoinGroupRequestTest.java @@ -19,12 +19,15 @@ import org.apache.kafka.common.errors.InvalidConfigurationException; import org.apache.kafka.common.errors.UnsupportedVersionException; import org.apache.kafka.common.message.JoinGroupRequestData; +import org.apache.kafka.common.protocol.MessageUtil; import org.apache.kafka.test.TestUtils; import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; 
import java.util.Arrays; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.fail; @@ -65,4 +68,20 @@ public void testRequestVersionCompatibilityFailBuild() { .setProtocolType("consumer") ).build((short) 4)); } + + @Test + public void testRebalanceTimeoutDefaultsToSessionTimeoutV0() { + int sessionTimeoutMs = 30000; + short version = 0; + + ByteBuffer buffer = MessageUtil.toByteBuffer(new JoinGroupRequestData() + .setGroupId("groupId") + .setMemberId("consumerId") + .setProtocolType("consumer") + .setSessionTimeoutMs(sessionTimeoutMs), version); + + JoinGroupRequest request = JoinGroupRequest.parse(buffer, version); + assertEquals(sessionTimeoutMs, request.data().sessionTimeoutMs()); + assertEquals(sessionTimeoutMs, request.data().rebalanceTimeoutMs()); + } } diff --git a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java index 6578302e81e..ad52482a3b3 100644 --- a/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java +++ b/clients/src/test/java/org/apache/kafka/common/requests/RequestResponseTest.java @@ -665,6 +665,14 @@ public void testFetchRequestWithMetadata() { assertEquals(request.isolationLevel(), deserialized.isolationLevel()); } + @Test + public void testJoinGroupRequestV0RebalanceTimeout() { + final short version = 0; + JoinGroupRequest jgr = createJoinGroupRequest(version); + JoinGroupRequest jgr2 = JoinGroupRequest.parse(jgr.serialize(), version); + assertEquals(jgr2.data().rebalanceTimeoutMs(), jgr.data().rebalanceTimeoutMs()); + } + @Test public void testSerializeWithHeader() { CreatableTopicCollection topicsToCreate = new CreatableTopicCollection(1); diff --git a/committer-tools/kafka-merge-pr.py b/committer-tools/kafka-merge-pr.py index 78df3858615..c62ad0cbcc5 100755 --- a/committer-tools/kafka-merge-pr.py +++ b/committer-tools/kafka-merge-pr.py @@ -70,7 +70,7 @@ DEV_BRANCH_NAME = "trunk" -DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "4.0.0-inkless") +DEFAULT_FIX_VERSION = os.environ.get("DEFAULT_FIX_VERSION", "4.0.1-inkless") ORIGINAL_HEAD = "" diff --git a/config/log4j2.yaml b/config/log4j2.yaml index 7ee6f001e18..891a5c71cde 100644 --- a/config/log4j2.yaml +++ b/config/log4j2.yaml @@ -44,7 +44,7 @@ Configuration: # State Change appender - name: StateChangeAppender fileName: "${sys:kafka.logs.dir}/state-change.log" - filePattern: "${sys:kafka.logs.dir}/stage-change.log.%d{yyyy-MM-dd-HH}" + filePattern: "${sys:kafka.logs.dir}/state-change.log.%d{yyyy-MM-dd-HH}" PatternLayout: pattern: "${logPattern}" TimeBasedTriggeringPolicy: diff --git a/coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntime.java b/coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntime.java index 1e9724a57aa..e1e80476cf8 100644 --- a/coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntime.java +++ b/coordinator-common/src/main/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntime.java @@ -70,6 +70,7 @@ import java.util.function.Consumer; import java.util.stream.Collectors; +import static java.lang.Math.min; import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.CoordinatorWriteEvent.NOT_QUEUED; /** @@ -758,8 +759,14 @@ private void freeCurrentBatch() { // Cancel the linger timeout. 
currentBatch.lingerTimeoutTask.ifPresent(TimerTask::cancel); - // Release the buffer. - bufferSupplier.release(currentBatch.buffer); + // Release the buffer only if it is not larger than the maxBatchSize. + int maxBatchSize = partitionWriter.config(tp).maxMessageSize(); + + if (currentBatch.builder.buffer().capacity() <= maxBatchSize) { + bufferSupplier.release(currentBatch.builder.buffer()); + } else if (currentBatch.buffer.capacity() <= maxBatchSize) { + bufferSupplier.release(currentBatch.buffer); + } currentBatch = null; } @@ -859,7 +866,7 @@ private void maybeAllocateNewBatch( LogConfig logConfig = partitionWriter.config(tp); int maxBatchSize = logConfig.maxMessageSize(); long prevLastWrittenOffset = coordinator.lastWrittenOffset(); - ByteBuffer buffer = bufferSupplier.get(maxBatchSize); + ByteBuffer buffer = bufferSupplier.get(min(INITIAL_BUFFER_SIZE, maxBatchSize)); MemoryRecordsBuilder builder = new MemoryRecordsBuilder( buffer, @@ -1888,9 +1895,9 @@ public void onHighWatermarkUpdated( } /** - * 16KB. Used for initial buffer size for write operations. + * 512KB. Used for initial buffer size for write operations. */ - static final int MIN_BUFFER_SIZE = 16384; + static final int INITIAL_BUFFER_SIZE = 512 * 1024; /** * The log prefix. diff --git a/coordinator-common/src/test/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntimeTest.java b/coordinator-common/src/test/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntimeTest.java index 9e4e6f7bb9b..9198e207e4b 100644 --- a/coordinator-common/src/test/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntimeTest.java +++ b/coordinator-common/src/test/java/org/apache/kafka/coordinator/common/runtime/CoordinatorRuntimeTest.java @@ -19,6 +19,7 @@ import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.compress.Compression; +import org.apache.kafka.common.config.TopicConfig; import org.apache.kafka.common.errors.NotCoordinatorException; import org.apache.kafka.common.errors.NotEnoughReplicasException; import org.apache.kafka.common.errors.RecordTooLargeException; @@ -65,6 +66,7 @@ import java.util.Deque; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.OptionalInt; import java.util.Set; @@ -84,7 +86,7 @@ import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.CoordinatorState.INITIAL; import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.CoordinatorState.LOADING; import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.HighWatermarkListener.NO_OFFSET; -import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.MIN_BUFFER_SIZE; +import static org.apache.kafka.coordinator.common.runtime.CoordinatorRuntime.INITIAL_BUFFER_SIZE; import static org.apache.kafka.test.TestUtils.assertFutureThrows; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -3486,11 +3488,11 @@ public void testAppendRecordBatchSize() { assertEquals(List.of(0L), ctx.coordinator.snapshotRegistry().epochsList()); int maxBatchSize = writer.config(TP).maxMessageSize(); - assertTrue(maxBatchSize > MIN_BUFFER_SIZE); + assertTrue(maxBatchSize > INITIAL_BUFFER_SIZE); - // Generate enough records to create a batch that has 16KB < batchSize < maxBatchSize + // Generate enough records to create a batch that has INITIAL_BUFFER_SIZE < batchSize < maxBatchSize List 
records = new ArrayList<>(); - for (int i = 0; i < 3000; i++) { + for (int i = 0; i < 50000; i++) { records.add("record-" + i); } @@ -3504,7 +3506,210 @@ public void testAppendRecordBatchSize() { assertFalse(write1.isCompletedExceptionally()); int batchSize = writer.entries(TP).get(0).sizeInBytes(); - assertTrue(batchSize > MIN_BUFFER_SIZE && batchSize < maxBatchSize); + assertTrue(batchSize > INITIAL_BUFFER_SIZE && batchSize < maxBatchSize); + } + + @Test + public void testCoordinatorDoNotRetainBufferLargeThanMaxMessageSize() { + MockTimer timer = new MockTimer(); + InMemoryPartitionWriter mockWriter = new InMemoryPartitionWriter(false) { + @Override + public LogConfig config(TopicPartition tp) { + return new LogConfig(Map.of( + TopicConfig.MAX_MESSAGE_BYTES_CONFIG, String.valueOf(1024 * 1024) // 1MB + )); + } + }; + StringSerializer serializer = new StringSerializer(); + + CoordinatorRuntime runtime = + new CoordinatorRuntime.Builder() + .withTime(timer.time()) + .withTimer(timer) + .withDefaultWriteTimeOut(DEFAULT_WRITE_TIMEOUT) + .withLoader(new MockCoordinatorLoader()) + .withEventProcessor(new DirectEventProcessor()) + .withPartitionWriter(mockWriter) + .withCoordinatorShardBuilderSupplier(new MockCoordinatorShardBuilderSupplier()) + .withCoordinatorRuntimeMetrics(mock(CoordinatorRuntimeMetrics.class)) + .withCoordinatorMetrics(mock(CoordinatorMetrics.class)) + .withSerializer(serializer) + .withExecutorService(mock(ExecutorService.class)) + .build(); + + // Schedule the loading. + runtime.scheduleLoadOperation(TP, 10); + + // Verify the initial state. + CoordinatorRuntime.CoordinatorContext ctx = runtime.contextOrThrow(TP); + assertEquals(0L, ctx.coordinator.lastWrittenOffset()); + assertEquals(0L, ctx.coordinator.lastCommittedOffset()); + assertEquals(List.of(0L), ctx.coordinator.snapshotRegistry().epochsList()); + + // Generate a record larger than the maxBatchSize. + List largeRecords = List.of("A".repeat(100 * 1024 * 1024)); + + // Write #1. + CompletableFuture write1 = runtime.scheduleWriteOperation("write#1", TP, DEFAULT_WRITE_TIMEOUT, + state -> new CoordinatorResult<>(largeRecords, "response1", null, true, false) + ); + + // Verify that the write has not completed exceptionally. + // This will catch any exceptions thrown including RecordTooLargeException. + assertFalse(write1.isCompletedExceptionally()); + + // Verify that the next buffer retrieved from the bufferSupplier is the initial small one, not the large buffer. 
+ assertEquals(INITIAL_BUFFER_SIZE, ctx.bufferSupplier.get(1).capacity()); + } + + @Test + public void testCoordinatorRetainExpandedBufferLessOrEqualToMaxMessageSize() { + MockTimer timer = new MockTimer(); + InMemoryPartitionWriter mockWriter = new InMemoryPartitionWriter(false) { + @Override + public LogConfig config(TopicPartition tp) { + return new LogConfig(Map.of( + TopicConfig.MAX_MESSAGE_BYTES_CONFIG, String.valueOf(1024 * 1024 * 1024) // 1GB + )); + } + }; + StringSerializer serializer = new StringSerializer(); + + CoordinatorRuntime runtime = + new CoordinatorRuntime.Builder() + .withTime(timer.time()) + .withTimer(timer) + .withDefaultWriteTimeOut(DEFAULT_WRITE_TIMEOUT) + .withLoader(new MockCoordinatorLoader()) + .withEventProcessor(new DirectEventProcessor()) + .withPartitionWriter(mockWriter) + .withCoordinatorShardBuilderSupplier(new MockCoordinatorShardBuilderSupplier()) + .withCoordinatorRuntimeMetrics(mock(CoordinatorRuntimeMetrics.class)) + .withCoordinatorMetrics(mock(CoordinatorMetrics.class)) + .withSerializer(serializer) + .withExecutorService(mock(ExecutorService.class)) + .build(); + + // Schedule the loading. + runtime.scheduleLoadOperation(TP, 10); + + // Verify the initial state. + CoordinatorRuntime.CoordinatorContext ctx = runtime.contextOrThrow(TP); + assertEquals(0L, ctx.coordinator.lastWrittenOffset()); + assertEquals(0L, ctx.coordinator.lastCommittedOffset()); + assertEquals(List.of(0L), ctx.coordinator.snapshotRegistry().epochsList()); + + // Generate enough records to create a batch that has INITIAL_BUFFER_SIZE < batchSize < maxBatchSize + List records = new ArrayList<>(); + for (int i = 0; i < 1000000; i++) { + records.add("record-" + i); + } + + // Write #1. + CompletableFuture write1 = runtime.scheduleWriteOperation("write#1", TP, DEFAULT_WRITE_TIMEOUT, + state -> new CoordinatorResult<>(records, "response1") + ); + + // Verify that the write has not completed exceptionally. + // This will catch any exceptions thrown including RecordTooLargeException. + assertFalse(write1.isCompletedExceptionally()); + + int batchSize = mockWriter.entries(TP).get(0).sizeInBytes(); + int maxBatchSize = mockWriter.config(TP).maxMessageSize(); + assertTrue(INITIAL_BUFFER_SIZE < batchSize && batchSize <= maxBatchSize); + + // Verify that the next buffer retrieved from the bufferSupplier is the expanded buffer. 
+ assertTrue(ctx.bufferSupplier.get(1).capacity() > INITIAL_BUFFER_SIZE); + } + + @Test + public void testBufferShrinkWhenMaxMessageSizeReducedBelowInitialBufferSize() { + MockTimer timer = new MockTimer(); + var mockWriter = new InMemoryPartitionWriter(false) { + private LogConfig config = new LogConfig(Map.of( + TopicConfig.MAX_MESSAGE_BYTES_CONFIG, String.valueOf(1024 * 1024) // 1MB + )); + + @Override + public LogConfig config(TopicPartition tp) { + return config; + } + + public void updateConfig(LogConfig newConfig) { + this.config = newConfig; + } + }; + StringSerializer serializer = new StringSerializer(); + + CoordinatorRuntime runtime = + new CoordinatorRuntime.Builder() + .withTime(timer.time()) + .withTimer(timer) + .withDefaultWriteTimeOut(DEFAULT_WRITE_TIMEOUT) + .withLoader(new MockCoordinatorLoader()) + .withEventProcessor(new DirectEventProcessor()) + .withPartitionWriter(mockWriter) + .withCoordinatorShardBuilderSupplier(new MockCoordinatorShardBuilderSupplier()) + .withCoordinatorRuntimeMetrics(mock(CoordinatorRuntimeMetrics.class)) + .withCoordinatorMetrics(mock(CoordinatorMetrics.class)) + .withSerializer(serializer) + .withExecutorService(mock(ExecutorService.class)) + .build(); + + // Schedule the loading. + runtime.scheduleLoadOperation(TP, 10); + + // Verify the initial state. + CoordinatorRuntime.CoordinatorContext ctx = runtime.contextOrThrow(TP); + assertEquals(0L, ctx.coordinator.lastWrittenOffset()); + assertEquals(0L, ctx.coordinator.lastCommittedOffset()); + assertEquals(List.of(0L), ctx.coordinator.snapshotRegistry().epochsList()); + + List records = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + records.add("record-" + i); + } + + // Write #1. + CompletableFuture write1 = runtime.scheduleWriteOperation("write#1", TP, DEFAULT_WRITE_TIMEOUT, + state -> new CoordinatorResult<>(records, "response1") + ); + + // Verify that the write has not completed exceptionally. + // This will catch any exceptions thrown including RecordTooLargeException. + assertFalse(write1.isCompletedExceptionally()); + + int batchSize = mockWriter.entries(TP).get(0).sizeInBytes(); + int maxBatchSize = mockWriter.config(TP).maxMessageSize(); + assertTrue(batchSize <= INITIAL_BUFFER_SIZE && INITIAL_BUFFER_SIZE <= maxBatchSize); + + ByteBuffer cachedBuffer = ctx.bufferSupplier.get(1); + assertEquals(INITIAL_BUFFER_SIZE, cachedBuffer.capacity()); + // ctx.bufferSupplier.get(1); will clear cachedBuffer in bufferSupplier. Use release to put it back to bufferSupplier + ctx.bufferSupplier.release(cachedBuffer); + + // Reduce max message size below initial buffer size. + mockWriter.updateConfig(new LogConfig( + Map.of(TopicConfig.MAX_MESSAGE_BYTES_CONFIG, String.valueOf(INITIAL_BUFFER_SIZE - 66)))); + assertEquals(INITIAL_BUFFER_SIZE - 66, mockWriter.config(TP).maxMessageSize()); + + // Write #2. + CompletableFuture write2 = runtime.scheduleWriteOperation("write#2", TP, DEFAULT_WRITE_TIMEOUT, + state -> new CoordinatorResult<>(records, "response2") + ); + assertFalse(write2.isCompletedExceptionally()); + + // Verify that there is no cached buffer since the cached buffer size is greater than new maxMessageSize. + assertEquals(1, ctx.bufferSupplier.get(1).capacity()); + + // Write #3. 
+ CompletableFuture write3 = runtime.scheduleWriteOperation("write#3", TP, DEFAULT_WRITE_TIMEOUT, + state -> new CoordinatorResult<>(records, "response3") + ); + assertFalse(write3.isCompletedExceptionally()); + + // Verify that the cached buffer size is equal to the new maxMessageSize, which is less than INITIAL_BUFFER_SIZE. + assertEquals(mockWriter.config(TP).maxMessageSize(), ctx.bufferSupplier.get(1).capacity()); } @Test diff --git a/core/src/main/java/kafka/log/remote/RemoteLogManager.java b/core/src/main/java/kafka/log/remote/RemoteLogManager.java index b5f9e408c94..5fbc4693e4d 100644 --- a/core/src/main/java/kafka/log/remote/RemoteLogManager.java +++ b/core/src/main/java/kafka/log/remote/RemoteLogManager.java @@ -303,8 +303,15 @@ public void resizeExpirationThreadPool(int newSize) { public void resizeReaderThreadPool(int newSize) { int currentSize = remoteStorageReaderThreadPool.getCorePoolSize(); + int currentMaximumSize = remoteStorageReaderThreadPool.getMaximumPoolSize(); LOGGER.info("Updating remote reader thread pool size from {} to {}", currentSize, newSize); - remoteStorageReaderThreadPool.setCorePoolSize(newSize); + if (newSize > currentMaximumSize) { + remoteStorageReaderThreadPool.setMaximumPoolSize(newSize); + remoteStorageReaderThreadPool.setCorePoolSize(newSize); + } else { + remoteStorageReaderThreadPool.setCorePoolSize(newSize); + remoteStorageReaderThreadPool.setMaximumPoolSize(newSize); + } } private void removeMetrics() { @@ -313,6 +320,11 @@ private void removeMetrics() { remoteStorageReaderThreadPool.removeMetrics(); } + // Visible for testing + int readerThreadPoolSize() { + return remoteStorageReaderThreadPool.getCorePoolSize(); + } + /** * Returns the timeout for the RLM Tasks to wait for the quota to be available */ diff --git a/core/src/main/scala/kafka/cluster/Partition.scala b/core/src/main/scala/kafka/cluster/Partition.scala index 7ec0904cb00..66cc6eaabbf 100755 --- a/core/src/main/scala/kafka/cluster/Partition.scala +++ b/core/src/main/scala/kafka/cluster/Partition.scala @@ -1307,27 +1307,35 @@ class Partition(val topicPartition: TopicPartition, } } - private def doAppendRecordsToFollowerOrFutureReplica(records: MemoryRecords, isFuture: Boolean): Option[LogAppendInfo] = { + private def doAppendRecordsToFollowerOrFutureReplica( + records: MemoryRecords, + isFuture: Boolean, + partitionLeaderEpoch: Int + ): Option[LogAppendInfo] = { if (isFuture) { // The read lock is needed to handle race condition if request handler thread tries to // remove future replica after receiving AlterReplicaLogDirsRequest. inReadLock(leaderIsrUpdateLock) { // Note the replica may be undefined if it is removed by a non-ReplicaAlterLogDirsThread before // this method is called - futureLog.map { _.appendAsFollower(records) } + futureLog.map { _.appendAsFollower(records, partitionLeaderEpoch) } } } else { // The lock is needed to prevent the follower replica from being updated while ReplicaAlterDirThread // is executing maybeReplaceCurrentWithFutureReplica() to replace follower replica with the future replica.
futureLogLock.synchronized { - Some(localLogOrException.appendAsFollower(records)) + Some(localLogOrException.appendAsFollower(records, partitionLeaderEpoch)) } } } - def appendRecordsToFollowerOrFutureReplica(records: MemoryRecords, isFuture: Boolean): Option[LogAppendInfo] = { + def appendRecordsToFollowerOrFutureReplica( + records: MemoryRecords, + isFuture: Boolean, + partitionLeaderEpoch: Int + ): Option[LogAppendInfo] = { try { - doAppendRecordsToFollowerOrFutureReplica(records, isFuture) + doAppendRecordsToFollowerOrFutureReplica(records, isFuture, partitionLeaderEpoch) } catch { case e: UnexpectedAppendOffsetException => val log = if (isFuture) futureLocalLogOrException else localLogOrException @@ -1345,7 +1353,7 @@ class Partition(val topicPartition: TopicPartition, info(s"Unexpected offset in append to $topicPartition. First offset ${e.firstOffset} is less than log start offset ${log.logStartOffset}." + s" Since this is the first record to be appended to the $replicaName's log, will start the log from offset ${e.firstOffset}.") truncateFullyAndStartAt(e.firstOffset, isFuture) - doAppendRecordsToFollowerOrFutureReplica(records, isFuture) + doAppendRecordsToFollowerOrFutureReplica(records, isFuture, partitionLeaderEpoch) } else throw e } diff --git a/core/src/main/scala/kafka/coordinator/transaction/TransactionCoordinator.scala b/core/src/main/scala/kafka/coordinator/transaction/TransactionCoordinator.scala index e0019f0d773..fdccf5a0209 100644 --- a/core/src/main/scala/kafka/coordinator/transaction/TransactionCoordinator.scala +++ b/core/src/main/scala/kafka/coordinator/transaction/TransactionCoordinator.scala @@ -802,11 +802,9 @@ class TransactionCoordinator(txnConfig: TransactionConfig, } if (nextState == PrepareAbort && isEpochFence) { - // We should clear the pending state to make way for the transition to PrepareAbort and also bump - // the epoch in the transaction metadata we are about to append. + // We should clear the pending state to make way for the transition to PrepareAbort txnMetadata.pendingState = None - txnMetadata.producerEpoch = producerEpoch - txnMetadata.lastProducerEpoch = RecordBatch.NO_PRODUCER_EPOCH + // For TV2+, don't manually set the epoch - let prepareAbortOrCommit handle it naturally. } nextProducerIdOrErrors.flatMap { @@ -962,10 +960,10 @@ class TransactionCoordinator(txnConfig: TransactionConfig, case Some(epochAndMetadata) => if (epochAndMetadata.coordinatorEpoch == coordinatorEpoch) { - // This was attempted epoch fence that failed, so mark this state on the metadata - epochAndMetadata.transactionMetadata.hasFailedEpochFence = true + // For TV2, we allow re-bumping the epoch on retry, since we don't complete the epoch bump. + // Therefore, we don't set hasFailedEpochFence = true. warn(s"The coordinator failed to write an epoch fence transition for producer $transactionalId to the transaction log " + - s"with error $error. 
The epoch was increased to ${newMetadata.producerEpoch} but not returned to the client") + s"with error $error") } } } diff --git a/core/src/main/scala/kafka/log/LogCleaner.scala b/core/src/main/scala/kafka/log/LogCleaner.scala index a4f96ff7e63..e1f4d4afa2d 100644 --- a/core/src/main/scala/kafka/log/LogCleaner.scala +++ b/core/src/main/scala/kafka/log/LogCleaner.scala @@ -117,14 +117,14 @@ class LogCleaner(initialConfig: CleanerConfig, /** * @param f to compute the result - * @return the max value (int value) or 0 if there is no cleaner + * @return the max value or 0 if there is no cleaner */ - private[log] def maxOverCleanerThreads(f: CleanerThread => Double): Int = - cleaners.map(f).maxOption.getOrElse(0.0d).toInt + private[log] def maxOverCleanerThreads(f: CleanerThread => Double): Double = + cleaners.map(f).maxOption.getOrElse(0.0d) /* a metric to track the maximum utilization of any thread's buffer in the last cleaning */ metricsGroup.newGauge(MaxBufferUtilizationPercentMetricName, - () => maxOverCleanerThreads(_.lastStats.bufferUtilization) * 100) + () => (maxOverCleanerThreads(_.lastStats.bufferUtilization) * 100).toInt) /* a metric to track the recopy rate of each thread's last cleaning */ metricsGroup.newGauge(CleanerRecopyPercentMetricName, () => { @@ -134,12 +134,12 @@ class LogCleaner(initialConfig: CleanerConfig, }) /* a metric to track the maximum cleaning time for the last cleaning from each thread */ - metricsGroup.newGauge(MaxCleanTimeMetricName, () => maxOverCleanerThreads(_.lastStats.elapsedSecs)) + metricsGroup.newGauge(MaxCleanTimeMetricName, () => maxOverCleanerThreads(_.lastStats.elapsedSecs).toInt) // a metric to track delay between the time when a log is required to be compacted // as determined by max compaction lag and the time of last cleaner run. metricsGroup.newGauge(MaxCompactionDelayMetricsName, - () => maxOverCleanerThreads(_.lastPreCleanStats.maxCompactionDelayMs.toDouble) / 1000) + () => (maxOverCleanerThreads(_.lastPreCleanStats.maxCompactionDelayMs.toDouble) / 1000).toInt) metricsGroup.newGauge(DeadThreadCountMetricName, () => deadThreadCount) @@ -523,10 +523,11 @@ object LogCleaner { } - private val MaxBufferUtilizationPercentMetricName = "max-buffer-utilization-percent" + // Visible for test. 
+ private[log] val MaxBufferUtilizationPercentMetricName = "max-buffer-utilization-percent" private val CleanerRecopyPercentMetricName = "cleaner-recopy-percent" - private val MaxCleanTimeMetricName = "max-clean-time-secs" - private val MaxCompactionDelayMetricsName = "max-compaction-delay-secs" + private[log] val MaxCleanTimeMetricName = "max-clean-time-secs" + private[log] val MaxCompactionDelayMetricsName = "max-compaction-delay-secs" private val DeadThreadCountMetricName = "DeadThreadCount" // package private for testing private[log] val MetricNames = Set( @@ -824,7 +825,7 @@ private[log] class Cleaner(val id: Int, val retained = MemoryRecords.readableRecords(outputBuffer) // it's OK not to hold the Log's lock in this case, because this segment is only accessed by other threads // after `Log.replaceSegments` (which acquires the lock) is called - dest.append(result.maxOffset, result.maxTimestamp, result.shallowOffsetOfMaxTimestamp(), retained) + dest.append(result.maxOffset, retained) throttler.maybeThrottle(outputBuffer.limit()) } diff --git a/core/src/main/scala/kafka/log/UnifiedLog.scala b/core/src/main/scala/kafka/log/UnifiedLog.scala index 9a977a262b6..e9985f619f5 100644 --- a/core/src/main/scala/kafka/log/UnifiedLog.scala +++ b/core/src/main/scala/kafka/log/UnifiedLog.scala @@ -698,6 +698,7 @@ class UnifiedLog(@volatile var logStartOffset: Long, * Append this message set to the active segment of the local log, assigning offsets and Partition Leader Epochs * * @param records The records to append + * @param leaderEpoch the epoch of the replica appending * @param origin Declares the origin of the append which affects required validations * @param requestLocal request local instance * @throws KafkaStorageException If the append fails due to an I/O error. @@ -728,14 +729,15 @@ class UnifiedLog(@volatile var logStartOffset: Long, * Append this message set to the active segment of the local log without assigning offsets or Partition Leader Epochs * * @param records The records to append + * @param leaderEpoch the epoch of the replica appending * @throws KafkaStorageException If the append fails due to an I/O error. * @return Information about the appended messages including the first and last offset. */ - def appendAsFollower(records: MemoryRecords): LogAppendInfo = { + def appendAsFollower(records: MemoryRecords, leaderEpoch: Int): LogAppendInfo = { append(records, origin = AppendOrigin.REPLICATION, validateAndAssignOffsets = false, - leaderEpoch = -1, + leaderEpoch = leaderEpoch, requestLocal = None, verificationGuard = VerificationGuard.SENTINEL, // disable to check the validation of record size since the record is already accepted by leader. @@ -816,7 +818,6 @@ class UnifiedLog(@volatile var logStartOffset: Long, validRecords = validateAndOffsetAssignResult.validatedRecords appendInfo.setMaxTimestamp(validateAndOffsetAssignResult.maxTimestampMs) - appendInfo.setShallowOffsetOfMaxTimestamp(validateAndOffsetAssignResult.shallowOffsetOfMaxTimestamp) appendInfo.setLastOffset(offset.value - 1) appendInfo.setRecordValidationStats(validateAndOffsetAssignResult.recordValidationStats) if (config.messageTimestampType == TimestampType.LOG_APPEND_TIME) @@ -902,7 +903,7 @@ class UnifiedLog(@volatile var logStartOffset: Long, // will be cleaned up after the log directory is recovered. Note that the end offset of the // ProducerStateManager will not be updated and the last stable offset will not advance // if the append to the transaction index fails. 
- localLog.append(appendInfo.lastOffset, appendInfo.maxTimestamp, appendInfo.shallowOffsetOfMaxTimestamp, validRecords) + localLog.append(appendInfo.lastOffset, validRecords) updateHighWatermarkWithLogEndOffset() // update the producer state @@ -1115,63 +1116,85 @@ class UnifiedLog(@volatile var logStartOffset: Long, var shallowOffsetOfMaxTimestamp = -1L var readFirstMessage = false var lastOffsetOfFirstBatch = -1L + var skipRemainingBatches = false records.batches.forEach { batch => if (origin == AppendOrigin.RAFT_LEADER && batch.partitionLeaderEpoch != leaderEpoch) { - throw new InvalidRecordException("Append from Raft leader did not set the batch epoch correctly") + throw new InvalidRecordException( + s"Append from Raft leader did not set the batch epoch correctly, expected $leaderEpoch " + + s"but the batch has ${batch.partitionLeaderEpoch}" + ) } // we only validate V2 and higher to avoid potential compatibility issues with older clients - if (batch.magic >= RecordBatch.MAGIC_VALUE_V2 && origin == AppendOrigin.CLIENT && batch.baseOffset != 0) + if (batch.magic >= RecordBatch.MAGIC_VALUE_V2 && origin == AppendOrigin.CLIENT && batch.baseOffset != 0) { throw new InvalidRecordException(s"The baseOffset of the record batch in the append to $topicPartition should " + s"be 0, but it is ${batch.baseOffset}") - - // update the first offset if on the first message. For magic versions older than 2, we use the last offset - // to avoid the need to decompress the data (the last offset can be obtained directly from the wrapper message). - // For magic version 2, we can get the first offset directly from the batch header. - // When appending to the leader, we will update LogAppendInfo.baseOffset with the correct value. In the follower - // case, validation will be more lenient. - // Also indicate whether we have the accurate first offset or not - if (!readFirstMessage) { - if (batch.magic >= RecordBatch.MAGIC_VALUE_V2) - firstOffset = batch.baseOffset - lastOffsetOfFirstBatch = batch.lastOffset - readFirstMessage = true } - // check that offsets are monotonically increasing - if (lastOffset >= batch.lastOffset) - monotonic = false - - // update the last offset seen - lastOffset = batch.lastOffset - lastLeaderEpoch = batch.partitionLeaderEpoch - - // Check if the message sizes are valid. - val batchSize = batch.sizeInBytes - if (!ignoreRecordSize && batchSize > config.maxMessageSize) { - brokerTopicStats.topicStats(topicPartition.topic).bytesRejectedRate.mark(records.sizeInBytes) - brokerTopicStats.allTopicsStats.bytesRejectedRate.mark(records.sizeInBytes) - throw new RecordTooLargeException(s"The record batch size in the append to $topicPartition is $batchSize bytes " + - s"which exceeds the maximum configured value of ${config.maxMessageSize}.") - } + /* During replication of uncommitted data it is possible for the remote replica to send record batches after it lost + * leadership. This can happen if sending FETCH responses is slow. There is a race between sending the FETCH + * response and the replica truncating and appending to the log. The replicating replica resolves this issue by only + * persisting up to the current leader epoch used in the fetch request. See KAFKA-18723 for more details. 
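+ * Batches whose partition leader epoch is greater than the fetch request's leader epoch are therefore skipped (and logged) rather than appended; see hasHigherPartitionLeaderEpoch below.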
+ */ + skipRemainingBatches = skipRemainingBatches || hasHigherPartitionLeaderEpoch(batch, origin, leaderEpoch) + if (skipRemainingBatches) { + info( + s"Skipping batch $batch from an origin of $origin because its partition leader epoch " + + s"${batch.partitionLeaderEpoch} is higher than the replica's current leader epoch " + + s"$leaderEpoch" + ) + } else { + // update the first offset if on the first message. For magic versions older than 2, we use the last offset + // to avoid the need to decompress the data (the last offset can be obtained directly from the wrapper message). + // For magic version 2, we can get the first offset directly from the batch header. + // When appending to the leader, we will update LogAppendInfo.baseOffset with the correct value. In the follower + // case, validation will be more lenient. + // Also indicate whether we have the accurate first offset or not + if (!readFirstMessage) { + if (batch.magic >= RecordBatch.MAGIC_VALUE_V2) { + firstOffset = batch.baseOffset + } + lastOffsetOfFirstBatch = batch.lastOffset + readFirstMessage = true + } - // check the validity of the message by checking CRC - if (!batch.isValid) { - brokerTopicStats.allTopicsStats.invalidMessageCrcRecordsPerSec.mark() - throw new CorruptRecordException(s"Record is corrupt (stored crc = ${batch.checksum()}) in topic partition $topicPartition.") - } + // check that offsets are monotonically increasing + if (lastOffset >= batch.lastOffset) { + monotonic = false + } - if (batch.maxTimestamp > maxTimestamp) { - maxTimestamp = batch.maxTimestamp - shallowOffsetOfMaxTimestamp = lastOffset - } + // update the last offset seen + lastOffset = batch.lastOffset + lastLeaderEpoch = batch.partitionLeaderEpoch + + // Check if the message sizes are valid. + val batchSize = batch.sizeInBytes + if (!ignoreRecordSize && batchSize > config.maxMessageSize) { + brokerTopicStats.topicStats(topicPartition.topic).bytesRejectedRate.mark(records.sizeInBytes) + brokerTopicStats.allTopicsStats.bytesRejectedRate.mark(records.sizeInBytes) + throw new RecordTooLargeException(s"The record batch size in the append to $topicPartition is $batchSize bytes " + + s"which exceeds the maximum configured value of ${config.maxMessageSize}.") + } - validBytesCount += batchSize + // check the validity of the message by checking CRC + if (!batch.isValid) { + brokerTopicStats.allTopicsStats.invalidMessageCrcRecordsPerSec.mark() + throw new CorruptRecordException(s"Record is corrupt (stored crc = ${batch.checksum()}) in topic partition $topicPartition.") + } - val batchCompression = CompressionType.forId(batch.compressionType.id) - // sourceCompression is only used on the leader path, which only contains one batch if version is v2 or messages are compressed - if (batchCompression != CompressionType.NONE) - sourceCompression = batchCompression + if (batch.maxTimestamp > maxTimestamp) { + maxTimestamp = batch.maxTimestamp + shallowOffsetOfMaxTimestamp = lastOffset + } + + validBytesCount += batchSize + + val batchCompression = CompressionType.forId(batch.compressionType.id) + // sourceCompression is only used on the leader path, which only contains one batch if version is v2 or messages are compressed + if (batchCompression != CompressionType.NONE) { + sourceCompression = batchCompression + } + } } if (requireOffsetsMonotonic && !monotonic) @@ -1183,11 +1206,30 @@ class UnifiedLog(@volatile var logStartOffset: Long, else OptionalInt.empty() - new LogAppendInfo(firstOffset, lastOffset, lastLeaderEpochOpt, maxTimestamp, 
shallowOffsetOfMaxTimestamp, + new LogAppendInfo(firstOffset, lastOffset, lastLeaderEpochOpt, maxTimestamp, RecordBatch.NO_TIMESTAMP, logStartOffset, RecordValidationStats.EMPTY, sourceCompression, validBytesCount, lastOffsetOfFirstBatch, Collections.emptyList[RecordError], LeaderHwChange.NONE) } + /** + * Return true if the record batch has a higher leader epoch than the specified leader epoch + * + * @param batch the batch to validate + * @param origin the reason for appending the record batch + * @param leaderEpoch the epoch to compare + * @return true if the append reason is replication and the batch's partition leader epoch is + * greater than the specified leaderEpoch, otherwise false + */ + private def hasHigherPartitionLeaderEpoch( + batch: RecordBatch, + origin: AppendOrigin, + leaderEpoch: Int + ): Boolean = { + origin == AppendOrigin.REPLICATION && + batch.partitionLeaderEpoch() != RecordBatch.NO_PARTITION_LEADER_EPOCH && + batch.partitionLeaderEpoch() > leaderEpoch + } + /** * Trim any invalid bytes from the end of this message set (if there are any) * @@ -1327,7 +1369,7 @@ class UnifiedLog(@volatile var logStartOffset: Long, val asyncOffsetReadFutureHolder = remoteLogManager.get.asyncOffsetRead(topicPartition, targetTimestamp, logStartOffset, leaderEpochCache, () => searchOffsetInLocalLog(targetTimestamp, localLogStartOffset())) - + new OffsetResultHolder(Optional.empty(), Optional.of(asyncOffsetReadFutureHolder)) } else { new OffsetResultHolder(searchOffsetInLocalLog(targetTimestamp, logStartOffset).toJava) diff --git a/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala b/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala index bd80c0aca4d..997376c471e 100644 --- a/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala +++ b/core/src/main/scala/kafka/raft/KafkaMetadataLog.scala @@ -25,6 +25,7 @@ import kafka.raft.KafkaMetadataLog.UnknownReason import kafka.utils.Logging import org.apache.kafka.common.config.TopicConfig import org.apache.kafka.common.errors.InvalidConfigurationException +import org.apache.kafka.common.errors.CorruptRecordException import org.apache.kafka.common.record.{MemoryRecords, Records} import org.apache.kafka.common.utils.{Time, Utils} import org.apache.kafka.common.{KafkaException, TopicPartition, Uuid} @@ -89,8 +90,9 @@ final class KafkaMetadataLog private ( } override def appendAsLeader(records: Records, epoch: Int): LogAppendInfo = { - if (records.sizeInBytes == 0) + if (records.sizeInBytes == 0) { throw new IllegalArgumentException("Attempt to append an empty record set") + } handleAndConvertLogAppendInfo( log.appendAsLeader(records.asInstanceOf[MemoryRecords], @@ -101,18 +103,20 @@ final class KafkaMetadataLog private ( ) } - override def appendAsFollower(records: Records): LogAppendInfo = { - if (records.sizeInBytes == 0) + override def appendAsFollower(records: Records, epoch: Int): LogAppendInfo = { + if (records.sizeInBytes == 0) { throw new IllegalArgumentException("Attempt to append an empty record set") + } - handleAndConvertLogAppendInfo(log.appendAsFollower(records.asInstanceOf[MemoryRecords])) + handleAndConvertLogAppendInfo(log.appendAsFollower(records.asInstanceOf[MemoryRecords], epoch)) } private def handleAndConvertLogAppendInfo(appendInfo: internals.log.LogAppendInfo): LogAppendInfo = { - if (appendInfo.firstOffset != UnifiedLog.UnknownOffset) + if (appendInfo.firstOffset == UnifiedLog.UnknownOffset) { + throw new CorruptRecordException(s"Append failed unexpectedly $appendInfo") + } else { new 
LogAppendInfo(appendInfo.firstOffset, appendInfo.lastOffset) - else - throw new KafkaException(s"Append failed unexpectedly") + } } override def lastFetchedEpoch: Int = { diff --git a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala index be663d19ec8..0fd9c9333d3 100755 --- a/core/src/main/scala/kafka/server/AbstractFetcherThread.scala +++ b/core/src/main/scala/kafka/server/AbstractFetcherThread.scala @@ -78,9 +78,12 @@ abstract class AbstractFetcherThread(name: String, /* callbacks to be defined in subclass */ // process fetched data - protected def processPartitionData(topicPartition: TopicPartition, - fetchOffset: Long, - partitionData: FetchData): Option[LogAppendInfo] + protected def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] protected def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit @@ -304,7 +307,8 @@ abstract class AbstractFetcherThread(name: String, } } - private def processFetchRequest(sessionPartitions: util.Map[TopicPartition, FetchRequest.PartitionData], + // visible for testing + private[server] def processFetchRequest(sessionPartitions: util.Map[TopicPartition, FetchRequest.PartitionData], fetchRequest: FetchRequest.Builder): Unit = { val partitionsWithError = mutable.Set[TopicPartition]() val divergingEndOffsets = mutable.Map.empty[TopicPartition, EpochEndOffset] @@ -330,10 +334,15 @@ abstract class AbstractFetcherThread(name: String, responseData.foreachEntry { (topicPartition, partitionData) => Option(partitionStates.stateValue(topicPartition)).foreach { currentFetchState => // It's possible that a partition is removed and re-added or truncated when there is a pending fetch request. - // In this case, we only want to process the fetch response if the partition state is ready for fetch and - // the current offset is the same as the offset requested. + // In this case, we only want to process the fetch response if: + // - the partition state is ready for fetch + // - the current offset is the same as the offset requested + // - the current leader epoch is the same as the leader epoch requested val fetchPartitionData = sessionPartitions.get(topicPartition) - if (fetchPartitionData != null && fetchPartitionData.fetchOffset == currentFetchState.fetchOffset && currentFetchState.isReadyForFetch) { + if (fetchPartitionData != null && + fetchPartitionData.fetchOffset == currentFetchState.fetchOffset && + fetchPartitionData.currentLeaderEpoch.map[Boolean](_ == currentFetchState.currentLeaderEpoch).orElse(true) && + currentFetchState.isReadyForFetch) { Errors.forCode(partitionData.errorCode) match { case Errors.NONE => try { @@ -348,10 +357,16 @@ abstract class AbstractFetcherThread(name: String, .setLeaderEpoch(partitionData.divergingEpoch.epoch) .setEndOffset(partitionData.divergingEpoch.endOffset) } else { - // Once we hand off the partition data to the subclass, we can't mess with it any more in this thread + /* Once we hand off the partition data to the subclass, we can't mess with it any more in this thread + * + * When appending batches to the log only append record batches up to the leader epoch when the FETCH + * request was handled. This is done to make sure that logs are not inconsistent because of log + * truncation and append after the FETCH request was handled. See KAFKA-18723 for more details. 
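+ * The leader epoch recorded in currentFetchState is passed to processPartitionData below so that the subclass can enforce this bound when appending.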
+ */ val logAppendInfoOpt = processPartitionData( topicPartition, currentFetchState.fetchOffset, + currentFetchState.currentLeaderEpoch, partitionData ) diff --git a/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala b/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala index 36c666b6467..fdcbbf5964c 100644 --- a/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala +++ b/core/src/main/scala/kafka/server/BrokerLifecycleManager.scala @@ -28,7 +28,7 @@ import org.apache.kafka.common.protocol.Errors import org.apache.kafka.common.requests.{BrokerHeartbeatRequest, BrokerHeartbeatResponse, BrokerRegistrationRequest, BrokerRegistrationResponse} import org.apache.kafka.metadata.{BrokerState, VersionRange} import org.apache.kafka.queue.EventQueue.DeadlineFunction -import org.apache.kafka.common.utils.{ExponentialBackoff, LogContext, Time} +import org.apache.kafka.common.utils.{LogContext, Time} import org.apache.kafka.queue.{EventQueue, KafkaEventQueue} import org.apache.kafka.server.common.{ControllerRequestCompletionHandler, NodeToControllerChannelManager} @@ -93,18 +93,6 @@ class BrokerLifecycleManager( private val initialTimeoutNs = MILLISECONDS.toNanos(config.initialRegistrationTimeoutMs.longValue()) - /** - * The exponential backoff to use for resending communication. - */ - private val resendExponentialBackoff = - new ExponentialBackoff(100, 2, config.brokerSessionTimeoutMs.toLong / 2, 0.02) - - /** - * The number of times we've tried and failed to communicate. This variable can only be - * read or written from the BrokerToControllerRequestThread. - */ - private var failedAttempts = 0L - /** * The broker incarnation ID. This ID uniquely identifies each time we start the broker */ @@ -449,7 +437,6 @@ class BrokerLifecycleManager( val message = response.responseBody().asInstanceOf[BrokerRegistrationResponse] val errorCode = Errors.forCode(message.data().errorCode()) if (errorCode == Errors.NONE) { - failedAttempts = 0 _brokerEpoch = message.data().brokerEpoch() registered = true initialRegistrationSucceeded = true @@ -523,7 +510,6 @@ class BrokerLifecycleManager( val errorCode = Errors.forCode(message.data().errorCode()) if (errorCode == Errors.NONE) { val responseData = message.data() - failedAttempts = 0 currentOfflineDirs.foreach(cur => offlineDirs.put(cur, true)) _state match { case BrokerState.STARTING => @@ -586,10 +572,9 @@ class BrokerLifecycleManager( } private def scheduleNextCommunicationAfterFailure(): Unit = { - val delayMs = resendExponentialBackoff.backoff(failedAttempts) - failedAttempts = failedAttempts + 1 nextSchedulingShouldBeImmediate = false // never immediately reschedule after a failure - scheduleNextCommunication(NANOSECONDS.convert(delayMs, MILLISECONDS)) + scheduleNextCommunication(NANOSECONDS.convert( + config.brokerHeartbeatIntervalMs.longValue() , MILLISECONDS)) } private def scheduleNextCommunicationAfterSuccess(): Unit = { diff --git a/core/src/main/scala/kafka/server/BrokerServer.scala b/core/src/main/scala/kafka/server/BrokerServer.scala index fd9fd2a2db6..f2fd3a9e0b3 100644 --- a/core/src/main/scala/kafka/server/BrokerServer.scala +++ b/core/src/main/scala/kafka/server/BrokerServer.scala @@ -374,6 +374,10 @@ class BrokerServer( tokenManager = new DelegationTokenManager(config, tokenCache, time) tokenManager.startup() + // Create and initialize an authorizer if one is configured. 
+ authorizer = config.createNewAuthorizer() + authorizer.foreach(_.configure(config.originals)) + /* initializing the groupConfigManager */ groupConfigManager = new GroupConfigManager(config.groupCoordinatorConfig.extractGroupConfigMap(config.shareGroupConfig)) @@ -417,7 +421,7 @@ class BrokerServer( config, "heartbeat", s"broker-${config.nodeId}-", - config.brokerSessionTimeoutMs / 2 // KAFKA-14392 + config.brokerHeartbeatIntervalMs ) lifecycleManager.start( () => sharedServer.loader.lastAppliedOffset(), @@ -428,10 +432,6 @@ class BrokerServer( logManager.readBrokerEpochFromCleanShutdownFiles() ) - // Create and initialize an authorizer if one is configured. - authorizer = config.createNewAuthorizer() - authorizer.foreach(_.configure(config.originals)) - // The FetchSessionCache is divided into config.numIoThreads shards, each responsible // for Math.max(1, shardNum * sessionIdRange) <= sessionId < (shardNum + 1) * sessionIdRange val sessionIdRange = Int.MaxValue / NumFetchSessionCacheShards diff --git a/core/src/main/scala/kafka/server/DelayedRemoteFetch.scala b/core/src/main/scala/kafka/server/DelayedRemoteFetch.scala index 45bfe69844a..e6bdce63e68 100644 --- a/core/src/main/scala/kafka/server/DelayedRemoteFetch.scala +++ b/core/src/main/scala/kafka/server/DelayedRemoteFetch.scala @@ -87,8 +87,9 @@ class DelayedRemoteFetch(remoteFetchTask: Future[Void], } override def onExpiration(): Unit = { - // cancel the remote storage read task, if it has not been executed yet - val cancelled = remoteFetchTask.cancel(true) + // cancel the remote storage read task, if it has not been executed yet and + // avoid interrupting the task if it is already running, as that may force closing opened/cached resources such as the transaction index. + val cancelled = remoteFetchTask.cancel(false) if (!cancelled) debug(s"Remote fetch task for RemoteStorageFetchInfo: $remoteFetchInfo could not be cancelled and its isDone value is ${remoteFetchTask.isDone}") DelayedRemoteFetchMetrics.expiredRequestMeter.mark() diff --git a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala index 56492de3485..5f5373b3641 100644 --- a/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala +++ b/core/src/main/scala/kafka/server/ReplicaAlterLogDirsThread.scala @@ -66,9 +66,12 @@ class ReplicaAlterLogDirsThread(name: String, } // process fetched data - override def processPartitionData(topicPartition: TopicPartition, - fetchOffset: Long, - partitionData: FetchData): Option[LogAppendInfo] = { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = { val partition = replicaMgr.getPartitionOrException(topicPartition) val futureLog = partition.futureLocalLogOrException val records = toMemoryRecords(FetchResponse.recordsOrFail(partitionData)) @@ -78,7 +81,7 @@ class ReplicaAlterLogDirsThread(name: String, topicPartition, fetchOffset, futureLog.logEndOffset)) val logAppendInfo = if (records.sizeInBytes() > 0) - partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = true) + partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = true, partitionLeaderEpoch) else None diff --git a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala index 7f0c6d41dbd..4c11301c567 100644 --- a/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala +++ 
b/core/src/main/scala/kafka/server/ReplicaFetcherThread.scala @@ -98,9 +98,12 @@ class ReplicaFetcherThread(name: String, } // process fetched data - override def processPartitionData(topicPartition: TopicPartition, - fetchOffset: Long, - partitionData: FetchData): Option[LogAppendInfo] = { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = { val logTrace = isTraceEnabled val partition = replicaMgr.getPartitionOrException(topicPartition) val log = partition.localLogOrException @@ -117,7 +120,7 @@ class ReplicaFetcherThread(name: String, .format(log.logEndOffset, topicPartition, records.sizeInBytes, partitionData.highWatermark)) // Append the leader's messages to the log - val logAppendInfo = partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false) + val logAppendInfo = partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false, partitionLeaderEpoch) if (logTrace) trace("Follower has replica log end offset %d after appending %d bytes of messages for partition %s" diff --git a/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala b/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala index 1985f04348f..4a7ae84c2d5 100644 --- a/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala +++ b/core/src/main/scala/kafka/server/metadata/BrokerMetadataPublisher.scala @@ -250,6 +250,11 @@ class BrokerMetadataPublisher( /** * Update the coordinator of local replica changes: election and resignation. * + * When the topic is deleted or a partition of the topic is deleted, {@param resignation} + * callback must be called with {@code None}. The coordinator expects the leader epoch to be + * incremented when the {@param resignation} callback is called but the leader epoch + * is not incremented when a topic is deleted. 
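+ * This is why the deleted partitions below are resigned with {@code None} instead of their last leader epoch.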
+ * * @param image latest metadata image * @param delta metadata delta from the previous image and the latest image * @param topicName name of the topic associated with the coordinator @@ -270,7 +275,7 @@ class BrokerMetadataPublisher( if (topicsDelta.topicWasDeleted(topicName)) { topicsDelta.image.getTopic(topicName).partitions.entrySet.forEach { entry => if (entry.getValue.leader == brokerId) { - resignation(entry.getKey, Some(entry.getValue.leaderEpoch)) + resignation(entry.getKey, None) } } } diff --git a/core/src/test/java/kafka/log/remote/RemoteLogManagerTest.java b/core/src/test/java/kafka/log/remote/RemoteLogManagerTest.java index 7f536c9872f..3943ff9289a 100644 --- a/core/src/test/java/kafka/log/remote/RemoteLogManagerTest.java +++ b/core/src/test/java/kafka/log/remote/RemoteLogManagerTest.java @@ -1156,7 +1156,7 @@ void testRemoteLogManagerRemoteMetrics() throws Exception { safeLongYammerMetricValue("RemoteLogSizeComputationTime,topic=" + leaderTopic), safeLongYammerMetricValue("RemoteLogSizeComputationTime"))); remoteLogSizeComputationTimeLatch.countDown(); - + TestUtils.waitForCondition( () -> 0 == safeLongYammerMetricValue("RemoteCopyLagBytes") && 0 == safeLongYammerMetricValue("RemoteCopyLagBytes,topic=" + leaderTopic), String.format("Expected to find 0 for RemoteCopyLagBytes metric value, but found %d for topic 'Leader' and %d for all topics.", @@ -3713,6 +3713,15 @@ public void testRLMOpsWhenMetadataIsNotReady() throws InterruptedException { verifyNoMoreInteractions(remoteStorageManager); } + @Test + void testUpdateRemoteStorageReaderThreads() { + assertEquals(10, remoteLogManager.readerThreadPoolSize()); + remoteLogManager.resizeReaderThreadPool(6); + assertEquals(6, remoteLogManager.readerThreadPoolSize()); + remoteLogManager.resizeReaderThreadPool(12); + assertEquals(12, remoteLogManager.readerThreadPoolSize()); + } + private void appendRecordsToFile(File file, int nRecords, int nRecordsPerBatch) throws IOException { byte magic = RecordBatch.CURRENT_MAGIC_VALUE; Compression compression = Compression.NONE; diff --git a/core/src/test/java/kafka/server/LogManagerIntegrationTest.java b/core/src/test/java/kafka/server/LogManagerIntegrationTest.java index 3d386283943..752f56fa418 100644 --- a/core/src/test/java/kafka/server/LogManagerIntegrationTest.java +++ b/core/src/test/java/kafka/server/LogManagerIntegrationTest.java @@ -34,9 +34,11 @@ import org.apache.kafka.common.test.ClusterInstance; import org.apache.kafka.common.test.api.ClusterTest; import org.apache.kafka.common.test.api.Type; +import org.apache.kafka.storage.internals.checkpoint.CleanShutdownFileHandler; import org.apache.kafka.storage.internals.checkpoint.PartitionMetadataFile; import org.apache.kafka.test.TestUtils; +import java.io.File; import java.io.IOException; import java.time.Duration; import java.util.ArrayList; @@ -59,6 +61,70 @@ public LogManagerIntegrationTest(ClusterInstance cluster) { this.cluster = cluster; } + @ClusterTest(types = {Type.KRAFT}) + public void testIOExceptionOnLogSegmentCloseResultsInRecovery() throws IOException, InterruptedException, ExecutionException { + try (Admin admin = cluster.admin()) { + admin.createTopics(List.of(new NewTopic("foo", 1, (short) 1))).all().get(); + } + cluster.waitForTopic("foo", 1); + + // Produce some data into the topic + Map producerConfigs = Map.of( + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, cluster.bootstrapServers(), + ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName(), + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, 
StringSerializer.class.getName() + ); + + try (Producer producer = new KafkaProducer<>(producerConfigs)) { + producer.send(new ProducerRecord<>("foo", 0, null, "bar")).get(); + producer.flush(); + } + + var broker = cluster.brokers().get(0); + + File timeIndexFile = broker.logManager() + .getLog(new TopicPartition("foo", 0), false).get() + .activeSegment() + .timeIndexFile(); + + // Set read only so that we throw an IOException on shutdown + assertTrue(timeIndexFile.exists()); + assertTrue(timeIndexFile.setReadOnly()); + + broker.shutdown(); + + assertEquals(1, broker.config().logDirs().size()); + String logDir = broker.config().logDirs().head(); + CleanShutdownFileHandler cleanShutdownFileHandler = new CleanShutdownFileHandler(logDir); + assertFalse(cleanShutdownFileHandler.exists(), "Did not expect the clean shutdown file to exist"); + + // Ensure we have a corrupt index on broker shutdown + long maxIndexSize = broker.config().logIndexSizeMaxBytes(); + long expectedIndexSize = 12 * (maxIndexSize / 12); + assertEquals(expectedIndexSize, timeIndexFile.length()); + + // Allow write permissions before startup + assertTrue(timeIndexFile.setWritable(true)); + + broker.startup(); + // make sure there is no error during load logs + assertTrue(cluster.firstFatalException().isEmpty()); + try (Admin admin = cluster.admin()) { + TestUtils.waitForCondition(() -> { + List partitionInfos = admin.describeTopics(List.of("foo")) + .topicNameValues().get("foo").get().partitions(); + return partitionInfos.get(0).leader().id() == 0; + }, "Partition does not have a leader assigned"); + } + + // Ensure that sanity check does not fail + broker.logManager() + .getLog(new TopicPartition("foo", 0), false).get() + .activeSegment() + .timeIndex() + .sanityCheck(); + } + @ClusterTest(types = {Type.KRAFT, Type.CO_KRAFT}, brokers = 3) public void testRestartBrokerNoErrorIfMissingPartitionMetadata() throws IOException, ExecutionException, InterruptedException { diff --git a/core/src/test/scala/integration/kafka/admin/RemoteTopicCrudTest.scala b/core/src/test/scala/integration/kafka/admin/RemoteTopicCrudTest.scala index d89a83c7750..4f66dd9e311 100644 --- a/core/src/test/scala/integration/kafka/admin/RemoteTopicCrudTest.scala +++ b/core/src/test/scala/integration/kafka/admin/RemoteTopicCrudTest.scala @@ -441,7 +441,36 @@ class RemoteTopicCrudTest extends IntegrationTestHarness { AlterConfigOp.OpType.SET), )) assertThrowsException(classOf[InvalidConfigurationException], - () => admin.incrementalAlterConfigs(configs).all().get(), "Disabling remote storage feature on the topic level is not supported.") + () => admin.incrementalAlterConfigs(configs).all().get(), "It is invalid to disable remote storage without deleting remote data. " + + "If you want to keep the remote data and turn to read only, please set `remote.storage.enable=true,remote.log.copy.disable=true`. 
" + + "If you want to disable remote storage and delete all remote data, please set `remote.storage.enable=false,remote.log.delete.on.disable=true`.") + } + + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testUpdateTopicConfigWithDisablingRemoteStorageWithDeleteOnDisable(quorum: String): Unit = { + val admin = createAdminClient() + val topicConfig = new Properties + topicConfig.setProperty(TopicConfig.REMOTE_LOG_STORAGE_ENABLE_CONFIG, "true") + TestUtils.createTopicWithAdmin(admin, testTopicName, brokers, controllerServers, numPartitions, numReplicationFactor, + topicConfig = topicConfig) + + val configs = new util.HashMap[ConfigResource, util.Collection[AlterConfigOp]]() + configs.put(new ConfigResource(ConfigResource.Type.TOPIC, testTopicName), + util.Arrays.asList( + new AlterConfigOp(new ConfigEntry(TopicConfig.REMOTE_LOG_STORAGE_ENABLE_CONFIG, "false"), + AlterConfigOp.OpType.SET), + new AlterConfigOp(new ConfigEntry(TopicConfig.REMOTE_LOG_DELETE_ON_DISABLE_CONFIG, "true"), + AlterConfigOp.OpType.SET) + )) + admin.incrementalAlterConfigs(configs).all().get() + + val newProps = new Properties() + configs.get(new ConfigResource(ConfigResource.Type.TOPIC, testTopicName)).forEach { op => + newProps.setProperty(op.configEntry().name(), op.configEntry().value()) + } + + verifyRemoteLogTopicConfigs(newProps) } @ParameterizedTest diff --git a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala index fc74344c863..671386a2824 100644 --- a/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/AuthorizerIntegrationTest.scala @@ -37,7 +37,8 @@ import org.apache.kafka.common.message.JoinGroupRequestData.JoinGroupRequestProt import org.apache.kafka.common.message.LeaveGroupRequestData.MemberIdentity import org.apache.kafka.common.message.ListOffsetsRequestData.{ListOffsetsPartition, ListOffsetsTopic} import org.apache.kafka.common.message.OffsetForLeaderEpochRequestData.{OffsetForLeaderPartition, OffsetForLeaderTopic, OffsetForLeaderTopicCollection} -import org.apache.kafka.common.message.{AddOffsetsToTxnRequestData, AlterPartitionReassignmentsRequestData, AlterReplicaLogDirsRequestData, ConsumerGroupDescribeRequestData, ConsumerGroupHeartbeatRequestData, CreateAclsRequestData, CreatePartitionsRequestData, CreateTopicsRequestData, DeleteAclsRequestData, DeleteGroupsRequestData, DeleteRecordsRequestData, DeleteTopicsRequestData, DescribeClusterRequestData, DescribeConfigsRequestData, DescribeGroupsRequestData, DescribeLogDirsRequestData, DescribeProducersRequestData, DescribeTransactionsRequestData, FetchResponseData, FindCoordinatorRequestData, HeartbeatRequestData, IncrementalAlterConfigsRequestData, JoinGroupRequestData, ListPartitionReassignmentsRequestData, ListTransactionsRequestData, MetadataRequestData, OffsetCommitRequestData, ProduceRequestData, SyncGroupRequestData, WriteTxnMarkersRequestData} +import org.apache.kafka.common.message.{AddOffsetsToTxnRequestData, AlterPartitionReassignmentsRequestData, AlterReplicaLogDirsRequestData, ConsumerGroupDescribeRequestData, ConsumerGroupHeartbeatRequestData, ConsumerGroupHeartbeatResponseData, CreateAclsRequestData, CreatePartitionsRequestData, CreateTopicsRequestData, DeleteAclsRequestData, DeleteGroupsRequestData, DeleteRecordsRequestData, DeleteTopicsRequestData, DescribeClusterRequestData, DescribeConfigsRequestData, DescribeGroupsRequestData, 
DescribeLogDirsRequestData, DescribeProducersRequestData, DescribeTransactionsRequestData, FetchResponseData, FindCoordinatorRequestData, HeartbeatRequestData, IncrementalAlterConfigsRequestData, JoinGroupRequestData, ListPartitionReassignmentsRequestData, ListTransactionsRequestData, MetadataRequestData, OffsetCommitRequestData, ProduceRequestData, SyncGroupRequestData, WriteTxnMarkersRequestData} +import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.protocol.{ApiKeys, Errors} import org.apache.kafka.common.record.{MemoryRecords, RecordBatch, SimpleRecord} import org.apache.kafka.common.requests.OffsetFetchResponse.PartitionData @@ -2546,6 +2547,118 @@ class AuthorizerIntegrationTest extends AbstractAuthorizerIntegrationTest { sendRequestAndVerifyResponseError(request, resource, isAuthorized = false) } + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testConsumerGroupHeartbeaWithRegex(quorum: String): Unit = { + createTopicWithBrokerPrincipal(topic) + val allowAllOpsAcl = new AccessControlEntry(clientPrincipalString, WILDCARD_HOST, ALL, ALLOW) + addAndVerifyAcls(Set(allowAllOpsAcl), groupResource) + addAndVerifyAcls(Set(allowAllOpsAcl), topicResource) + + val response = sendAndReceiveFirstRegexHeartbeat(Uuid.randomUuid.toString, listenerName) + sendAndReceiveRegexHeartbeat(response, listenerName, Some(1)) + } + + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testConsumerGroupHeartbeaWithRegexWithoutTopicDescribeAcl(quorum: String): Unit = { + createTopicWithBrokerPrincipal(topic) + val allowAllOpsAcl = new AccessControlEntry(clientPrincipalString, WILDCARD_HOST, ALL, ALLOW) + addAndVerifyAcls(Set(allowAllOpsAcl), groupResource) + + val response = sendAndReceiveFirstRegexHeartbeat(Uuid.randomUuid.toString, listenerName) + sendAndReceiveRegexHeartbeat(response, listenerName, None) + } + + @ParameterizedTest + @ValueSource(strings = Array("kraft")) + def testConsumerGroupHeartbeaWithRegexWithDifferentMemberAcls(quorum: String): Unit = { + createTopicWithBrokerPrincipal(topic, numPartitions = 2) + val allowAllOpsAcl = new AccessControlEntry(clientPrincipalString, WILDCARD_HOST, ALL, ALLOW) + addAndVerifyAcls(Set(allowAllOpsAcl), groupResource) + + // Member on inter-broker listener has all access and is assigned the matching topic + var member1Response = sendAndReceiveFirstRegexHeartbeat("memberWithAllAccess", interBrokerListenerName) + member1Response = sendAndReceiveRegexHeartbeat(member1Response, interBrokerListenerName, Some(2)) + + // Member on client listener has no topic describe access, but is assigned a partition of the + // unauthorized topic. This is leaking unauthorized topic metadata to member2. Simply filtering out + // the topic from the assignment in the response is not sufficient since different assignment states + // in the broker and client can lead to other issues. This needs to be fixed properly by using + // member permissions while computing assignments. 
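+ // Until that is fixed, the assertions below document the current (leaky) behavior rather than the desired one.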
+ var member2Response = sendAndReceiveFirstRegexHeartbeat("memberWithLimitedAccess", listenerName) + member1Response = sendAndReceiveRegexHeartbeat(member1Response, interBrokerListenerName, Some(1)) + member1Response = sendAndReceiveRegexHeartbeat(member1Response, interBrokerListenerName, None, fullRequest = true) + member2Response = sendAndReceiveRegexHeartbeat(member2Response, listenerName, Some(1)) + + // Create another topic and send heartbeats on member1 to trigger regex refresh + createTopicWithBrokerPrincipal("topic2", numPartitions = 2) + TestUtils.retry(15000) { + member1Response = sendAndReceiveRegexHeartbeat(member1Response, interBrokerListenerName, Some(2)) + } + // This is leaking unauthorized topic metadata to member2. + member2Response = sendAndReceiveRegexHeartbeat(member2Response, listenerName, Some(2)) + + // Create another topic and send heartbeats on member2 to trigger regex refresh + createTopicWithBrokerPrincipal("topic3", numPartitions = 2) + TestUtils.retry(15000) { + member2Response = sendAndReceiveRegexHeartbeat(member2Response, listenerName, Some(0), fullRequest = true) + } + // This removes all topics from member1 since member2's permissions were used to refresh regex. + sendAndReceiveRegexHeartbeat(member1Response, interBrokerListenerName, Some(0), fullRequest = true) + } + + private def sendAndReceiveFirstRegexHeartbeat(memberId: String, + listenerName: ListenerName): ConsumerGroupHeartbeatResponseData = { + val request = new ConsumerGroupHeartbeatRequest.Builder( + new ConsumerGroupHeartbeatRequestData() + .setGroupId(group) + .setMemberId(memberId) + .setMemberEpoch(0) + .setRebalanceTimeoutMs(5 * 60 * 1000) + .setTopicPartitions(Collections.emptyList()) + .setSubscribedTopicRegex("^top.*")).build() + val resource = Set[ResourceType](GROUP, TOPIC) + val response = sendRequestAndVerifyResponseError(request, resource, isAuthorized = true, listenerName = listenerName) + .data.asInstanceOf[ConsumerGroupHeartbeatResponseData] + assertEquals(Errors.NONE.code, response.errorCode, s"Unexpected response $response") + assertEquals(0, response.assignment.topicPartitions.size, s"Unexpected assignment $response") + response + } + + private def sendAndReceiveRegexHeartbeat(lastResponse: ConsumerGroupHeartbeatResponseData, + listenerName: ListenerName, + expectedAssignmentSize: Option[Int], + fullRequest: Boolean = false): ConsumerGroupHeartbeatResponseData = { + var data = new ConsumerGroupHeartbeatRequestData() + .setGroupId(group) + .setMemberId(lastResponse.memberId) + .setMemberEpoch(lastResponse.memberEpoch) + if (fullRequest) { + val partitions = Option(lastResponse.assignment).map(_.topicPartitions.asScala.map(p => + new ConsumerGroupHeartbeatRequestData.TopicPartitions() + .setTopicId(p.topicId) + .setPartitions(p.partitions) + )).getOrElse(List()) + data = data + .setTopicPartitions(partitions.asJava) + .setSubscribedTopicRegex("^top.*") + } + val request = new ConsumerGroupHeartbeatRequest.Builder(data).build() + val resource = Set[ResourceType](GROUP, TOPIC) + val response = sendRequestAndVerifyResponseError(request, resource, isAuthorized = true, listenerName = listenerName) + .data.asInstanceOf[ConsumerGroupHeartbeatResponseData] + assertEquals(Errors.NONE.code, response.errorCode, s"Unexpected response $response") + expectedAssignmentSize match { + case Some(size) => + assertNotNull(response.assignment, s"Unexpected assignment $response") + assertEquals(size, response.assignment.topicPartitions.asScala.map(_.partitions.size).sum, s"Unexpected assignment 
$response") + case None => + assertNull(response.assignment, s"Unexpected assignment $response") + } + response + } + private def createConsumerGroupToDescribe(): Unit = { createTopicWithBrokerPrincipal(topic) addAndVerifyAcls(Set(new AccessControlEntry(clientPrincipalString, WILDCARD_HOST, READ, ALLOW)), groupResource) @@ -2650,9 +2763,10 @@ class AuthorizerIntegrationTest extends AbstractAuthorizerIntegrationTest { resources: Set[ResourceType], isAuthorized: Boolean, topicExists: Boolean = true, - topicNames: Map[Uuid, String] = getTopicNames()): AbstractResponse = { + topicNames: Map[Uuid, String] = getTopicNames(), + listenerName: ListenerName = listenerName): AbstractResponse = { val apiKey = request.apiKey - val response = connectAndReceive[AbstractResponse](request) + val response = connectAndReceive[AbstractResponse](request, listenerName = listenerName) val error = requestKeyToError(topicNames, request.version())(apiKey).asInstanceOf[AbstractResponse => Errors](response) val authorizationErrors = resources.flatMap { resourceType => diff --git a/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala b/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala index cbd69baedc7..8d1399a1ebd 100644 --- a/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala +++ b/core/src/test/scala/integration/kafka/api/GroupCoordinatorIntegrationTest.scala @@ -17,8 +17,8 @@ import org.apache.kafka.common.test.api.{ClusterConfigProperty, ClusterTest, Typ import kafka.utils.TestUtils import org.apache.kafka.clients.admin.{Admin, ConsumerGroupDescription} import org.apache.kafka.clients.consumer.{Consumer, GroupProtocol, OffsetAndMetadata} -import org.apache.kafka.common.errors.GroupIdNotFoundException -import org.apache.kafka.common.{ConsumerGroupState, GroupType, KafkaFuture, TopicPartition} +import org.apache.kafka.common.errors.{GroupIdNotFoundException, UnknownTopicOrPartitionException} +import org.apache.kafka.common.{ConsumerGroupState, GroupType, KafkaFuture, TopicCollection, TopicPartition} import org.junit.jupiter.api.Assertions._ import scala.jdk.CollectionConverters._ @@ -27,11 +27,12 @@ import org.apache.kafka.common.record.CompressionType import org.apache.kafka.common.test.ClusterInstance import org.apache.kafka.coordinator.group.GroupCoordinatorConfig import org.apache.kafka.server.config.ServerConfigs +import org.apache.kafka.test.{TestUtils => JTestUtils} import org.junit.jupiter.api.Timeout import java.time.Duration import java.util.Collections -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ExecutionException, TimeUnit} @Timeout(120) class GroupCoordinatorIntegrationTest(cluster: ClusterInstance) { @@ -278,6 +279,58 @@ class GroupCoordinatorIntegrationTest(cluster: ClusterInstance) { } } + @ClusterTest( + types = Array(Type.KRAFT), + serverProperties = Array( + new ClusterConfigProperty(key = GroupCoordinatorConfig.OFFSETS_TOPIC_PARTITIONS_CONFIG, value = "1"), + new ClusterConfigProperty(key = GroupCoordinatorConfig.OFFSETS_TOPIC_REPLICATION_FACTOR_CONFIG, value = "1") + ) + ) + def testRecreatingConsumerOffsetsTopic(): Unit = { + withAdmin { admin => + TestUtils.createTopicWithAdminRaw( + admin = admin, + topic = "foo", + numPartitions = 3 + ) + + withConsumer(groupId = "group", groupProtocol = GroupProtocol.CONSUMER) { consumer => + consumer.subscribe(List("foo").asJava) + TestUtils.waitUntilTrue(() => { + consumer.poll(Duration.ofMillis(50)) + consumer.assignment().asScala.nonEmpty + }, 
msg = "Consumer did not get an non empty assignment") + } + + admin + .deleteTopics(TopicCollection.ofTopicNames(List(Topic.GROUP_METADATA_TOPIC_NAME).asJava)) + .all() + .get() + + TestUtils.waitUntilTrue(() => { + try { + admin + .describeTopics(TopicCollection.ofTopicNames(List(Topic.GROUP_METADATA_TOPIC_NAME).asJava)) + .topicNameValues() + .get(Topic.GROUP_METADATA_TOPIC_NAME) + .get(JTestUtils.DEFAULT_MAX_WAIT_MS, TimeUnit.MILLISECONDS) + false + } catch { + case e: ExecutionException => + e.getCause.isInstanceOf[UnknownTopicOrPartitionException] + } + }, msg = s"${Topic.GROUP_METADATA_TOPIC_NAME} was not deleted") + + withConsumer(groupId = "group", groupProtocol = GroupProtocol.CONSUMER) { consumer => + consumer.subscribe(List("foo").asJava) + TestUtils.waitUntilTrue(() => { + consumer.poll(Duration.ofMillis(50)) + consumer.assignment().asScala.nonEmpty + }, msg = "Consumer did not get an non empty assignment") + } + } + } + private def rollAndCompactConsumerOffsets(): Unit = { val tp = new TopicPartition("__consumer_offsets", 0) val broker = cluster.brokers.asScala.head._2 diff --git a/core/src/test/scala/integration/kafka/api/TransactionsTest.scala b/core/src/test/scala/integration/kafka/api/TransactionsTest.scala index 74737668127..c531f62595a 100644 --- a/core/src/test/scala/integration/kafka/api/TransactionsTest.scala +++ b/core/src/test/scala/integration/kafka/api/TransactionsTest.scala @@ -21,7 +21,7 @@ import kafka.utils.TestUtils.{consumeRecords, waitUntilTrue} import kafka.utils.{TestInfoUtils, TestUtils} import org.apache.kafka.clients.consumer._ import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} -import org.apache.kafka.common.TopicPartition +import org.apache.kafka.common.{KafkaException, TopicPartition} import org.apache.kafka.common.errors.{ConcurrentTransactionsException, InvalidProducerEpochException, ProducerFencedException, TimeoutException} import org.apache.kafka.common.test.api.Flaky import org.apache.kafka.coordinator.group.GroupCoordinatorConfig @@ -738,6 +738,19 @@ class TransactionsTest extends IntegrationTestHarness { restartDeadBrokers() org.apache.kafka.test.TestUtils.assertFutureThrows(failedFuture, classOf[TimeoutException]) + // Ensure the producer transitions to abortable_error state. 
+ TestUtils.waitUntilTrue(() => { + var failed = false + try { + producer.send(TestUtils.producerRecordWithExpectedTransactionStatus(testTopic, 0, "3", "3", willBeCommitted = false)) + } catch { + case e: Exception => + if (e.isInstanceOf[KafkaException]) + failed = true + } + failed + }, "The send request never failed as expected.") + assertThrows(classOf[KafkaException], () => producer.send(TestUtils.producerRecordWithExpectedTransactionStatus(testTopic, 0, "3", "3", willBeCommitted = false))) producer.abortTransaction() producer.beginTransaction() @@ -760,7 +773,7 @@ class TransactionsTest extends IntegrationTestHarness { producerStateEntry = brokers(partitionLeader).logManager.getLog(new TopicPartition(testTopic, 0)).get.producerStateManager.activeProducers.get(producerId) assertNotNull(producerStateEntry) - assertTrue(producerStateEntry.producerEpoch > initialProducerEpoch) + assertTrue(producerStateEntry.producerEpoch > initialProducerEpoch, "InitialProduceEpoch: " + initialProducerEpoch + " ProducerStateEntry: " + producerStateEntry) } finally { producer.close(Duration.ZERO) } diff --git a/core/src/test/scala/integration/kafka/server/DelayedRemoteFetchTest.scala b/core/src/test/scala/integration/kafka/server/DelayedRemoteFetchTest.scala index 264f5310c2d..b3f032e3dba 100644 --- a/core/src/test/scala/integration/kafka/server/DelayedRemoteFetchTest.scala +++ b/core/src/test/scala/integration/kafka/server/DelayedRemoteFetchTest.scala @@ -200,7 +200,7 @@ class DelayedRemoteFetchTest { delayedRemoteFetch.run() // Check that the task was cancelled and force-completed - verify(remoteFetchTask).cancel(true) + verify(remoteFetchTask).cancel(false) assertTrue(delayedRemoteFetch.isCompleted) // Check that the ExpiresPerSec metric was incremented diff --git a/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala b/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala index 17a75080ba1..4fe4fb48cd8 100644 --- a/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala +++ b/core/src/test/scala/integration/kafka/server/KRaftClusterTest.scala @@ -65,6 +65,7 @@ import java.util.{Collections, Optional, OptionalLong, Properties} import scala.collection.{Seq, mutable} import scala.concurrent.duration.{FiniteDuration, MILLISECONDS, SECONDS} import scala.jdk.CollectionConverters._ +import scala.util.Using @Timeout(120) @Tag("integration") @@ -1621,6 +1622,51 @@ class KRaftClusterTest { } } + /** + * Test that once a cluster is formatted, a bootstrap.metadata file that contains an unsupported + * MetadataVersion is not a problem. This is a regression test for KAFKA-19192. + */ + @Test + def testOldBootstrapMetadataFile(): Unit = { + val baseDirectory = TestUtils.tempDir().toPath() + Using.resource(new KafkaClusterTestKit.Builder( + new TestKitNodes.Builder(). + setNumBrokerNodes(1). + setNumControllerNodes(1). + setBaseDirectory(baseDirectory). + build()). + setDeleteOnClose(false). + build() + ) { cluster => + cluster.format() + cluster.startup() + cluster.waitForReadyBrokers() + } + val oldBootstrapMetadata = BootstrapMetadata.fromRecords( + util.Arrays.asList( + new ApiMessageAndVersion( + new FeatureLevelRecord(). + setName(MetadataVersion.FEATURE_NAME). + setFeatureLevel(1), + 0.toShort) + ), + "oldBootstrapMetadata") + // Re-create the cluster using the same directory structure as above. + // Since we do not need to use the bootstrap metadata, the fact that + // it specifies an obsolete metadata.version should not be a problem. 
+ Using.resource(new KafkaClusterTestKit.Builder( + new TestKitNodes.Builder(). + setNumBrokerNodes(1). + setNumControllerNodes(1). + setBaseDirectory(baseDirectory). + setBootstrapMetadata(oldBootstrapMetadata). + build()).build() + ) { cluster => + cluster.startup() + cluster.waitForReadyBrokers() + } + } + @Test def testIncreaseNumIoThreads(): Unit = { val cluster = new KafkaClusterTestKit.Builder( diff --git a/core/src/test/scala/kafka/raft/KafkaMetadataLogTest.scala b/core/src/test/scala/kafka/raft/KafkaMetadataLogTest.scala index 285560d3826..8cca9202ebc 100644 --- a/core/src/test/scala/kafka/raft/KafkaMetadataLogTest.scala +++ b/core/src/test/scala/kafka/raft/KafkaMetadataLogTest.scala @@ -20,9 +20,12 @@ import kafka.log.UnifiedLog import kafka.server.{KafkaConfig, KafkaRaftServer} import kafka.utils.TestUtils import org.apache.kafka.common.compress.Compression +import org.apache.kafka.common.errors.CorruptRecordException import org.apache.kafka.common.errors.{InvalidConfigurationException, RecordTooLargeException} import org.apache.kafka.common.protocol import org.apache.kafka.common.protocol.{ObjectSerializationCache, Writable} +import org.apache.kafka.common.record.ArbitraryMemoryRecords +import org.apache.kafka.common.record.InvalidMemoryRecordsProvider import org.apache.kafka.common.record.{MemoryRecords, SimpleRecord} import org.apache.kafka.common.utils.Utils import org.apache.kafka.raft._ @@ -34,7 +37,14 @@ import org.apache.kafka.snapshot.{FileRawSnapshotWriter, RawSnapshotReader, RawS import org.apache.kafka.storage.internals.log.{LogConfig, LogStartOffsetIncrementReason} import org.apache.kafka.test.TestUtils.assertOptional import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.function.Executable import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ArgumentsSource + +import net.jqwik.api.AfterFailureMode +import net.jqwik.api.ForAll +import net.jqwik.api.Property import java.io.File import java.nio.ByteBuffer @@ -109,12 +119,93 @@ final class KafkaMetadataLogTest { classOf[RuntimeException], () => { log.appendAsFollower( - MemoryRecords.withRecords(initialOffset, Compression.NONE, currentEpoch, recordFoo) + MemoryRecords.withRecords(initialOffset, Compression.NONE, currentEpoch, recordFoo), + currentEpoch ) } ) } + @Test + def testEmptyAppendNotAllowed(): Unit = { + val log = buildMetadataLog(tempDir, mockTime) + + assertThrows(classOf[IllegalArgumentException], () => log.appendAsFollower(MemoryRecords.EMPTY, 1)); + assertThrows(classOf[IllegalArgumentException], () => log.appendAsLeader(MemoryRecords.EMPTY, 1)); + } + + @ParameterizedTest + @ArgumentsSource(classOf[InvalidMemoryRecordsProvider]) + def testInvalidMemoryRecords(records: MemoryRecords, expectedException: Optional[Class[Exception]]): Unit = { + val log = buildMetadataLog(tempDir, mockTime) + val previousEndOffset = log.endOffset().offset() + + val action: Executable = () => log.appendAsFollower(records, Int.MaxValue) + if (expectedException.isPresent()) { + assertThrows(expectedException.get, action) + } else { + assertThrows(classOf[CorruptRecordException], action) + } + + assertEquals(previousEndOffset, log.endOffset().offset()) + } + + @Property(tries = 100, afterFailure = AfterFailureMode.SAMPLE_ONLY) + def testRandomRecords( + @ForAll(supplier = classOf[ArbitraryMemoryRecords]) records: MemoryRecords + ): Unit = { + val tempDir = TestUtils.tempDir() + try { + val log = 
buildMetadataLog(tempDir, mockTime) + val previousEndOffset = log.endOffset().offset() + + assertThrows( + classOf[CorruptRecordException], + () => log.appendAsFollower(records, Int.MaxValue) + ) + + assertEquals(previousEndOffset, log.endOffset().offset()) + } finally { + Utils.delete(tempDir) + } + } + + @Test + def testInvalidLeaderEpoch(): Unit = { + val log = buildMetadataLog(tempDir, mockTime) + val previousEndOffset = log.endOffset().offset() + val epoch = log.lastFetchedEpoch() + 1 + val numberOfRecords = 10 + + val batchWithValidEpoch = MemoryRecords.withRecords( + previousEndOffset, + Compression.NONE, + epoch, + (0 until numberOfRecords).map(number => new SimpleRecord(number.toString.getBytes)): _* + ) + + val batchWithInvalidEpoch = MemoryRecords.withRecords( + previousEndOffset + numberOfRecords, + Compression.NONE, + epoch + 1, + (0 until numberOfRecords).map(number => new SimpleRecord(number.toString.getBytes)): _* + ) + + val buffer = ByteBuffer.allocate(batchWithValidEpoch.sizeInBytes() + batchWithInvalidEpoch.sizeInBytes()) + buffer.put(batchWithValidEpoch.buffer()) + buffer.put(batchWithInvalidEpoch.buffer()) + buffer.flip() + + val records = MemoryRecords.readableRecords(buffer) + + log.appendAsFollower(records, epoch) + + // Check that only the first batch was appended + assertEquals(previousEndOffset + numberOfRecords, log.endOffset().offset()) + // Check that the last fetched epoch matches the first batch + assertEquals(epoch, log.lastFetchedEpoch()) + } + @Test def testCreateSnapshot(): Unit = { val numberOfRecords = 10 @@ -1062,4 +1153,4 @@ object KafkaMetadataLogTest { } dir } -} \ No newline at end of file +} diff --git a/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala b/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala index b559189f394..c10f5aab42c 100644 --- a/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala +++ b/core/src/test/scala/unit/kafka/cluster/PartitionTest.scala @@ -426,6 +426,7 @@ class PartitionTest extends AbstractPartitionTest { def testMakeFollowerWithWithFollowerAppendRecords(): Unit = { val appendSemaphore = new Semaphore(0) val mockTime = new MockTime() + val prevLeaderEpoch = 0 partition = new Partition( topicPartition, @@ -478,24 +479,38 @@ class PartitionTest extends AbstractPartitionTest { } partition.createLogIfNotExists(isNew = true, isFutureReplica = false, offsetCheckpoints, None) + var partitionState = new LeaderAndIsrRequest.PartitionState() + .setControllerEpoch(0) + .setLeader(2) + .setLeaderEpoch(prevLeaderEpoch) + .setIsr(List[Integer](0, 1, 2, brokerId).asJava) + .setPartitionEpoch(1) + .setReplicas(List[Integer](0, 1, 2, brokerId).asJava) + .setIsNew(false) + assertTrue(partition.makeFollower(partitionState, offsetCheckpoints, None)) val appendThread = new Thread { override def run(): Unit = { - val records = createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes), - new SimpleRecord("k2".getBytes, "v2".getBytes)), - baseOffset = 0) - partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false) + val records = createRecords( + List( + new SimpleRecord("k1".getBytes, "v1".getBytes), + new SimpleRecord("k2".getBytes, "v2".getBytes) + ), + baseOffset = 0, + partitionLeaderEpoch = prevLeaderEpoch + ) + partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false, prevLeaderEpoch) } } appendThread.start() TestUtils.waitUntilTrue(() => appendSemaphore.hasQueuedThreads, "follower log append is not called.") - val partitionState = new 
LeaderAndIsrRequest.PartitionState() + partitionState = new LeaderAndIsrRequest.PartitionState() .setControllerEpoch(0) .setLeader(2) - .setLeaderEpoch(1) + .setLeaderEpoch(prevLeaderEpoch + 1) .setIsr(List[Integer](0, 1, 2, brokerId).asJava) - .setPartitionEpoch(1) + .setPartitionEpoch(2) .setReplicas(List[Integer](0, 1, 2, brokerId).asJava) .setIsNew(false) assertTrue(partition.makeFollower(partitionState, offsetCheckpoints, None)) @@ -535,15 +550,22 @@ class PartitionTest extends AbstractPartitionTest { // Write to the future replica as if the log had been compacted, and do not roll the segment val buffer = ByteBuffer.allocate(1024) - val builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.CREATE_TIME, 0L, RecordBatch.NO_TIMESTAMP, 0) + val builder = MemoryRecords.builder( + buffer, + RecordBatch.CURRENT_MAGIC_VALUE, + Compression.NONE, + TimestampType.CREATE_TIME, + 0L, // baseOffset + RecordBatch.NO_TIMESTAMP, + 0 // partitionLeaderEpoch + ) builder.appendWithOffset(2L, new SimpleRecord("k1".getBytes, "v3".getBytes)) builder.appendWithOffset(5L, new SimpleRecord("k2".getBytes, "v6".getBytes)) builder.appendWithOffset(6L, new SimpleRecord("k3".getBytes, "v7".getBytes)) builder.appendWithOffset(7L, new SimpleRecord("k4".getBytes, "v8".getBytes)) val futureLog = partition.futureLocalLogOrException - futureLog.appendAsFollower(builder.build()) + futureLog.appendAsFollower(builder.build(), 0) assertTrue(partition.maybeReplaceCurrentWithFutureReplica()) } @@ -951,6 +973,18 @@ class PartitionTest extends AbstractPartitionTest { def testAppendRecordsAsFollowerBelowLogStartOffset(): Unit = { partition.createLogIfNotExists(isNew = false, isFutureReplica = false, offsetCheckpoints, None) val log = partition.localLogOrException + val epoch = 1 + + // Start off as follower + val partitionState = new LeaderAndIsrRequest.PartitionState() + .setControllerEpoch(0) + .setLeader(1) + .setLeaderEpoch(epoch) + .setIsr(List[Integer](0, 1, 2, brokerId).asJava) + .setPartitionEpoch(1) + .setReplicas(List[Integer](0, 1, 2, brokerId).asJava) + .setIsNew(false) + partition.makeFollower(partitionState, offsetCheckpoints, None) val initialLogStartOffset = 5L partition.truncateFullyAndStartAt(initialLogStartOffset, isFuture = false) @@ -960,9 +994,14 @@ class PartitionTest extends AbstractPartitionTest { s"Log start offset after truncate fully and start at $initialLogStartOffset:") // verify that we cannot append records that do not contain log start offset even if the log is empty - assertThrows(classOf[UnexpectedAppendOffsetException], () => + assertThrows( + classOf[UnexpectedAppendOffsetException], // append one record with offset = 3 - partition.appendRecordsToFollowerOrFutureReplica(createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 3L), isFuture = false) + () => partition.appendRecordsToFollowerOrFutureReplica( + createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 3L), + isFuture = false, + partitionLeaderEpoch = epoch + ) ) assertEquals(initialLogStartOffset, log.logEndOffset, s"Log end offset should not change after failure to append") @@ -974,12 +1013,16 @@ class PartitionTest extends AbstractPartitionTest { new SimpleRecord("k2".getBytes, "v2".getBytes), new SimpleRecord("k3".getBytes, "v3".getBytes)), baseOffset = newLogStartOffset) - partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false) + partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false, 
partitionLeaderEpoch = epoch) assertEquals(7L, log.logEndOffset, s"Log end offset after append of 3 records with base offset $newLogStartOffset:") assertEquals(newLogStartOffset, log.logStartOffset, s"Log start offset after append of 3 records with base offset $newLogStartOffset:") // and we can append more records after that - partition.appendRecordsToFollowerOrFutureReplica(createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 7L), isFuture = false) + partition.appendRecordsToFollowerOrFutureReplica( + createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 7L), + isFuture = false, + partitionLeaderEpoch = epoch + ) assertEquals(8L, log.logEndOffset, s"Log end offset after append of 1 record at offset 7:") assertEquals(newLogStartOffset, log.logStartOffset, s"Log start offset not expected to change:") @@ -987,11 +1030,18 @@ class PartitionTest extends AbstractPartitionTest { val records2 = createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes), new SimpleRecord("k2".getBytes, "v2".getBytes)), baseOffset = 3L) - assertThrows(classOf[UnexpectedAppendOffsetException], () => partition.appendRecordsToFollowerOrFutureReplica(records2, isFuture = false)) + assertThrows( + classOf[UnexpectedAppendOffsetException], + () => partition.appendRecordsToFollowerOrFutureReplica(records2, isFuture = false, partitionLeaderEpoch = epoch) + ) assertEquals(8L, log.logEndOffset, s"Log end offset should not change after failure to append") // we still can append to next offset - partition.appendRecordsToFollowerOrFutureReplica(createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 8L), isFuture = false) + partition.appendRecordsToFollowerOrFutureReplica( + createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 8L), + isFuture = false, + partitionLeaderEpoch = epoch + ) assertEquals(9L, log.logEndOffset, s"Log end offset after append of 1 record at offset 8:") assertEquals(newLogStartOffset, log.logStartOffset, s"Log start offset not expected to change:") } @@ -1074,9 +1124,13 @@ class PartitionTest extends AbstractPartitionTest { @Test def testAppendRecordsToFollowerWithNoReplicaThrowsException(): Unit = { - assertThrows(classOf[NotLeaderOrFollowerException], () => - partition.appendRecordsToFollowerOrFutureReplica( - createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 0L), isFuture = false) + assertThrows( + classOf[NotLeaderOrFollowerException], + () => partition.appendRecordsToFollowerOrFutureReplica( + createRecords(List(new SimpleRecord("k1".getBytes, "v1".getBytes)), baseOffset = 0L), + isFuture = false, + partitionLeaderEpoch = 0 + ) ) } @@ -3440,13 +3494,16 @@ class PartitionTest extends AbstractPartitionTest { partition.createLogIfNotExists(isNew = true, isFutureReplica = false, offsetCheckpoints, topicId = topicId) assertTrue(partition.log.isDefined) + val replicas = Seq(brokerId, brokerId + 1) + val isr = replicas + val epoch = 0 partition.makeLeader( new LeaderAndIsrRequest.PartitionState() .setControllerEpoch(0) .setLeader(brokerId) - .setLeaderEpoch(0) - .setIsr(List(brokerId, brokerId + 1).map(Int.box).asJava) - .setReplicas(List(brokerId, brokerId + 1).map(Int.box).asJava) + .setLeaderEpoch(epoch) + .setIsr(isr.map(Int.box).asJava) + .setReplicas(replicas.map(Int.box).asJava) .setPartitionEpoch(1) .setIsNew(true), offsetCheckpoints, @@ -3477,7 +3534,8 @@ class PartitionTest extends AbstractPartitionTest { 
partition.appendRecordsToFollowerOrFutureReplica( records = records, - isFuture = true + isFuture = true, + partitionLeaderEpoch = epoch ) listener.verify() @@ -3623,9 +3681,9 @@ class PartitionTest extends AbstractPartitionTest { _topicId = topicId, keepPartitionMetadataFile = true) { - override def appendAsFollower(records: MemoryRecords): LogAppendInfo = { + override def appendAsFollower(records: MemoryRecords, epoch: Int): LogAppendInfo = { appendSemaphore.acquire() - val appendInfo = super.appendAsFollower(records) + val appendInfo = super.appendAsFollower(records, epoch) appendInfo } } diff --git a/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionCoordinatorTest.scala b/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionCoordinatorTest.scala index e5b48d92466..3adec5c029a 100644 --- a/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionCoordinatorTest.scala +++ b/core/src/test/scala/unit/kafka/coordinator/transaction/TransactionCoordinatorTest.scala @@ -33,6 +33,7 @@ import org.junit.jupiter.params.provider.ValueSource import org.mockito.ArgumentMatchers.{any, anyInt} import org.mockito.Mockito._ import org.mockito.{ArgumentCaptor, ArgumentMatchers} +import org.mockito.Mockito.doAnswer import scala.collection.mutable import scala.jdk.CollectionConverters._ @@ -1165,6 +1166,140 @@ class TransactionCoordinatorTest { any()) } + @Test + def shouldNotCauseEpochOverflowWhenInitPidDuringOngoingTxnV2(): Unit = { + // When InitProducerId is called with an ongoing transaction at epoch 32766 (Short.MaxValue - 1), + // it should not cause an epoch overflow by incrementing twice. + // The only true increment happens in prepareAbortOrCommit + val txnMetadata = new TransactionMetadata(transactionalId, producerId, producerId, RecordBatch.NO_PRODUCER_ID, + (Short.MaxValue - 1).toShort, (Short.MaxValue - 2).toShort, txnTimeoutMs, Ongoing, partitions, time.milliseconds(), time.milliseconds(), TV_2) + + when(transactionManager.validateTransactionTimeoutMs(anyInt())) + .thenReturn(true) + when(transactionManager.getTransactionState(ArgumentMatchers.eq(transactionalId))) + .thenReturn(Right(Some(CoordinatorEpochAndTxnMetadata(coordinatorEpoch, txnMetadata)))) + when(transactionManager.transactionVersionLevel()).thenReturn(TV_2) + + // Capture the transition metadata to verify epoch increments + val capturedTxnTransitMetadata: ArgumentCaptor[TxnTransitMetadata] = ArgumentCaptor.forClass(classOf[TxnTransitMetadata]) + when(transactionManager.appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + capturedTxnTransitMetadata.capture(), + capturedErrorsCallback.capture(), + any(), + any()) + ).thenAnswer(invocation => { + val transitMetadata = invocation.getArgument[TxnTransitMetadata](2) + // Simulate the metadata update that would happen in the real appendTransactionToLog + txnMetadata.completeTransitionTo(transitMetadata) + capturedErrorsCallback.getValue.apply(Errors.NONE) + }) + + // Handle InitProducerId with ongoing transaction at epoch 32766 + coordinator.handleInitProducerId( + transactionalId, + txnTimeoutMs, + None, + initProducerIdMockCallback + ) + + // Verify that the epoch did not overflow (should be Short.MaxValue = 32767, not negative) + assertEquals(Short.MaxValue, txnMetadata.producerEpoch) + assertEquals(PrepareAbort, txnMetadata.state) + + verify(transactionManager).validateTransactionTimeoutMs(anyInt()) + verify(transactionManager, 
times(3)).getTransactionState(ArgumentMatchers.eq(transactionalId)) + verify(transactionManager).appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + any[TxnTransitMetadata], + any(), + any(), + any()) + } + + @Test + def shouldHandleTimeoutAtEpochOverflowBoundaryCorrectlyTV2(): Unit = { + // Test the scenario where we have an ongoing transaction at epoch 32766 (Short.MaxValue - 1) + // and the producer crashes/times out. This test verifies that the timeout handling + // correctly manages the epoch overflow scenario without causing failures. + + val epochAtMaxBoundary = (Short.MaxValue - 1).toShort // 32766 + val now = time.milliseconds() + + // Create transaction metadata at the epoch boundary that would cause overflow IFF double-incremented + val txnMetadata = new TransactionMetadata( + transactionalId = transactionalId, + producerId = producerId, + previousProducerId = RecordBatch.NO_PRODUCER_ID, + nextProducerId = RecordBatch.NO_PRODUCER_ID, + producerEpoch = epochAtMaxBoundary, + lastProducerEpoch = RecordBatch.NO_PRODUCER_EPOCH, + txnTimeoutMs = txnTimeoutMs, + state = Ongoing, + topicPartitions = partitions, + txnStartTimestamp = now, + txnLastUpdateTimestamp = now, + clientTransactionVersion = TV_2 + ) + assertTrue(txnMetadata.isProducerEpochExhausted) + + // Mock the transaction manager to return our test transaction as timed out + when(transactionManager.timedOutTransactions()) + .thenReturn(List(TransactionalIdAndProducerIdEpoch(transactionalId, producerId, epochAtMaxBoundary))) + when(transactionManager.getTransactionState(ArgumentMatchers.eq(transactionalId))) + .thenReturn(Right(Some(CoordinatorEpochAndTxnMetadata(coordinatorEpoch, txnMetadata)))) + when(transactionManager.transactionVersionLevel()).thenReturn(TV_2) + + // Mock the append operation to simulate successful write and update the metadata + when(transactionManager.appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + any[TxnTransitMetadata], + capturedErrorsCallback.capture(), + any(), + any()) + ).thenAnswer(invocation => { + val transitMetadata = invocation.getArgument[TxnTransitMetadata](2) + // Simulate the metadata update that would happen in the real appendTransactionToLog + txnMetadata.completeTransitionTo(transitMetadata) + capturedErrorsCallback.getValue.apply(Errors.NONE) + }) + + // Track the actual behavior + var callbackInvoked = false + var resultError: Errors = null + var resultProducerId: Long = -1 + var resultEpoch: Short = -1 + + def checkOnEndTransactionComplete(txnIdAndPidEpoch: TransactionalIdAndProducerIdEpoch) + (error: Errors, newProducerId: Long, newProducerEpoch: Short): Unit = { + callbackInvoked = true + resultError = error + resultProducerId = newProducerId + resultEpoch = newProducerEpoch + } + + // Execute the timeout abort process + coordinator.abortTimedOutTransactions(checkOnEndTransactionComplete) + + assertTrue(callbackInvoked, "Callback should have been invoked") + assertEquals(Errors.NONE, resultError, "Expected no errors in the callback") + assertEquals(producerId, resultProducerId, "Expected producer ID to match") + assertEquals(Short.MaxValue, resultEpoch, "Expected producer epoch to be Short.MaxValue (32767) single epoch bump") + + // Verify the transaction metadata was correctly updated to the final epoch + assertEquals(Short.MaxValue, txnMetadata.producerEpoch, + s"Expected transaction metadata producer epoch to be ${Short.MaxValue} " + + s"after timeout handling, 
but was ${txnMetadata.producerEpoch}" + ) + + // Verify the basic flow was attempted + verify(transactionManager).timedOutTransactions() + verify(transactionManager, atLeast(1)).getTransactionState(ArgumentMatchers.eq(transactionalId)) + } + @Test def testInitProducerIdWithNoLastProducerData(): Unit = { // If the metadata doesn't include the previous producer data (for example, if it was written to the log by a broker @@ -1671,4 +1806,147 @@ class TransactionCoordinatorTest { else producerEpoch } + + @Test + def testTV2AllowsEpochReBumpingAfterFailedWrite(): Unit = { + // Test the complete TV2 flow: failed write → epoch fence → abort → retry with epoch bump + // This demonstrates that TV2 allows epoch re-bumping after failed writes (unlike TV1) + val producerEpoch = 1.toShort + val txnMetadata = new TransactionMetadata(transactionalId, producerId, producerId, RecordBatch.NO_PRODUCER_ID, + producerEpoch, RecordBatch.NO_PRODUCER_EPOCH, txnTimeoutMs, Ongoing, partitions, time.milliseconds(), time.milliseconds(), TV_2) + + when(transactionManager.validateTransactionTimeoutMs(anyInt())) + .thenReturn(true) + when(transactionManager.getTransactionState(ArgumentMatchers.eq(transactionalId))) + .thenReturn(Right(Some(CoordinatorEpochAndTxnMetadata(coordinatorEpoch, txnMetadata)))) + when(transactionManager.transactionVersionLevel()).thenReturn(TV_2) + + // First attempt fails with COORDINATOR_NOT_AVAILABLE + when(transactionManager.appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + any(), + any(), + any(), + any() + )).thenAnswer(invocation => { + val callback = invocation.getArgument[Errors => Unit](3) + + // Simulate the real TransactionStateManager behavior: reset pendingState on failure + // since handleInitProducerId doesn't provide a custom retryOnError function + txnMetadata.pendingState = None + + // For TV2, hasFailedEpochFence is NOT set to true, allowing epoch bumps on retry + // The epoch remains at its original value (1) since completeTransitionTo was never called + + callback.apply(Errors.COORDINATOR_NOT_AVAILABLE) + }) + + coordinator.handleInitProducerId( + transactionalId, + txnTimeoutMs, + None, + initProducerIdMockCallback + ) + assertEquals(InitProducerIdResult(-1, -1, Errors.COORDINATOR_NOT_AVAILABLE), result) + + // After the first failed attempt, the state should be: + // - hasFailedEpochFence = false (NOT set for TV2) + // - pendingState = None (reset by TransactionStateManager) + // - producerEpoch = 1 (unchanged since completeTransitionTo was never called) + // - transaction still ONGOING + + // Second attempt: Should abort the ongoing transaction + reset(transactionManager) + when(transactionManager.validateTransactionTimeoutMs(anyInt())) + .thenReturn(true) + when(transactionManager.getTransactionState(ArgumentMatchers.eq(transactionalId))) + .thenReturn(Right(Some(CoordinatorEpochAndTxnMetadata(coordinatorEpoch, txnMetadata)))) + when(transactionManager.transactionVersionLevel()).thenReturn(TV_2) + + // Mock the appendTransactionToLog to succeed for the endTransaction call + when(transactionManager.appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + any(), + any(), + any(), + any() + )).thenAnswer(invocation => { + val newMetadata = invocation.getArgument[TxnTransitMetadata](2) + val callback = invocation.getArgument[Errors => Unit](3) + + // Complete the transition and call the callback with success + txnMetadata.completeTransitionTo(newMetadata) + 
callback.apply(Errors.NONE) + }) + + // Mock the transactionMarkerChannelManager to simulate the second write (PREPARE_ABORT -> COMPLETE_ABORT) + doAnswer(invocation => { + val newMetadata = invocation.getArgument[TxnTransitMetadata](3) + // Simulate the completion of transaction markers and the second write + // This would normally happen asynchronously after markers are sent + txnMetadata.completeTransitionTo(newMetadata) // This transitions to COMPLETE_ABORT + txnMetadata.pendingState = None + + null + }).when(transactionMarkerChannelManager).addTxnMarkersToSend( + ArgumentMatchers.eq(coordinatorEpoch), + ArgumentMatchers.eq(TransactionResult.ABORT), + ArgumentMatchers.eq(txnMetadata), + any() + ) + + coordinator.handleInitProducerId( + transactionalId, + txnTimeoutMs, + None, + initProducerIdMockCallback + ) + + // The second attempt should return CONCURRENT_TRANSACTIONS (this is intentional) + assertEquals(InitProducerIdResult(-1, -1, Errors.CONCURRENT_TRANSACTIONS), result) + + // The transactionMarkerChannelManager mock should have completed the transition to COMPLETE_ABORT + // Verify that hasFailedEpochFence was never set to true for TV2, allowing future epoch bumps + assertFalse(txnMetadata.hasFailedEpochFence) + + // Third attempt: Client retries after CONCURRENT_TRANSACTIONS + reset(transactionManager) + when(transactionManager.validateTransactionTimeoutMs(anyInt())) + .thenReturn(true) + when(transactionManager.getTransactionState(ArgumentMatchers.eq(transactionalId))) + .thenReturn(Right(Some(CoordinatorEpochAndTxnMetadata(coordinatorEpoch, txnMetadata)))) + when(transactionManager.transactionVersionLevel()).thenReturn(TV_2) + + when(transactionManager.appendTransactionToLog( + ArgumentMatchers.eq(transactionalId), + ArgumentMatchers.eq(coordinatorEpoch), + any(), + any(), + any(), + any() + )).thenAnswer(invocation => { + val newMetadata = invocation.getArgument[TxnTransitMetadata](2) + val callback = invocation.getArgument[Errors => Unit](3) + + // Complete the transition and call the callback with success + txnMetadata.completeTransitionTo(newMetadata) + callback.apply(Errors.NONE) + }) + + coordinator.handleInitProducerId( + transactionalId, + txnTimeoutMs, + None, + initProducerIdMockCallback + ) + + // The third attempt should succeed with epoch 3 (2 + 1) + // This demonstrates that TV2 allows epoch re-bumping after failed writes + assertEquals(InitProducerIdResult(producerId, 3.toShort, Errors.NONE), result) + + // Final verification that hasFailedEpochFence was never set to true for TV2 + assertFalse(txnMetadata.hasFailedEpochFence) + } } diff --git a/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala b/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala index 1be6cfd62d8..9ca39163194 100644 --- a/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogCleanerTest.scala @@ -18,6 +18,7 @@ package kafka.log import kafka.common._ +import kafka.log.LogCleaner.{MaxBufferUtilizationPercentMetricName, MaxCleanTimeMetricName, MaxCompactionDelayMetricsName} import kafka.server.KafkaConfig import kafka.utils.{CoreUtils, Logging, Pool, TestUtils} import org.apache.kafka.common.TopicPartition @@ -1458,7 +1459,7 @@ class LogCleanerTest extends Logging { log.appendAsLeader(TestUtils.singletonRecords(value = v, key = k), leaderEpoch = 0) //0 to Int.MaxValue is Int.MaxValue+1 message, -1 will be the last message of i-th segment val records = messageWithOffset(k, v, (i + 1L) * (Int.MaxValue + 1L) -1 ) - log.appendAsFollower(records) 
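// The TransactionCoordinatorTest cases above reason about producer epochs near Short.MaxValue:
// starting from 32766 (Short.MaxValue - 1), a single bump still fits in a Short, while a second
// bump would wrap negative. A standalone sketch of that arithmetic (plain Scala, not coordinator
// code) for readers who want to see the overflow those tests guard against:
object ProducerEpochOverflowSketch {
  def bump(epoch: Short): Short = (epoch + 1).toShort

  def main(args: Array[String]): Unit = {
    val boundary: Short = (Short.MaxValue - 1).toShort // 32766, the epoch used in the tests above
    val single = bump(boundary)                        // 32767 == Short.MaxValue, still a valid epoch
    val double = bump(single)                          // wraps to -32768: the overflow a double increment would cause
    println(s"single bump: $single, double bump: $double")
  }
}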
+ log.appendAsFollower(records, Int.MaxValue) assertEquals(i + 1, log.numberOfSegments) } @@ -1512,7 +1513,7 @@ class LogCleanerTest extends Logging { // forward offset and append message to next segment at offset Int.MaxValue val records = messageWithOffset("hello".getBytes, "hello".getBytes, Int.MaxValue - 1) - log.appendAsFollower(records) + log.appendAsFollower(records, Int.MaxValue) log.appendAsLeader(TestUtils.singletonRecords(value = "hello".getBytes, key = "hello".getBytes), leaderEpoch = 0) assertEquals(Int.MaxValue, log.activeSegment.offsetIndex.lastOffset) @@ -1561,14 +1562,14 @@ class LogCleanerTest extends Logging { val log = makeLog(config = LogConfig.fromProps(logConfig.originals, logProps)) val record1 = messageWithOffset("hello".getBytes, "hello".getBytes, 0) - log.appendAsFollower(record1) + log.appendAsFollower(record1, Int.MaxValue) val record2 = messageWithOffset("hello".getBytes, "hello".getBytes, 1) - log.appendAsFollower(record2) + log.appendAsFollower(record2, Int.MaxValue) log.roll(Some(Int.MaxValue/2)) // starting a new log segment at offset Int.MaxValue/2 val record3 = messageWithOffset("hello".getBytes, "hello".getBytes, Int.MaxValue/2) - log.appendAsFollower(record3) + log.appendAsFollower(record3, Int.MaxValue) val record4 = messageWithOffset("hello".getBytes, "hello".getBytes, Int.MaxValue.toLong + 1) - log.appendAsFollower(record4) + log.appendAsFollower(record4, Int.MaxValue) assertTrue(log.logEndOffset - 1 - log.logStartOffset > Int.MaxValue, "Actual offset range should be > Int.MaxValue") assertTrue(log.logSegments.asScala.last.offsetIndex.lastOffset - log.logStartOffset <= Int.MaxValue, @@ -1882,8 +1883,8 @@ class LogCleanerTest extends Logging { val noDupSetOffset = 50 val noDupSet = noDupSetKeys zip (noDupSetOffset until noDupSetOffset + noDupSetKeys.size) - log.appendAsFollower(invalidCleanedMessage(dupSetOffset, dupSet, codec)) - log.appendAsFollower(invalidCleanedMessage(noDupSetOffset, noDupSet, codec)) + log.appendAsFollower(invalidCleanedMessage(dupSetOffset, dupSet, codec), Int.MaxValue) + log.appendAsFollower(invalidCleanedMessage(noDupSetOffset, noDupSet, codec), Int.MaxValue) log.roll() @@ -1969,7 +1970,7 @@ class LogCleanerTest extends Logging { log.roll(Some(11L)) // active segment record - log.appendAsFollower(messageWithOffset(1015, 1015, 11L)) + log.appendAsFollower(messageWithOffset(1015, 1015, 11L), Int.MaxValue) val (nextDirtyOffset, _) = cleaner.clean(LogToClean(log.topicPartition, log, 0L, log.activeSegment.baseOffset, needCompactionNow = true)) assertEquals(log.activeSegment.baseOffset, nextDirtyOffset, @@ -1988,7 +1989,7 @@ class LogCleanerTest extends Logging { log.roll(Some(30L)) // active segment record - log.appendAsFollower(messageWithOffset(1015, 1015, 30L)) + log.appendAsFollower(messageWithOffset(1015, 1015, 30L), Int.MaxValue) val (nextDirtyOffset, _) = cleaner.clean(LogToClean(log.topicPartition, log, 0L, log.activeSegment.baseOffset, needCompactionNow = true)) assertEquals(log.activeSegment.baseOffset, nextDirtyOffset, @@ -2048,47 +2049,164 @@ class LogCleanerTest extends Logging { } @Test - def testMaxOverCleanerThreads(): Unit = { - val logCleaner = new LogCleaner(new CleanerConfig(true), + def testMaxBufferUtilizationPercentMetric(): Unit = { + val logCleaner = new LogCleaner( + new CleanerConfig(true), logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()), logs = new Pool[TopicPartition, UnifiedLog](), logDirFailureChannel = new LogDirFailureChannel(1), - time = time) + time = time + ) + + def 
assertMaxBufferUtilizationPercent(expected: Int): Unit = { + val gauge = logCleaner.metricsGroup.newGauge(MaxBufferUtilizationPercentMetricName, + () => (logCleaner.maxOverCleanerThreads(_.lastStats.bufferUtilization) * 100).toInt) + assertEquals(expected, gauge.value()) + } + + try { + // No CleanerThreads + assertMaxBufferUtilizationPercent(0) + + val cleaners = logCleaner.cleaners + + val cleaner1 = new logCleaner.CleanerThread(1) + cleaner1.lastStats = new CleanerStats(time) + cleaner1.lastStats.bufferUtilization = 0.75 + cleaners += cleaner1 + + val cleaner2 = new logCleaner.CleanerThread(2) + cleaner2.lastStats = new CleanerStats(time) + cleaner2.lastStats.bufferUtilization = 0.85 + cleaners += cleaner2 + + val cleaner3 = new logCleaner.CleanerThread(3) + cleaner3.lastStats = new CleanerStats(time) + cleaner3.lastStats.bufferUtilization = 0.65 + cleaners += cleaner3 + + // expect the gauge value to reflect the maximum bufferUtilization + assertMaxBufferUtilizationPercent(85) + + // Update bufferUtilization and verify the gauge value updates + cleaner1.lastStats.bufferUtilization = 0.9 + assertMaxBufferUtilizationPercent(90) + + // All CleanerThreads have the same bufferUtilization + cleaners.foreach(_.lastStats.bufferUtilization = 0.5) + assertMaxBufferUtilizationPercent(50) + } finally { + logCleaner.shutdown() + } + } + + @Test + def testMaxCleanTimeMetric(): Unit = { + val logCleaner = new LogCleaner( + new CleanerConfig(true), + logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()), + logs = new Pool[TopicPartition, UnifiedLog](), + logDirFailureChannel = new LogDirFailureChannel(1), + time = time + ) + + def assertMaxCleanTime(expected: Int): Unit = { + val gauge = logCleaner.metricsGroup.newGauge(MaxCleanTimeMetricName, + () => logCleaner.maxOverCleanerThreads(_.lastStats.elapsedSecs).toInt) + assertEquals(expected, gauge.value()) + } - val cleaners = logCleaner.cleaners + try { + // No CleanerThreads + assertMaxCleanTime(0) - val cleaner1 = new logCleaner.CleanerThread(1) - cleaner1.lastStats = new CleanerStats(time) - cleaner1.lastStats.bufferUtilization = 0.75 - cleaners += cleaner1 + val cleaners = logCleaner.cleaners - val cleaner2 = new logCleaner.CleanerThread(2) - cleaner2.lastStats = new CleanerStats(time) - cleaner2.lastStats.bufferUtilization = 0.85 - cleaners += cleaner2 + val cleaner1 = new logCleaner.CleanerThread(1) + cleaner1.lastStats = new CleanerStats(time) + cleaner1.lastStats.endTime = cleaner1.lastStats.startTime + 1_000L + cleaners += cleaner1 - val cleaner3 = new logCleaner.CleanerThread(3) - cleaner3.lastStats = new CleanerStats(time) - cleaner3.lastStats.bufferUtilization = 0.65 - cleaners += cleaner3 + val cleaner2 = new logCleaner.CleanerThread(2) + cleaner2.lastStats = new CleanerStats(time) + cleaner2.lastStats.endTime = cleaner2.lastStats.startTime + 2_000L + cleaners += cleaner2 - assertEquals(0, logCleaner.maxOverCleanerThreads(_.lastStats.bufferUtilization)) + val cleaner3 = new logCleaner.CleanerThread(3) + cleaner3.lastStats = new CleanerStats(time) + cleaner3.lastStats.endTime = cleaner3.lastStats.startTime + 3_000L + cleaners += cleaner3 - cleaners.clear() + // expect the gauge value to reflect the maximum cleanTime + assertMaxCleanTime(3) - cleaner1.lastStats.bufferUtilization = 5d - cleaners += cleaner1 - cleaner2.lastStats.bufferUtilization = 6d - cleaners += cleaner2 - cleaner3.lastStats.bufferUtilization = 7d - cleaners += cleaner3 + // Update cleanTime and verify the gauge value updates + cleaner1.lastStats.endTime = 
cleaner1.lastStats.startTime + 4_000L + assertMaxCleanTime(4) - assertEquals(7, logCleaner.maxOverCleanerThreads(_.lastStats.bufferUtilization)) + // All CleanerThreads have the same cleanTime + cleaners.foreach(cleaner => cleaner.lastStats.endTime = cleaner.lastStats.startTime + 1_500L) + assertMaxCleanTime(1) + } finally { + logCleaner.shutdown() + } + } + + @Test + def testMaxCompactionDelayMetrics(): Unit = { + val logCleaner = new LogCleaner( + new CleanerConfig(true), + logDirs = Array(TestUtils.tempDir(), TestUtils.tempDir()), + logs = new Pool[TopicPartition, UnifiedLog](), + logDirFailureChannel = new LogDirFailureChannel(1), + time = time + ) + + def assertMaxCompactionDelay(expected: Int): Unit = { + val gauge = logCleaner.metricsGroup.newGauge(MaxCompactionDelayMetricsName, + () => (logCleaner.maxOverCleanerThreads(_.lastPreCleanStats.maxCompactionDelayMs.toDouble) / 1000).toInt) + assertEquals(expected, gauge.value()) + } + + try { + // No CleanerThreads + assertMaxCompactionDelay(0) + + val cleaners = logCleaner.cleaners + + val cleaner1 = new logCleaner.CleanerThread(1) + cleaner1.lastStats = new CleanerStats(time) + cleaner1.lastPreCleanStats.maxCompactionDelayMs = 1_000L + cleaners += cleaner1 + + val cleaner2 = new logCleaner.CleanerThread(2) + cleaner2.lastStats = new CleanerStats(time) + cleaner2.lastPreCleanStats.maxCompactionDelayMs = 2_000L + cleaners += cleaner2 + + val cleaner3 = new logCleaner.CleanerThread(3) + cleaner3.lastStats = new CleanerStats(time) + cleaner3.lastPreCleanStats.maxCompactionDelayMs = 3_000L + cleaners += cleaner3 + + // expect the gauge value to reflect the maximum CompactionDelay + assertMaxCompactionDelay(3) + + // Update CompactionDelay and verify the gauge value updates + cleaner1.lastPreCleanStats.maxCompactionDelayMs = 4_000L + assertMaxCompactionDelay(4) + + // All CleanerThreads have the same CompactionDelay + cleaners.foreach(_.lastPreCleanStats.maxCompactionDelayMs = 1_500L) + assertMaxCompactionDelay(1) + } finally { + logCleaner.shutdown() + } } private def writeToLog(log: UnifiedLog, keysAndValues: Iterable[(Int, Int)], offsetSeq: Iterable[Long]): Iterable[Long] = { for (((key, value), offset) <- keysAndValues.zip(offsetSeq)) - yield log.appendAsFollower(messageWithOffset(key, value, offset)).lastOffset + yield log.appendAsFollower(messageWithOffset(key, value, offset), Int.MaxValue).lastOffset } private def invalidCleanedMessage(initialOffset: Long, diff --git a/core/src/test/scala/unit/kafka/log/LogConcurrencyTest.scala b/core/src/test/scala/unit/kafka/log/LogConcurrencyTest.scala index d6d2b066506..b106e381656 100644 --- a/core/src/test/scala/unit/kafka/log/LogConcurrencyTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogConcurrencyTest.scala @@ -126,9 +126,14 @@ class LogConcurrencyTest { log.appendAsLeader(TestUtils.records(records), leaderEpoch) log.maybeIncrementHighWatermark(logEndOffsetMetadata) } else { - log.appendAsFollower(TestUtils.records(records, - baseOffset = logEndOffset, - partitionLeaderEpoch = leaderEpoch)) + log.appendAsFollower( + TestUtils.records( + records, + baseOffset = logEndOffset, + partitionLeaderEpoch = leaderEpoch + ), + Int.MaxValue + ) log.updateHighWatermark(logEndOffset) } diff --git a/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala b/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala index b327c3b39e7..b1bc8651587 100644 --- a/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala +++ b/core/src/test/scala/unit/kafka/log/LogLoaderTest.scala @@ -926,17 +926,17 @@ class 
LogLoaderTest { val set3 = MemoryRecords.withRecords(Integer.MAX_VALUE.toLong + 3, Compression.NONE, 0, new SimpleRecord("v4".getBytes(), "k4".getBytes())) val set4 = MemoryRecords.withRecords(Integer.MAX_VALUE.toLong + 4, Compression.NONE, 0, new SimpleRecord("v5".getBytes(), "k5".getBytes())) //Writes into an empty log with baseOffset 0 - log.appendAsFollower(set1) + log.appendAsFollower(set1, Int.MaxValue) assertEquals(0L, log.activeSegment.baseOffset) //This write will roll the segment, yielding a new segment with base offset = max(1, Integer.MAX_VALUE+2) = Integer.MAX_VALUE+2 - log.appendAsFollower(set2) + log.appendAsFollower(set2, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) assertTrue(LogFileUtils.producerSnapshotFile(logDir, Integer.MAX_VALUE.toLong + 2).exists) //This will go into the existing log - log.appendAsFollower(set3) + log.appendAsFollower(set3, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) //This will go into the existing log - log.appendAsFollower(set4) + log.appendAsFollower(set4, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) log.close() val indexFiles = logDir.listFiles.filter(file => file.getName.contains(".index")) @@ -965,17 +965,17 @@ class LogLoaderTest { new SimpleRecord("v7".getBytes(), "k7".getBytes()), new SimpleRecord("v8".getBytes(), "k8".getBytes())) //Writes into an empty log with baseOffset 0 - log.appendAsFollower(set1) + log.appendAsFollower(set1, Int.MaxValue) assertEquals(0L, log.activeSegment.baseOffset) //This write will roll the segment, yielding a new segment with base offset = max(1, Integer.MAX_VALUE+2) = Integer.MAX_VALUE+2 - log.appendAsFollower(set2) + log.appendAsFollower(set2, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) assertTrue(LogFileUtils.producerSnapshotFile(logDir, Integer.MAX_VALUE.toLong + 2).exists) //This will go into the existing log - log.appendAsFollower(set3) + log.appendAsFollower(set3, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) //This will go into the existing log - log.appendAsFollower(set4) + log.appendAsFollower(set4, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 2, log.activeSegment.baseOffset) log.close() val indexFiles = logDir.listFiles.filter(file => file.getName.contains(".index")) @@ -1005,18 +1005,18 @@ class LogLoaderTest { new SimpleRecord("v7".getBytes(), "k7".getBytes()), new SimpleRecord("v8".getBytes(), "k8".getBytes())) //Writes into an empty log with baseOffset 0 - log.appendAsFollower(set1) + log.appendAsFollower(set1, Int.MaxValue) assertEquals(0L, log.activeSegment.baseOffset) //This write will roll the segment, yielding a new segment with base offset = max(1, 3) = 3 - log.appendAsFollower(set2) + log.appendAsFollower(set2, Int.MaxValue) assertEquals(3, log.activeSegment.baseOffset) assertTrue(LogFileUtils.producerSnapshotFile(logDir, 3).exists) //This will also roll the segment, yielding a new segment with base offset = max(5, Integer.MAX_VALUE+4) = Integer.MAX_VALUE+4 - log.appendAsFollower(set3) + log.appendAsFollower(set3, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 4, log.activeSegment.baseOffset) assertTrue(LogFileUtils.producerSnapshotFile(logDir, Integer.MAX_VALUE.toLong + 4).exists) //This will go into the existing log - log.appendAsFollower(set4) + log.appendAsFollower(set4, Int.MaxValue) assertEquals(Integer.MAX_VALUE.toLong + 4, log.activeSegment.baseOffset) 
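// The base-offset assertions above hinge on offsets within a segment being addressed relative to
// the segment's base offset, with the delta needing to fit in an Int. A simplified sketch of that
// constraint (an assumed reading of the roll rule, not UnifiedLog's actual implementation):
object SegmentRollSketch {
  // True when a batch ending at batchLastOffset can no longer be expressed relative to the segment base.
  def mustRoll(segmentBaseOffset: Long, batchLastOffset: Long): Boolean =
    batchLastOffset - segmentBaseOffset > Int.MaxValue

  def main(args: Array[String]): Unit = {
    println(mustRoll(0L, 1L))                                           // false: the first write stays in the segment based at 0
    println(mustRoll(0L, Int.MaxValue.toLong + 3))                      // true: offsets past Int.MaxValue force the roll asserted above
    println(mustRoll(Int.MaxValue.toLong + 2, Int.MaxValue.toLong + 4)) // false: later appends fit in the rolled segment
  }
}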
log.close() val indexFiles = logDir.listFiles.filter(file => file.getName.contains(".index")) @@ -1206,16 +1206,16 @@ class LogLoaderTest { val log = createLog(logDir, new LogConfig(new Properties)) val leaderEpochCache = log.leaderEpochCache val firstBatch = singletonRecordsWithLeaderEpoch(value = "random".getBytes, leaderEpoch = 1, offset = 0) - log.appendAsFollower(records = firstBatch) + log.appendAsFollower(records = firstBatch, Int.MaxValue) val secondBatch = singletonRecordsWithLeaderEpoch(value = "random".getBytes, leaderEpoch = 2, offset = 1) - log.appendAsFollower(records = secondBatch) + log.appendAsFollower(records = secondBatch, Int.MaxValue) val thirdBatch = singletonRecordsWithLeaderEpoch(value = "random".getBytes, leaderEpoch = 2, offset = 2) - log.appendAsFollower(records = thirdBatch) + log.appendAsFollower(records = thirdBatch, Int.MaxValue) val fourthBatch = singletonRecordsWithLeaderEpoch(value = "random".getBytes, leaderEpoch = 3, offset = 3) - log.appendAsFollower(records = fourthBatch) + log.appendAsFollower(records = fourthBatch, Int.MaxValue) assertEquals(java.util.Arrays.asList(new EpochEntry(1, 0), new EpochEntry(2, 1), new EpochEntry(3, 3)), leaderEpochCache.epochEntries) diff --git a/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala b/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala index bbcda01451f..2b9d2d04375 100755 --- a/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala +++ b/core/src/test/scala/unit/kafka/log/UnifiedLogTest.scala @@ -48,11 +48,16 @@ import org.apache.kafka.storage.log.metrics.{BrokerTopicMetrics, BrokerTopicStat import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.ArgumentsSource import org.junit.jupiter.params.provider.{EnumSource, ValueSource} import org.mockito.ArgumentMatchers import org.mockito.ArgumentMatchers.{any, anyLong} import org.mockito.Mockito.{doAnswer, doThrow, spy} +import net.jqwik.api.AfterFailureMode +import net.jqwik.api.ForAll +import net.jqwik.api.Property + import java.io._ import java.nio.ByteBuffer import java.nio.file.Files @@ -304,7 +309,7 @@ class UnifiedLogTest { assertHighWatermark(3L) // Update high watermark as follower - log.appendAsFollower(records(3L)) + log.appendAsFollower(records(3L), leaderEpoch) log.updateHighWatermark(6L) assertHighWatermark(6L) @@ -589,6 +594,7 @@ class UnifiedLogTest { @Test def testRollSegmentThatAlreadyExists(): Unit = { val logConfig = LogTestUtils.createLogConfig(segmentMs = 1 * 60 * 60L) + val partitionLeaderEpoch = 0 // create a log val log = createLog(logDir, logConfig) @@ -601,16 +607,16 @@ class UnifiedLogTest { // should be able to append records to active segment val records = TestUtils.records( List(new SimpleRecord(mockTime.milliseconds, "k1".getBytes, "v1".getBytes)), - baseOffset = 0L, partitionLeaderEpoch = 0) - log.appendAsFollower(records) + baseOffset = 0L, partitionLeaderEpoch = partitionLeaderEpoch) + log.appendAsFollower(records, partitionLeaderEpoch) assertEquals(1, log.numberOfSegments, "Expect one segment.") assertEquals(0L, log.activeSegment.baseOffset) // make sure we can append more records val records2 = TestUtils.records( List(new SimpleRecord(mockTime.milliseconds + 10, "k2".getBytes, "v2".getBytes)), - baseOffset = 1L, partitionLeaderEpoch = 0) - log.appendAsFollower(records2) + baseOffset = 1L, partitionLeaderEpoch = partitionLeaderEpoch) + log.appendAsFollower(records2, 
partitionLeaderEpoch) assertEquals(2, log.logEndOffset, "Expect two records in the log") assertEquals(0, LogTestUtils.readLog(log, 0, 1).records.batches.iterator.next().lastOffset) @@ -625,8 +631,8 @@ class UnifiedLogTest { log.activeSegment.offsetIndex.resize(0) val records3 = TestUtils.records( List(new SimpleRecord(mockTime.milliseconds + 12, "k3".getBytes, "v3".getBytes)), - baseOffset = 2L, partitionLeaderEpoch = 0) - log.appendAsFollower(records3) + baseOffset = 2L, partitionLeaderEpoch = partitionLeaderEpoch) + log.appendAsFollower(records3, partitionLeaderEpoch) assertTrue(log.activeSegment.offsetIndex.maxEntries > 1) assertEquals(2, LogTestUtils.readLog(log, 2, 1).records.batches.iterator.next().lastOffset) assertEquals(2, log.numberOfSegments, "Expect two segments.") @@ -800,17 +806,25 @@ class UnifiedLogTest { val logConfig = LogTestUtils.createLogConfig(segmentBytes = 2048 * 5) val log = createLog(logDir, logConfig) val pid = 1L - val epoch = 0.toShort + val producerEpoch = 0.toShort + val partitionLeaderEpoch = 0 val seq = 0 val baseOffset = 23L // create a batch with a couple gaps to simulate compaction - val records = TestUtils.records(producerId = pid, producerEpoch = epoch, sequence = seq, baseOffset = baseOffset, records = List( - new SimpleRecord(mockTime.milliseconds(), "a".getBytes), - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes), - new SimpleRecord(mockTime.milliseconds(), "c".getBytes), - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "d".getBytes))) - records.batches.forEach(_.setPartitionLeaderEpoch(0)) + val records = TestUtils.records( + producerId = pid, + producerEpoch = producerEpoch, + sequence = seq, + baseOffset = baseOffset, + records = List( + new SimpleRecord(mockTime.milliseconds(), "a".getBytes), + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes), + new SimpleRecord(mockTime.milliseconds(), "c".getBytes), + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "d".getBytes) + ) + ) + records.batches.forEach(_.setPartitionLeaderEpoch(partitionLeaderEpoch)) val filtered = ByteBuffer.allocate(2048) records.filterTo(new RecordFilter(0, 0) { @@ -821,14 +835,18 @@ class UnifiedLogTest { filtered.flip() val filteredRecords = MemoryRecords.readableRecords(filtered) - log.appendAsFollower(filteredRecords) + log.appendAsFollower(filteredRecords, partitionLeaderEpoch) // append some more data and then truncate to force rebuilding of the PID map - val moreRecords = TestUtils.records(baseOffset = baseOffset + 4, records = List( - new SimpleRecord(mockTime.milliseconds(), "e".getBytes), - new SimpleRecord(mockTime.milliseconds(), "f".getBytes))) - moreRecords.batches.forEach(_.setPartitionLeaderEpoch(0)) - log.appendAsFollower(moreRecords) + val moreRecords = TestUtils.records( + baseOffset = baseOffset + 4, + records = List( + new SimpleRecord(mockTime.milliseconds(), "e".getBytes), + new SimpleRecord(mockTime.milliseconds(), "f".getBytes) + ) + ) + moreRecords.batches.forEach(_.setPartitionLeaderEpoch(partitionLeaderEpoch)) + log.appendAsFollower(moreRecords, partitionLeaderEpoch) log.truncateTo(baseOffset + 4) @@ -844,15 +862,23 @@ class UnifiedLogTest { val logConfig = LogTestUtils.createLogConfig(segmentBytes = 2048 * 5) val log = createLog(logDir, logConfig) val pid = 1L - val epoch = 0.toShort + val producerEpoch = 0.toShort + val partitionLeaderEpoch = 0 val seq = 0 val baseOffset = 23L // create an empty batch - val records = TestUtils.records(producerId = pid, producerEpoch = epoch, sequence 
= seq, baseOffset = baseOffset, records = List( - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "a".getBytes), - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes))) - records.batches.forEach(_.setPartitionLeaderEpoch(0)) + val records = TestUtils.records( + producerId = pid, + producerEpoch = producerEpoch, + sequence = seq, + baseOffset = baseOffset, + records = List( + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "a".getBytes), + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes) + ) + ) + records.batches.forEach(_.setPartitionLeaderEpoch(partitionLeaderEpoch)) val filtered = ByteBuffer.allocate(2048) records.filterTo(new RecordFilter(0, 0) { @@ -863,14 +889,18 @@ class UnifiedLogTest { filtered.flip() val filteredRecords = MemoryRecords.readableRecords(filtered) - log.appendAsFollower(filteredRecords) + log.appendAsFollower(filteredRecords, partitionLeaderEpoch) // append some more data and then truncate to force rebuilding of the PID map - val moreRecords = TestUtils.records(baseOffset = baseOffset + 2, records = List( - new SimpleRecord(mockTime.milliseconds(), "e".getBytes), - new SimpleRecord(mockTime.milliseconds(), "f".getBytes))) - moreRecords.batches.forEach(_.setPartitionLeaderEpoch(0)) - log.appendAsFollower(moreRecords) + val moreRecords = TestUtils.records( + baseOffset = baseOffset + 2, + records = List( + new SimpleRecord(mockTime.milliseconds(), "e".getBytes), + new SimpleRecord(mockTime.milliseconds(), "f".getBytes) + ) + ) + moreRecords.batches.forEach(_.setPartitionLeaderEpoch(partitionLeaderEpoch)) + log.appendAsFollower(moreRecords, partitionLeaderEpoch) log.truncateTo(baseOffset + 2) @@ -886,17 +916,25 @@ class UnifiedLogTest { val logConfig = LogTestUtils.createLogConfig(segmentBytes = 2048 * 5) val log = createLog(logDir, logConfig) val pid = 1L - val epoch = 0.toShort + val producerEpoch = 0.toShort + val partitionLeaderEpoch = 0 val seq = 0 val baseOffset = 23L // create a batch with a couple gaps to simulate compaction - val records = TestUtils.records(producerId = pid, producerEpoch = epoch, sequence = seq, baseOffset = baseOffset, records = List( - new SimpleRecord(mockTime.milliseconds(), "a".getBytes), - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes), - new SimpleRecord(mockTime.milliseconds(), "c".getBytes), - new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "d".getBytes))) - records.batches.forEach(_.setPartitionLeaderEpoch(0)) + val records = TestUtils.records( + producerId = pid, + producerEpoch = producerEpoch, + sequence = seq, + baseOffset = baseOffset, + records = List( + new SimpleRecord(mockTime.milliseconds(), "a".getBytes), + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "b".getBytes), + new SimpleRecord(mockTime.milliseconds(), "c".getBytes), + new SimpleRecord(mockTime.milliseconds(), "key".getBytes, "d".getBytes) + ) + ) + records.batches.forEach(_.setPartitionLeaderEpoch(partitionLeaderEpoch)) val filtered = ByteBuffer.allocate(2048) records.filterTo(new RecordFilter(0, 0) { @@ -907,7 +945,7 @@ class UnifiedLogTest { filtered.flip() val filteredRecords = MemoryRecords.readableRecords(filtered) - log.appendAsFollower(filteredRecords) + log.appendAsFollower(filteredRecords, partitionLeaderEpoch) val activeProducers = log.activeProducersWithLastSequence assertTrue(activeProducers.contains(pid)) @@ -1337,33 +1375,44 @@ class UnifiedLogTest { // create a log val log = createLog(logDir, new LogConfig(new Properties)) - val epoch: Short 
= 0 + val producerEpoch: Short = 0 + val partitionLeaderEpoch = 0 val buffer = ByteBuffer.allocate(512) - var builder = MemoryRecords.builder(buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 0L, mockTime.milliseconds(), 1L, epoch, 0, false, 0) + var builder = MemoryRecords.builder( + buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, + TimestampType.LOG_APPEND_TIME, 0L, mockTime.milliseconds(), 1L, producerEpoch, 0, false, + partitionLeaderEpoch + ) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() builder = MemoryRecords.builder(buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 1L, mockTime.milliseconds(), 2L, epoch, 0, false, 0) + TimestampType.LOG_APPEND_TIME, 1L, mockTime.milliseconds(), 2L, producerEpoch, 0, false, + partitionLeaderEpoch) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() - builder = MemoryRecords.builder(buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 2L, mockTime.milliseconds(), 3L, epoch, 0, false, 0) + builder = MemoryRecords.builder( + buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, + TimestampType.LOG_APPEND_TIME, 2L, mockTime.milliseconds(), 3L, producerEpoch, 0, false, + partitionLeaderEpoch + ) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() - builder = MemoryRecords.builder(buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 3L, mockTime.milliseconds(), 4L, epoch, 0, false, 0) + builder = MemoryRecords.builder( + buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, + TimestampType.LOG_APPEND_TIME, 3L, mockTime.milliseconds(), 4L, producerEpoch, 0, false, + partitionLeaderEpoch + ) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() buffer.flip() val memoryRecords = MemoryRecords.readableRecords(buffer) - log.appendAsFollower(memoryRecords) + log.appendAsFollower(memoryRecords, partitionLeaderEpoch) log.flush(false) val fetchedData = LogTestUtils.readLog(log, 0, Int.MaxValue) @@ -1382,7 +1431,7 @@ class UnifiedLogTest { def testDuplicateAppendToFollower(): Unit = { val logConfig = LogTestUtils.createLogConfig(segmentBytes = 1024 * 1024 * 5) val log = createLog(logDir, logConfig) - val epoch: Short = 0 + val producerEpoch: Short = 0 val pid = 1L val baseSequence = 0 val partitionLeaderEpoch = 0 @@ -1390,10 +1439,32 @@ class UnifiedLogTest { // this is a bit contrived. 
to trigger the duplicate case for a follower append, we have to append // a batch with matching sequence numbers, but valid increasing offsets assertEquals(0L, log.logEndOffset) - log.appendAsFollower(MemoryRecords.withIdempotentRecords(0L, Compression.NONE, pid, epoch, baseSequence, - partitionLeaderEpoch, new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))) - log.appendAsFollower(MemoryRecords.withIdempotentRecords(2L, Compression.NONE, pid, epoch, baseSequence, - partitionLeaderEpoch, new SimpleRecord("a".getBytes), new SimpleRecord("b".getBytes))) + log.appendAsFollower( + MemoryRecords.withIdempotentRecords( + 0L, + Compression.NONE, + pid, + producerEpoch, + baseSequence, + partitionLeaderEpoch, + new SimpleRecord("a".getBytes), + new SimpleRecord("b".getBytes) + ), + partitionLeaderEpoch + ) + log.appendAsFollower( + MemoryRecords.withIdempotentRecords( + 2L, + Compression.NONE, + pid, + producerEpoch, + baseSequence, + partitionLeaderEpoch, + new SimpleRecord("a".getBytes), + new SimpleRecord("b".getBytes) + ), + partitionLeaderEpoch + ) // Ensure that even the duplicate sequences are accepted on the follower. assertEquals(4L, log.logEndOffset) @@ -1406,48 +1477,49 @@ class UnifiedLogTest { val pid1 = 1L val pid2 = 2L - val epoch: Short = 0 + val producerEpoch: Short = 0 val buffer = ByteBuffer.allocate(512) // pid1 seq = 0 var builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 0L, mockTime.milliseconds(), pid1, epoch, 0) + TimestampType.LOG_APPEND_TIME, 0L, mockTime.milliseconds(), pid1, producerEpoch, 0) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() // pid2 seq = 0 builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 1L, mockTime.milliseconds(), pid2, epoch, 0) + TimestampType.LOG_APPEND_TIME, 1L, mockTime.milliseconds(), pid2, producerEpoch, 0) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() // pid1 seq = 1 builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 2L, mockTime.milliseconds(), pid1, epoch, 1) + TimestampType.LOG_APPEND_TIME, 2L, mockTime.milliseconds(), pid1, producerEpoch, 1) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() // pid2 seq = 1 builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 3L, mockTime.milliseconds(), pid2, epoch, 1) + TimestampType.LOG_APPEND_TIME, 3L, mockTime.milliseconds(), pid2, producerEpoch, 1) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() // // pid1 seq = 1 (duplicate) builder = MemoryRecords.builder(buffer, RecordBatch.CURRENT_MAGIC_VALUE, Compression.NONE, - TimestampType.LOG_APPEND_TIME, 4L, mockTime.milliseconds(), pid1, epoch, 1) + TimestampType.LOG_APPEND_TIME, 4L, mockTime.milliseconds(), pid1, producerEpoch, 1) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() buffer.flip() + val epoch = 0 val records = MemoryRecords.readableRecords(buffer) - records.batches.forEach(_.setPartitionLeaderEpoch(0)) + records.batches.forEach(_.setPartitionLeaderEpoch(epoch)) // Ensure that batches with duplicates are accepted on the follower. 
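// The comment above is the crux of this test: a leader deduplicates on (producerId, sequence),
// but a follower replicates whatever the leader wrote, so the repeated pid1 seq = 1 batch is kept.
// A toy illustration of the leader-side check the follower skips (simplified, not the
// ProducerStateManager implementation):
object DuplicateSequenceSketch {
  def isDuplicate(lastSequenceByProducer: Map[Long, Int], producerId: Long, baseSequence: Int): Boolean =
    lastSequenceByProducer.get(producerId).contains(baseSequence)

  def main(args: Array[String]): Unit = {
    val lastSequence = Map(1L -> 1, 2L -> 1)  // pid1 and pid2 last wrote sequence 1
    println(isDuplicate(lastSequence, 1L, 1)) // true: a leader would treat this as a retry
    println(isDuplicate(lastSequence, 1L, 2)) // false: a new sequence appends normally
  }
}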
assertEquals(0L, log.logEndOffset) - log.appendAsFollower(records) + log.appendAsFollower(records, epoch) assertEquals(5L, log.logEndOffset) } @@ -1589,8 +1661,12 @@ class UnifiedLogTest { val records = messageIds.map(id => new SimpleRecord(id.toString.getBytes)) // now test the case that we give the offsets and use non-sequential offsets - for (i <- records.indices) - log.appendAsFollower(MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i))) + for (i <- records.indices) { + log.appendAsFollower( + MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i)), + Int.MaxValue + ) + } for (i <- 50 until messageIds.max) { val idx = messageIds.indexWhere(_ >= i) val read = LogTestUtils.readLog(log, i, 100).records.records.iterator.next() @@ -1637,8 +1713,12 @@ class UnifiedLogTest { val records = messageIds.map(id => new SimpleRecord(id.toString.getBytes)) // now test the case that we give the offsets and use non-sequential offsets - for (i <- records.indices) - log.appendAsFollower(MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i))) + for (i <- records.indices) { + log.appendAsFollower( + MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i)), + Int.MaxValue + ) + } for (i <- 50 until messageIds.max) { val idx = messageIds.indexWhere(_ >= i) @@ -1662,8 +1742,12 @@ class UnifiedLogTest { val records = messageIds.map(id => new SimpleRecord(id.toString.getBytes)) // now test the case that we give the offsets and use non-sequential offsets - for (i <- records.indices) - log.appendAsFollower(MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i))) + for (i <- records.indices) { + log.appendAsFollower( + MemoryRecords.withRecords(messageIds(i), Compression.NONE, 0, records(i)), + Int.MaxValue + ) + } for (i <- 50 until messageIds.max) { assertEquals(MemoryRecords.EMPTY, LogTestUtils.readLog(log, i, maxLength = 0, minOneMessage = false).records) @@ -1911,9 +1995,94 @@ class UnifiedLogTest { val log = createLog(logDir, LogTestUtils.createLogConfig(maxMessageBytes = second.sizeInBytes - 1)) - log.appendAsFollower(first) + log.appendAsFollower(first, Int.MaxValue) // the second record is larger then limit but appendAsFollower does not validate the size. 
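// testInvalidLeaderEpoch below (and its KafkaMetadataLogTest counterpart earlier in this diff)
// shows what the new epoch argument to appendAsFollower is for: only the leading batches whose
// partition leader epoch does not exceed the passed epoch are appended, which is why tests that
// are not exercising that check pass Int.MaxValue. A simplified model of that acceptance rule
// (an illustration inferred from the tests, not UnifiedLog code):
object FollowerEpochAcceptanceSketch {
  final case class Batch(baseOffset: Long, partitionLeaderEpoch: Int)

  def acceptedPrefix(batches: Seq[Batch], currentLeaderEpoch: Int): Seq[Batch] =
    batches.takeWhile(_.partitionLeaderEpoch <= currentLeaderEpoch)

  def main(args: Array[String]): Unit = {
    val batches = Seq(Batch(0L, partitionLeaderEpoch = 3), Batch(10L, partitionLeaderEpoch = 4))
    println(acceptedPrefix(batches, currentLeaderEpoch = 3))            // keeps only the epoch-3 batch
    println(acceptedPrefix(batches, currentLeaderEpoch = Int.MaxValue)) // keeps both batches
  }
}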
- log.appendAsFollower(second) + log.appendAsFollower(second, Int.MaxValue) + } + + @ParameterizedTest + @ArgumentsSource(classOf[InvalidMemoryRecordsProvider]) + def testInvalidMemoryRecords(records: MemoryRecords, expectedException: Optional[Class[Exception]]): Unit = { + val logConfig = LogTestUtils.createLogConfig() + val log = createLog(logDir, logConfig) + val previousEndOffset = log.logEndOffsetMetadata.messageOffset + + if (expectedException.isPresent()) { + assertThrows( + expectedException.get(), + () => log.appendAsFollower(records, Int.MaxValue) + ) + } else { + log.appendAsFollower(records, Int.MaxValue) + } + + assertEquals(previousEndOffset, log.logEndOffsetMetadata.messageOffset) + } + + @Property(tries = 100, afterFailure = AfterFailureMode.SAMPLE_ONLY) + def testRandomRecords( + @ForAll(supplier = classOf[ArbitraryMemoryRecords]) records: MemoryRecords + ): Unit = { + val tempDir = TestUtils.tempDir() + val logDir = TestUtils.randomPartitionLogDir(tempDir) + try { + val logConfig = LogTestUtils.createLogConfig() + val log = createLog(logDir, logConfig) + val previousEndOffset = log.logEndOffsetMetadata.messageOffset + + // Depending on the corruption, unified log sometimes throws and sometimes returns an + // empty set of batches + assertThrows( + classOf[CorruptRecordException], + () => { + val info = log.appendAsFollower(records, Int.MaxValue) + if (info.firstOffset == UnifiedLog.UnknownOffset) { + throw new CorruptRecordException("Unknown offset is test") + } + } + ) + + assertEquals(previousEndOffset, log.logEndOffsetMetadata.messageOffset) + } finally { + Utils.delete(tempDir) + } + } + + @Test + def testInvalidLeaderEpoch(): Unit = { + val logConfig = LogTestUtils.createLogConfig() + val log = createLog(logDir, logConfig) + val previousEndOffset = log.logEndOffsetMetadata.messageOffset + val epoch = log.latestEpoch.getOrElse(0) + 1 + val numberOfRecords = 10 + + val batchWithValidEpoch = MemoryRecords.withRecords( + previousEndOffset, + Compression.NONE, + epoch, + (0 until numberOfRecords).map(number => new SimpleRecord(number.toString.getBytes)): _* + ) + + val batchWithInvalidEpoch = MemoryRecords.withRecords( + previousEndOffset + numberOfRecords, + Compression.NONE, + epoch + 1, + (0 until numberOfRecords).map(number => new SimpleRecord(number.toString.getBytes)): _* + ) + + val buffer = ByteBuffer.allocate(batchWithValidEpoch.sizeInBytes() + batchWithInvalidEpoch.sizeInBytes()) + buffer.put(batchWithValidEpoch.buffer()) + buffer.put(batchWithInvalidEpoch.buffer()) + buffer.flip() + + val records = MemoryRecords.readableRecords(buffer) + + log.appendAsFollower(records, epoch) + + // Check that only the first batch was appended + assertEquals(previousEndOffset + numberOfRecords, log.logEndOffsetMetadata.messageOffset) + // Check that the last fetched epoch matches the first batch + assertEquals(epoch, log.latestEpoch.get) } @Test @@ -2014,7 +2183,7 @@ class UnifiedLogTest { val messages = (0 until numMessages).map { i => MemoryRecords.withRecords(100 + i, Compression.NONE, 0, new SimpleRecord(mockTime.milliseconds + i, i.toString.getBytes())) } - messages.foreach(log.appendAsFollower) + messages.foreach(message => log.appendAsFollower(message, Int.MaxValue)) val timeIndexEntries = log.logSegments.asScala.foldLeft(0) { (entries, segment) => entries + segment.timeIndex.entries } assertEquals(numMessages - 1, timeIndexEntries, s"There should be ${numMessages - 1} time index entries") assertEquals(mockTime.milliseconds + numMessages - 1, 
log.activeSegment.timeIndex.lastEntry.timestamp, @@ -2158,7 +2327,7 @@ class UnifiedLogTest { // The cache can be updated directly after a leader change. // The new latest offset should reflect the updated epoch. log.assignEpochStartOffset(2, 2L) - + assertEquals(new OffsetResultHolder(new TimestampAndOffset(ListOffsetsResponse.UNKNOWN_TIMESTAMP, 2L, Optional.of(2))), log.fetchOffsetByTimestamp(ListOffsetsRequest.LATEST_TIMESTAMP, Some(remoteLogManager))) } @@ -2426,20 +2595,22 @@ class UnifiedLogTest { def testAppendWithOutOfOrderOffsetsThrowsException(): Unit = { val log = createLog(logDir, new LogConfig(new Properties)) + val epoch = 0 val appendOffsets = Seq(0L, 1L, 3L, 2L, 4L) val buffer = ByteBuffer.allocate(512) for (offset <- appendOffsets) { val builder = MemoryRecords.builder(buffer, RecordBatch.MAGIC_VALUE_V2, Compression.NONE, TimestampType.LOG_APPEND_TIME, offset, mockTime.milliseconds(), - 1L, 0, 0, false, 0) + 1L, 0, 0, false, epoch) builder.append(new SimpleRecord("key".getBytes, "value".getBytes)) builder.close() } buffer.flip() val memoryRecords = MemoryRecords.readableRecords(buffer) - assertThrows(classOf[OffsetsOutOfOrderException], () => - log.appendAsFollower(memoryRecords) + assertThrows( + classOf[OffsetsOutOfOrderException], + () => log.appendAsFollower(memoryRecords, epoch) ) } @@ -2454,9 +2625,11 @@ class UnifiedLogTest { for (magic <- magicVals; compressionType <- compressionTypes) { val compression = Compression.of(compressionType).build() val invalidRecord = MemoryRecords.withRecords(magic, compression, new SimpleRecord(1.toString.getBytes)) - assertThrows(classOf[UnexpectedAppendOffsetException], - () => log.appendAsFollower(invalidRecord), - () => s"Magic=$magic, compressionType=$compressionType") + assertThrows( + classOf[UnexpectedAppendOffsetException], + () => log.appendAsFollower(invalidRecord, Int.MaxValue), + () => s"Magic=$magic, compressionType=$compressionType" + ) } } @@ -2477,7 +2650,10 @@ class UnifiedLogTest { magicValue = magic, codec = Compression.of(compressionType).build(), baseOffset = firstOffset) - val exception = assertThrows(classOf[UnexpectedAppendOffsetException], () => log.appendAsFollower(records = batch)) + val exception = assertThrows( + classOf[UnexpectedAppendOffsetException], + () => log.appendAsFollower(records = batch, Int.MaxValue) + ) assertEquals(firstOffset, exception.firstOffset, s"Magic=$magic, compressionType=$compressionType, UnexpectedAppendOffsetException#firstOffset") assertEquals(firstOffset + 2, exception.lastOffset, s"Magic=$magic, compressionType=$compressionType, UnexpectedAppendOffsetException#lastOffset") } @@ -2576,9 +2752,16 @@ class UnifiedLogTest { log.appendAsLeader(TestUtils.records(List(new SimpleRecord("foo".getBytes()))), leaderEpoch = 5) assertEquals(OptionalInt.of(5), log.leaderEpochCache.latestEpoch) - log.appendAsFollower(TestUtils.records(List(new SimpleRecord("foo".getBytes())), - baseOffset = 1L, - magicValue = RecordVersion.V1.value)) + log.appendAsFollower( + TestUtils.records( + List( + new SimpleRecord("foo".getBytes()) + ), + baseOffset = 1L, + magicValue = RecordVersion.V1.value + ), + 5 + ) assertEquals(OptionalInt.empty, log.leaderEpochCache.latestEpoch) } @@ -2934,7 +3117,7 @@ class UnifiedLogTest { //When appending as follower (assignOffsets = false) for (i <- records.indices) - log.appendAsFollower(recordsForEpoch(i)) + log.appendAsFollower(recordsForEpoch(i), i) assertEquals(Some(42), log.latestEpoch) } @@ -3002,7 +3185,7 @@ class UnifiedLogTest { def append(epoch: Int, 
startOffset: Long, count: Int): Unit = { for (i <- 0 until count) - log.appendAsFollower(createRecords(startOffset + i, epoch)) + log.appendAsFollower(createRecords(startOffset + i, epoch), epoch) } //Given 2 segments, 10 messages per segment @@ -3236,7 +3419,7 @@ class UnifiedLogTest { buffer.flip() - appendAsFollower(log, MemoryRecords.readableRecords(buffer)) + appendAsFollower(log, MemoryRecords.readableRecords(buffer), epoch) val abortedTransactions = LogTestUtils.allAbortedTransactions(log) val expectedTransactions = List( @@ -3320,7 +3503,7 @@ class UnifiedLogTest { appendEndTxnMarkerToBuffer(buffer, pid, epoch, 10L, ControlRecordType.COMMIT, leaderEpoch = 1) buffer.flip() - log.appendAsFollower(MemoryRecords.readableRecords(buffer)) + log.appendAsFollower(MemoryRecords.readableRecords(buffer), epoch) LogTestUtils.appendEndTxnMarkerAsLeader(log, pid, epoch, ControlRecordType.ABORT, mockTime.milliseconds(), coordinatorEpoch = 2, leaderEpoch = 1) LogTestUtils.appendEndTxnMarkerAsLeader(log, pid, epoch, ControlRecordType.ABORT, mockTime.milliseconds(), coordinatorEpoch = 2, leaderEpoch = 1) @@ -3441,10 +3624,16 @@ class UnifiedLogTest { val log = createLog(logDir, logConfig) // append a few records - appendAsFollower(log, MemoryRecords.withRecords(Compression.NONE, - new SimpleRecord("a".getBytes), - new SimpleRecord("b".getBytes), - new SimpleRecord("c".getBytes)), 5) + appendAsFollower( + log, + MemoryRecords.withRecords( + Compression.NONE, + new SimpleRecord("a".getBytes), + new SimpleRecord("b".getBytes), + new SimpleRecord("c".getBytes) + ), + 5 + ) log.updateHighWatermark(3L) @@ -4455,8 +4644,8 @@ class UnifiedLogTest { segments.add(seg2) assertEquals(Seq(Long.MaxValue, Long.MaxValue), log.getFirstBatchTimestampForSegments(segments).asScala.toSeq) - seg1.append(1, 1000L, 1, MemoryRecords.withRecords(1, Compression.NONE, new SimpleRecord("one".getBytes))) - seg2.append(2, 2000L, 1, MemoryRecords.withRecords(2, Compression.NONE, new SimpleRecord("two".getBytes))) + seg1.append(1, MemoryRecords.withRecords(1, Compression.NONE, new SimpleRecord(1000L, "one".getBytes))) + seg2.append(2, MemoryRecords.withRecords(2, Compression.NONE, new SimpleRecord(2000L, "two".getBytes))) assertEquals(Seq(1000L, 2000L), log.getFirstBatchTimestampForSegments(segments).asScala.toSeq) seg1.close() @@ -4508,9 +4697,9 @@ class UnifiedLogTest { builder.close() } - private def appendAsFollower(log: UnifiedLog, records: MemoryRecords, leaderEpoch: Int = 0): Unit = { + private def appendAsFollower(log: UnifiedLog, records: MemoryRecords, leaderEpoch: Int): Unit = { records.batches.forEach(_.setPartitionLeaderEpoch(leaderEpoch)) - log.appendAsFollower(records) + log.appendAsFollower(records, leaderEpoch) } private def createLog(dir: File, diff --git a/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala b/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala index 1ab0f0ae8e8..ecea412e989 100644 --- a/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala +++ b/core/src/test/scala/unit/kafka/network/RequestChannelTest.scala @@ -131,11 +131,21 @@ class RequestChannelTest { op: OpType, entries: Map[String, String], expectedValues: Map[String, String]): Unit = { - val alterConfigs = request(incrementalAlterConfigs(resource, entries, op)) - val loggableAlterConfigs = alterConfigs.loggableRequest.asInstanceOf[IncrementalAlterConfigsRequest] + val alterConfigs = incrementalAlterConfigs(resource, entries, op) + val alterConfigsString = alterConfigs.toString + entries.foreach { entry => 
+ if (!alterConfigsString.contains(entry._1)) { + fail("Config names should be in the request string") + } + if (entry._2 != null && alterConfigsString.contains(entry._2)) { + fail("Config values should not be in the request string") + } + } + val req = request(alterConfigs) + val loggableAlterConfigs = req.loggableRequest.asInstanceOf[IncrementalAlterConfigsRequest] val loggedConfig = loggableAlterConfigs.data.resources.find(resource.`type`.id, resource.name).configs assertEquals(expectedValues, toMap(loggedConfig)) - val alterConfigsDesc = RequestConvertToJson.requestDesc(alterConfigs.header, alterConfigs.requestLog.toJava, alterConfigs.isForwarded).toString + val alterConfigsDesc = RequestConvertToJson.requestDesc(req.header, req.requestLog.toJava, req.isForwarded).toString assertFalse(alterConfigsDesc.contains(sensitiveValue), s"Sensitive config logged $alterConfigsDesc") } diff --git a/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala b/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala index 7528eefc420..d1a05e7d915 100644 --- a/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala +++ b/core/src/test/scala/unit/kafka/server/AbstractFetcherManagerTest.scala @@ -328,9 +328,12 @@ class AbstractFetcherManagerTest { fetchBackOffMs = 0, brokerTopicStats = new BrokerTopicStats) { - override protected def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = { - None - } + override protected def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = None override protected def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = {} diff --git a/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala index 5f01458ffa7..7a555b400af 100644 --- a/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala +++ b/core/src/test/scala/unit/kafka/server/AbstractFetcherThreadTest.scala @@ -32,6 +32,7 @@ import org.junit.jupiter.api.Assumptions.assumeTrue import org.junit.jupiter.api.{BeforeEach, Test} import kafka.server.FetcherThreadTestUtils.{initialFetchState, mkBatch} +import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.ArrayBuffer import scala.collection.{Map, Set} @@ -631,6 +632,7 @@ class AbstractFetcherThreadTest { @Test def testFollowerFetchOutOfRangeLow(): Unit = { + val leaderEpoch = 4 val partition = new TopicPartition("topic", 0) val mockLeaderEndpoint = new MockLeaderEndPoint(truncateOnFetch = truncateOnFetch, version = version) val mockTierStateMachine = new MockTierStateMachine(mockLeaderEndpoint) @@ -640,14 +642,19 @@ class AbstractFetcherThreadTest { val replicaLog = Seq( mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes))) - val replicaState = PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L) + val replicaState = PartitionState(replicaLog, leaderEpoch = leaderEpoch, highWatermark = 0L) fetcher.setReplicaState(partition, replicaState) - fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 0))) + fetcher.addPartitions( + Map( + partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = leaderEpoch) + ) + ) val leaderLog = Seq( - mkBatch(baseOffset = 2, 
leaderEpoch = 4, new SimpleRecord("c".getBytes))) + mkBatch(baseOffset = 2, leaderEpoch = leaderEpoch, new SimpleRecord("c".getBytes)) + ) - val leaderState = PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L) + val leaderState = PartitionState(leaderLog, leaderEpoch = leaderEpoch, highWatermark = 2L) fetcher.mockLeader.setLeaderState(partition, leaderState) fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState) @@ -674,6 +681,7 @@ class AbstractFetcherThreadTest { @Test def testRetryAfterUnknownLeaderEpochInLatestOffsetFetch(): Unit = { + val leaderEpoch = 4 val partition = new TopicPartition("topic", 0) val mockLeaderEndPoint = new MockLeaderEndPoint(truncateOnFetch = truncateOnFetch, version = version) { val tries = new AtomicInteger(0) @@ -688,16 +696,18 @@ class AbstractFetcherThreadTest { // The follower begins from an offset which is behind the leader's log start offset val replicaLog = Seq( - mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes))) + mkBatch(baseOffset = 0, leaderEpoch = 0, new SimpleRecord("a".getBytes)) + ) - val replicaState = PartitionState(replicaLog, leaderEpoch = 0, highWatermark = 0L) + val replicaState = PartitionState(replicaLog, leaderEpoch = leaderEpoch, highWatermark = 0L) fetcher.setReplicaState(partition, replicaState) - fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = 0))) + fetcher.addPartitions(Map(partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = leaderEpoch))) val leaderLog = Seq( - mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes))) + mkBatch(baseOffset = 2, leaderEpoch = 4, new SimpleRecord("c".getBytes)) + ) - val leaderState = PartitionState(leaderLog, leaderEpoch = 0, highWatermark = 2L) + val leaderState = PartitionState(leaderLog, leaderEpoch = leaderEpoch, highWatermark = 2L) fetcher.mockLeader.setLeaderState(partition, leaderState) fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState) @@ -715,6 +725,46 @@ class AbstractFetcherThreadTest { assertEquals(leaderState.highWatermark, replicaState.highWatermark) } + @Test + def testReplicateBatchesUpToLeaderEpoch(): Unit = { + val leaderEpoch = 4 + val partition = new TopicPartition("topic", 0) + val mockLeaderEndpoint = new MockLeaderEndPoint(version = version) + val mockTierStateMachine = new MockTierStateMachine(mockLeaderEndpoint) + val fetcher = new MockFetcherThread(mockLeaderEndpoint, mockTierStateMachine, failedPartitions = failedPartitions) + + val replicaState = PartitionState(Seq(), leaderEpoch = leaderEpoch, highWatermark = 0L) + fetcher.setReplicaState(partition, replicaState) + fetcher.addPartitions( + Map( + partition -> initialFetchState(topicIds.get(partition.topic), 3L, leaderEpoch = leaderEpoch) + ) + ) + + val leaderLog = Seq( + mkBatch(baseOffset = 0, leaderEpoch = leaderEpoch - 1, new SimpleRecord("c".getBytes)), + mkBatch(baseOffset = 1, leaderEpoch = leaderEpoch, new SimpleRecord("d".getBytes)), + mkBatch(baseOffset = 2, leaderEpoch = leaderEpoch + 1, new SimpleRecord("e".getBytes)) + ) + + val leaderState = PartitionState(leaderLog, leaderEpoch = leaderEpoch, highWatermark = 0L) + fetcher.mockLeader.setLeaderState(partition, leaderState) + fetcher.mockLeader.setReplicaPartitionStateCallback(fetcher.replicaPartitionState) + + assertEquals(Option(Fetching), fetcher.fetchState(partition).map(_.state)) + assertEquals(0, replicaState.logStartOffset) + assertEquals(List(), 
replicaState.log.toList) + + TestUtils.waitUntilTrue(() => { + fetcher.doWork() + fetcher.replicaPartitionState(partition).log == fetcher.mockLeader.leaderPartitionState(partition).log.dropRight(1) + }, "Failed to reconcile leader and follower logs up to the leader epoch") + + assertEquals(leaderState.logStartOffset, replicaState.logStartOffset) + assertEquals(leaderState.logEndOffset - 1, replicaState.logEndOffset) + assertEquals(leaderState.highWatermark, replicaState.highWatermark) + } + @Test def testCorruptMessage(): Unit = { val partition = new TopicPartition("topic", 0) @@ -904,11 +954,16 @@ class AbstractFetcherThreadTest { val mockLeaderEndpoint = new MockLeaderEndPoint(truncateOnFetch = truncateOnFetch, version = version) val mockTierStateMachine = new MockTierStateMachine(mockLeaderEndpoint) val fetcherForAppend = new MockFetcherThread(mockLeaderEndpoint, mockTierStateMachine, failedPartitions = failedPartitions) { - override def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = { if (topicPartition == partition1) { throw new KafkaException() } else { - super.processPartitionData(topicPartition, fetchOffset, partitionData) + super.processPartitionData(topicPartition, fetchOffset, partitionLeaderEpoch, partitionData) } } } @@ -1013,9 +1068,14 @@ class AbstractFetcherThreadTest { val mockLeaderEndpoint = new MockLeaderEndPoint(truncateOnFetch = truncateOnFetch, version = version) val mockTierStateMachine = new MockTierStateMachine(mockLeaderEndpoint) val fetcher = new MockFetcherThread(mockLeaderEndpoint, mockTierStateMachine) { - override def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = { processPartitionDataCalls += 1 - super.processPartitionData(topicPartition, fetchOffset, partitionData) + super.processPartitionData(topicPartition, fetchOffset, partitionLeaderEpoch, partitionData) } override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = { @@ -1103,4 +1163,28 @@ class AbstractFetcherThreadTest { assertTrue(fetcher.fetchState(unknownPartition).isEmpty) } + @Test + def testIgnoreFetchResponseWhenLeaderEpochChanged(): Unit = { + val newEpoch = 1 + val initEpoch = 0 + + val partition = new TopicPartition("topic", 0) + val mockLeaderEndpoint = new MockLeaderEndPoint(version = version) + val mockTierStateMachine = new MockTierStateMachine(mockLeaderEndpoint) + val fetcher = new MockFetcherThread(mockLeaderEndpoint, mockTierStateMachine) + val replicaState = PartitionState(leaderEpoch = newEpoch) + fetcher.setReplicaState(partition, replicaState) + val initFetchState = initialFetchState(topicIds.get(partition.topic), 0L, leaderEpoch = newEpoch) + fetcher.addPartitions(Map(partition -> initFetchState)) + + val batch = mkBatch(baseOffset = 0L, leaderEpoch = initEpoch, new SimpleRecord("a".getBytes)) + val leaderState = PartitionState(Seq(batch), leaderEpoch = initEpoch, highWatermark = 1L) + fetcher.mockLeader.setLeaderState(partition, leaderState) + + val partitionData = Map(partition -> new FetchRequest.PartitionData(Uuid.randomUuid(), 0, 
0, 1048576, Optional.of(initEpoch), Optional.of(initEpoch))).asJava + val fetchRequestOpt = FetchRequest.Builder.forReplica(0, 0, initEpoch, 0, Int.MaxValue, partitionData) + + fetcher.processFetchRequest(partitionData, fetchRequestOpt) + assertEquals(0, replicaState.logEndOffset, "FetchResponse should be ignored when leader epoch does not match") + } } diff --git a/core/src/test/scala/unit/kafka/server/AlterUserScramCredentialsRequestTest.scala b/core/src/test/scala/unit/kafka/server/AlterUserScramCredentialsRequestTest.scala index ced78873510..4ebe65ec9a9 100644 --- a/core/src/test/scala/unit/kafka/server/AlterUserScramCredentialsRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/AlterUserScramCredentialsRequestTest.scala @@ -244,6 +244,9 @@ class AlterUserScramCredentialsRequestTest extends BaseRequestTest { // create a bunch of credentials val request1_0 = new AlterUserScramCredentialsRequest.Builder( new AlterUserScramCredentialsRequestData() + .setDeletions(util.Arrays.asList( + new AlterUserScramCredentialsRequestData.ScramCredentialDeletion() + .setName(user2).setMechanism(ScramMechanism.SCRAM_SHA_256.`type`))) .setUpsertions(util.Arrays.asList( new AlterUserScramCredentialsRequestData.ScramCredentialUpsertion() .setName(user1).setMechanism(ScramMechanism.SCRAM_SHA_256.`type`) @@ -251,10 +254,15 @@ class AlterUserScramCredentialsRequestTest extends BaseRequestTest { .setSalt(saltBytes) .setSaltedPassword(saltedPasswordBytes), ))).build() + assertEquals("AlterUserScramCredentialsRequestData(" + + "deletions=[ScramCredentialDeletion(name='" + user2 + "', mechanism=" + ScramMechanism.SCRAM_SHA_256.`type` + ")], " + + "upsertions=[ScramCredentialUpsertion(name='" + user1 + "', mechanism=" + ScramMechanism.SCRAM_SHA_256.`type` + + ", iterations=4096, salt=[], saltedPassword=[])])", request1_0.toString) val results1_0 = sendAlterUserScramCredentialsRequest(request1_0).data.results - assertEquals(1, results1_0.size) - checkNoErrorsAlteringCredentials(results1_0) + assertEquals(2, results1_0.size) + assertEquals(1, results1_0.asScala.count(_.errorCode == Errors.RESOURCE_NOT_FOUND.code())) checkUserAppearsInAlterResults(results1_0, user1) + checkUserAppearsInAlterResults(results1_0, user2) // When creating credentials, do not update the same user more than once per request val request1_1 = new AlterUserScramCredentialsRequest.Builder( @@ -276,6 +284,8 @@ class AlterUserScramCredentialsRequestTest extends BaseRequestTest { .setSalt(saltBytes) .setSaltedPassword(saltedPasswordBytes), ))).build() + assertFalse(request1_1.toString.contains(saltBytes)) + assertFalse(request1_1.toString.contains(saltedPasswordBytes)) val results1_1 = sendAlterUserScramCredentialsRequest(request1_1).data.results assertEquals(3, results1_1.size) checkNoErrorsAlteringCredentials(results1_1) diff --git a/core/src/test/scala/unit/kafka/server/ConsumerGroupDescribeRequestTest.scala b/core/src/test/scala/unit/kafka/server/ConsumerGroupDescribeRequestTest.scala index f6831ca8e3d..709c3aa9dcd 100644 --- a/core/src/test/scala/unit/kafka/server/ConsumerGroupDescribeRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/ConsumerGroupDescribeRequestTest.scala @@ -211,6 +211,20 @@ class ConsumerGroupDescribeRequestTest(cluster: ClusterInstance) extends GroupCo ) assertEquals(expected, actual) + + val unknownGroupResponse = consumerGroupDescribe( + groupIds = List("grp-unknown"), + includeAuthorizedOperations = true, + version = version.toShort, + ) + assertEquals(Errors.GROUP_ID_NOT_FOUND.code, 
unknownGroupResponse.head.errorCode()) + + val emptyGroupResponse = consumerGroupDescribe( + groupIds = List(""), + includeAuthorizedOperations = true, + version = version.toShort, + ) + assertEquals(Errors.INVALID_GROUP_ID.code, emptyGroupResponse.head.errorCode()) } } finally { admin.close() diff --git a/core/src/test/scala/unit/kafka/server/ConsumerGroupHeartbeatRequestTest.scala b/core/src/test/scala/unit/kafka/server/ConsumerGroupHeartbeatRequestTest.scala index e94bcbc56a3..b6428540b18 100644 --- a/core/src/test/scala/unit/kafka/server/ConsumerGroupHeartbeatRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/ConsumerGroupHeartbeatRequestTest.scala @@ -301,6 +301,48 @@ class ConsumerGroupHeartbeatRequestTest(cluster: ClusterInstance) { } } + @ClusterTest + def testEmptyConsumerGroupId(): Unit = { + val admin = cluster.admin() + + // Creates the __consumer_offsets topics because it won't be created automatically + // in this test because it does not use FindCoordinator API. + try { + TestUtils.createOffsetsTopicWithAdmin( + admin = admin, + brokers = cluster.brokers.values().asScala.toSeq, + controllers = cluster.controllers().values().asScala.toSeq + ) + + // Heartbeat request to join the group. Note that the member subscribes + // to an nonexistent topic. + val consumerGroupHeartbeatRequest = new ConsumerGroupHeartbeatRequest.Builder( + new ConsumerGroupHeartbeatRequestData() + .setGroupId("") + .setMemberId(Uuid.randomUuid().toString) + .setMemberEpoch(0) + .setRebalanceTimeoutMs(5 * 60 * 1000) + .setSubscribedTopicNames(List("foo").asJava) + .setTopicPartitions(List.empty.asJava), + true + ).build() + + // Send the request until receiving a successful response. There is a delay + // here because the group coordinator is loaded in the background. + var consumerGroupHeartbeatResponse: ConsumerGroupHeartbeatResponse = null + TestUtils.waitUntilTrue(() => { + consumerGroupHeartbeatResponse = connectAndReceive(consumerGroupHeartbeatRequest) + consumerGroupHeartbeatResponse.data.errorCode == Errors.INVALID_REQUEST.code + }, msg = s"Did not receive the expected error. Last response $consumerGroupHeartbeatResponse.") + + // Verify the response. 
+ assertEquals(Errors.INVALID_REQUEST.code, consumerGroupHeartbeatResponse.data.errorCode) + assertEquals("GroupId can't be empty.", consumerGroupHeartbeatResponse.data.errorMessage) + } finally { + admin.close() + } + } + @ClusterTest def testConsumerGroupHeartbeatWithEmptySubscription(): Unit = { val admin = cluster.admin() diff --git a/core/src/test/scala/unit/kafka/server/DeleteGroupsRequestTest.scala b/core/src/test/scala/unit/kafka/server/DeleteGroupsRequestTest.scala index f9b9e9c946a..a17c22ea330 100644 --- a/core/src/test/scala/unit/kafka/server/DeleteGroupsRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/DeleteGroupsRequestTest.scala @@ -99,8 +99,8 @@ class DeleteGroupsRequestTest(cluster: ClusterInstance) extends GroupCoordinator ) deleteGroups( - groupIds = List("grp-non-empty", "grp"), - expectedErrors = List(Errors.NON_EMPTY_GROUP, Errors.NONE), + groupIds = List("grp-non-empty", "grp", ""), + expectedErrors = List(Errors.NON_EMPTY_GROUP, Errors.NONE, Errors.GROUP_ID_NOT_FOUND), version = version.toShort ) diff --git a/core/src/test/scala/unit/kafka/server/DescribeGroupsRequestTest.scala b/core/src/test/scala/unit/kafka/server/DescribeGroupsRequestTest.scala index 4f1ba4b9b2c..1223ad6efa9 100644 --- a/core/src/test/scala/unit/kafka/server/DescribeGroupsRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/DescribeGroupsRequestTest.scala @@ -104,10 +104,15 @@ class DescribeGroupsRequestTest(cluster: ClusterInstance) extends GroupCoordinat .setGroupId("grp-unknown") .setGroupState(ClassicGroupState.DEAD.toString) // Return DEAD group when the group does not exist. .setErrorCode(if (version >= 6) Errors.GROUP_ID_NOT_FOUND.code() else Errors.NONE.code()) - .setErrorMessage(if (version >= 6) "Group grp-unknown not found." else null) + .setErrorMessage(if (version >= 6) "Group grp-unknown not found." else null), + new DescribedGroup() + .setGroupId("") + .setGroupState(ClassicGroupState.DEAD.toString) // Return DEAD group when the group does not exist. + .setErrorCode(if (version >= 6) Errors.GROUP_ID_NOT_FOUND.code() else Errors.NONE.code()) + .setErrorMessage(if (version >= 6) "Group not found." else null) ), describeGroups( - groupIds = List("grp-1", "grp-2", "grp-unknown"), + groupIds = List("grp-1", "grp-2", "grp-unknown", ""), version = version.toShort ) ) diff --git a/core/src/test/scala/unit/kafka/server/HeartbeatRequestTest.scala b/core/src/test/scala/unit/kafka/server/HeartbeatRequestTest.scala index 332c01aeeb5..ad9f77c9fe5 100644 --- a/core/src/test/scala/unit/kafka/server/HeartbeatRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/HeartbeatRequestTest.scala @@ -190,6 +190,15 @@ class HeartbeatRequestTest(cluster: ClusterInstance) extends GroupCoordinatorBas expectedError = Errors.UNKNOWN_MEMBER_ID, version = version.toShort ) + + // Heartbeat with empty group id. + heartbeat( + groupId = "", + memberId = leaderMemberId, + generationId = -1, + expectedError = Errors.INVALID_GROUP_ID, + version = version.toShort + ) } } } diff --git a/core/src/test/scala/unit/kafka/server/JoinGroupRequestTest.scala b/core/src/test/scala/unit/kafka/server/JoinGroupRequestTest.scala index f77c2fc1bfa..3e8fad1ed43 100644 --- a/core/src/test/scala/unit/kafka/server/JoinGroupRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/JoinGroupRequestTest.scala @@ -149,6 +149,17 @@ class JoinGroupRequestTest(cluster: ClusterInstance) extends GroupCoordinatorBas ) ) + // Join with an empty group id. 
+ verifyJoinGroupResponseDataEquals( + new JoinGroupResponseData() + .setErrorCode(Errors.INVALID_GROUP_ID.code) + .setProtocolName(if (version >= 7) null else ""), + sendJoinRequest( + groupId = "", + version = version.toShort + ) + ) + // Join with an inconsistent protocolType. verifyJoinGroupResponseDataEquals( new JoinGroupResponseData() diff --git a/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala b/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala index c6ec0d00a35..f9f197569cb 100644 --- a/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala +++ b/core/src/test/scala/unit/kafka/server/KafkaApisTest.scala @@ -9800,7 +9800,12 @@ class KafkaApisTest extends Logging { @Test def testEmptyIncrementalAlterConfigsRequestWithKRaft(): Unit = { - val request = buildRequest(new IncrementalAlterConfigsRequest(new IncrementalAlterConfigsRequestData(), 1.toShort)) + val alterConfigsRequest = new IncrementalAlterConfigsRequest(new IncrementalAlterConfigsRequestData(), 1.toShort) + assertEquals( + "IncrementalAlterConfigsRequestData(resources=[], validateOnly=false)", + alterConfigsRequest.toString + ) + val request = buildRequest(alterConfigsRequest) metadataCache = MetadataCache.kRaftMetadataCache(brokerId, () => KRaftVersion.KRAFT_VERSION_0) when(clientRequestQuotaManager.maybeRecordAndGetThrottleTimeMs(any[RequestChannel.Request](), any[Long])).thenReturn(0) @@ -9812,7 +9817,7 @@ class KafkaApisTest extends Logging { @Test def testLog4jIncrementalAlterConfigsRequestWithKRaft(): Unit = { - val request = buildRequest(new IncrementalAlterConfigsRequest(new IncrementalAlterConfigsRequestData(). + val alterConfigsRequest = new IncrementalAlterConfigsRequest(new IncrementalAlterConfigsRequestData(). setValidateOnly(true). setResources(new IAlterConfigsResourceCollection(asList(new IAlterConfigsResource(). setResourceName(brokerId.toString). @@ -9820,7 +9825,16 @@ class KafkaApisTest extends Logging { setConfigs(new IAlterableConfigCollection(asList(new IAlterableConfig(). setName(Log4jController.ROOT_LOGGER). 
setValue("TRACE")).iterator()))).iterator())), - 1.toShort)) + 1.toShort) + assertEquals( + "IncrementalAlterConfigsRequestData(resources=[" + + "AlterConfigsResource(resourceType=" + BROKER_LOGGER.id() + ", " + + "resourceName='"+ brokerId + "', " + + "configs=[AlterableConfig(name='" + Log4jController.ROOT_LOGGER + "', configOperation=0, value='REDACTED')])], " + + "validateOnly=true)", + alterConfigsRequest.toString + ) + val request = buildRequest(alterConfigsRequest) metadataCache = MetadataCache.kRaftMetadataCache(brokerId, () => KRaftVersion.KRAFT_VERSION_0) when(clientRequestQuotaManager.maybeRecordAndGetThrottleTimeMs(any[RequestChannel.Request](), any[Long])).thenReturn(0) diff --git a/core/src/test/scala/unit/kafka/server/MockFetcherThread.scala b/core/src/test/scala/unit/kafka/server/MockFetcherThread.scala index 2754685b8f4..ff1e9196568 100644 --- a/core/src/test/scala/unit/kafka/server/MockFetcherThread.scala +++ b/core/src/test/scala/unit/kafka/server/MockFetcherThread.scala @@ -66,9 +66,12 @@ class MockFetcherThread(val mockLeader: MockLeaderEndPoint, partitions } - override def processPartitionData(topicPartition: TopicPartition, - fetchOffset: Long, - partitionData: FetchData): Option[LogAppendInfo] = { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + leaderEpochForReplica: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = { val state = replicaPartitionState(topicPartition) if (leader.isTruncationOnFetchSupported && FetchResponse.isDivergingEpoch(partitionData)) { @@ -87,17 +90,24 @@ class MockFetcherThread(val mockLeader: MockLeaderEndPoint, var shallowOffsetOfMaxTimestamp = -1L var lastOffset = state.logEndOffset var lastEpoch: OptionalInt = OptionalInt.empty() + var skipRemainingBatches = false for (batch <- batches) { batch.ensureValid() - if (batch.maxTimestamp > maxTimestamp) { - maxTimestamp = batch.maxTimestamp - shallowOffsetOfMaxTimestamp = batch.baseOffset + + skipRemainingBatches = skipRemainingBatches || hasHigherPartitionLeaderEpoch(batch, leaderEpochForReplica); + if (skipRemainingBatches) { + info(s"Skipping batch $batch because leader epoch is $leaderEpochForReplica") + } else { + if (batch.maxTimestamp > maxTimestamp) { + maxTimestamp = batch.maxTimestamp + shallowOffsetOfMaxTimestamp = batch.baseOffset + } + state.log.append(batch) + state.logEndOffset = batch.nextOffset + lastOffset = batch.lastOffset + lastEpoch = OptionalInt.of(batch.partitionLeaderEpoch) } - state.log.append(batch) - state.logEndOffset = batch.nextOffset - lastOffset = batch.lastOffset - lastEpoch = OptionalInt.of(batch.partitionLeaderEpoch) } state.logStartOffset = partitionData.logStartOffset @@ -107,7 +117,6 @@ class MockFetcherThread(val mockLeader: MockLeaderEndPoint, lastOffset, lastEpoch, maxTimestamp, - shallowOffsetOfMaxTimestamp, Time.SYSTEM.milliseconds(), state.logStartOffset, RecordValidationStats.EMPTY, @@ -116,6 +125,11 @@ class MockFetcherThread(val mockLeader: MockLeaderEndPoint, batches.headOption.map(_.lastOffset).getOrElse(-1))) } + private def hasHigherPartitionLeaderEpoch(batch: RecordBatch, leaderEpoch: Int): Boolean = { + batch.partitionLeaderEpoch() != RecordBatch.NO_PARTITION_LEADER_EPOCH && + batch.partitionLeaderEpoch() > leaderEpoch + } + override def truncate(topicPartition: TopicPartition, truncationState: OffsetTruncationState): Unit = { val state = replicaPartitionState(topicPartition) state.log = state.log.takeWhile { batch => diff --git 
a/core/src/test/scala/unit/kafka/server/OffsetFetchRequestTest.scala b/core/src/test/scala/unit/kafka/server/OffsetFetchRequestTest.scala index a504ecdeea0..3518f50b4c0 100644 --- a/core/src/test/scala/unit/kafka/server/OffsetFetchRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/OffsetFetchRequestTest.scala @@ -269,6 +269,39 @@ class OffsetFetchRequestTest(cluster: ClusterInstance) extends GroupCoordinatorB ) ) + // Fetch with empty group id. + assertEquals( + new OffsetFetchResponseData.OffsetFetchResponseGroup() + .setGroupId("") + .setTopics(List( + new OffsetFetchResponseData.OffsetFetchResponseTopics() + .setName("foo") + .setPartitions(List( + new OffsetFetchResponseData.OffsetFetchResponsePartitions() + .setPartitionIndex(0) + .setCommittedOffset(-1L), + new OffsetFetchResponseData.OffsetFetchResponsePartitions() + .setPartitionIndex(1) + .setCommittedOffset(-1L), + new OffsetFetchResponseData.OffsetFetchResponsePartitions() + .setPartitionIndex(5) + .setCommittedOffset(-1L) + ).asJava) + ).asJava), + fetchOffsets( + groupId = "", + memberId = memberId, + memberEpoch = memberEpoch, + partitions = List( + new TopicPartition("foo", 0), + new TopicPartition("foo", 1), + new TopicPartition("foo", 5) // This one does not exist. + ), + requireStable = requireStable, + version = version.toShort + ) + ) + // Fetch with stale member epoch. assertEquals( new OffsetFetchResponseData.OffsetFetchResponseGroup() diff --git a/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala index ff556f586c4..b0ee5a2d148 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaFetcherThreadTest.scala @@ -281,9 +281,22 @@ class ReplicaFetcherThreadTest { val fetchSessionHandler = new FetchSessionHandler(logContext, brokerEndPoint.id) val leader = new RemoteLeaderEndPoint(logContext.logPrefix, mockNetwork, fetchSessionHandler, config, replicaManager, quota, () => MetadataVersion.MINIMUM_VERSION, () => 1) - val thread = new ReplicaFetcherThread("bob", leader, config, failedPartitions, - replicaManager, quota, logContext.logPrefix, () => MetadataVersion.MINIMUM_VERSION) { - override def processPartitionData(topicPartition: TopicPartition, fetchOffset: Long, partitionData: FetchData): Option[LogAppendInfo] = None + val thread = new ReplicaFetcherThread( + "bob", + leader, + config, + failedPartitions, + replicaManager, + quota, + logContext.logPrefix, + () => MetadataVersion.MINIMUM_VERSION + ) { + override def processPartitionData( + topicPartition: TopicPartition, + fetchOffset: Long, + partitionLeaderEpoch: Int, + partitionData: FetchData + ): Option[LogAppendInfo] = None } thread.addPartitions(Map(t1p0 -> initialFetchState(Some(topicId1), initialLEO), t1p1 -> initialFetchState(Some(topicId1), initialLEO))) val partitions = Set(t1p0, t1p1) @@ -379,7 +392,7 @@ class ReplicaFetcherThreadTest { when(replicaManager.getPartitionOrException(t1p0)).thenReturn(partition) when(partition.localLogOrException).thenReturn(log) - when(partition.appendRecordsToFollowerOrFutureReplica(any(), any())).thenReturn(None) + when(partition.appendRecordsToFollowerOrFutureReplica(any(), any(), any())).thenReturn(None) val logContext = new LogContext(s"[ReplicaFetcher replicaId=${config.brokerId}, leaderId=${brokerEndPoint.id}, fetcherId=0] ") @@ -460,12 +473,11 @@ class ReplicaFetcherThreadTest { 
when(replicaManager.brokerTopicStats).thenReturn(mock(classOf[BrokerTopicStats])) when(partition.localLogOrException).thenReturn(log) - when(partition.appendRecordsToFollowerOrFutureReplica(any(), any())).thenReturn(Some(new LogAppendInfo( + when(partition.appendRecordsToFollowerOrFutureReplica(any(), any(), any())).thenReturn(Some(new LogAppendInfo( -1, 0, OptionalInt.empty, RecordBatch.NO_TIMESTAMP, - -1L, RecordBatch.NO_TIMESTAMP, -1L, RecordValidationStats.EMPTY, @@ -680,7 +692,7 @@ class ReplicaFetcherThreadTest { val partition: Partition = mock(classOf[Partition]) when(partition.localLogOrException).thenReturn(log) - when(partition.appendRecordsToFollowerOrFutureReplica(any[MemoryRecords], any[Boolean])).thenReturn(appendInfo) + when(partition.appendRecordsToFollowerOrFutureReplica(any[MemoryRecords], any[Boolean], any[Int])).thenReturn(appendInfo) // Capture the argument at the time of invocation. val completeDelayedFetchRequestsArgument = mutable.Buffer.empty[TopicPartition] @@ -711,8 +723,8 @@ class ReplicaFetcherThreadTest { .setRecords(records) .setHighWatermark(highWatermarkReceivedFromLeader) - thread.processPartitionData(tp0, 0, partitionData.setPartitionIndex(0)) - thread.processPartitionData(tp1, 0, partitionData.setPartitionIndex(1)) + thread.processPartitionData(tp0, 0, Int.MaxValue, partitionData.setPartitionIndex(0)) + thread.processPartitionData(tp1, 0, Int.MaxValue, partitionData.setPartitionIndex(1)) verify(replicaManager, times(0)).completeDelayedFetchRequests(any[Seq[TopicPartition]]) thread.doWork() @@ -762,7 +774,7 @@ class ReplicaFetcherThreadTest { when(partition.localLogOrException).thenReturn(log) when(partition.isReassigning).thenReturn(isReassigning) when(partition.isAddingLocalReplica).thenReturn(isReassigning) - when(partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false)).thenReturn(None) + when(partition.appendRecordsToFollowerOrFutureReplica(records, isFuture = false, Int.MaxValue)).thenReturn(None) val replicaManager: ReplicaManager = mock(classOf[ReplicaManager]) when(replicaManager.getPartitionOrException(any[TopicPartition])).thenReturn(partition) @@ -786,7 +798,7 @@ class ReplicaFetcherThreadTest { .setLastStableOffset(0) .setLogStartOffset(0) .setRecords(records) - thread.processPartitionData(t1p0, 0, partitionData) + thread.processPartitionData(t1p0, 0, Int.MaxValue, partitionData) if (isReassigning) assertEquals(records.sizeInBytes(), brokerTopicStats.allTopicsStats.reassignmentBytesInPerSec.get.count()) diff --git a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala index 7ca5e4d4c24..fcc99ef35e1 100644 --- a/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala +++ b/core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala @@ -5263,9 +5263,12 @@ class ReplicaManagerTest { replicaManager.getPartition(topicPartition) match { case HostedPartition.Online(partition) => partition.appendRecordsToFollowerOrFutureReplica( - records = MemoryRecords.withRecords(Compression.NONE, 0, - new SimpleRecord("first message".getBytes)), - isFuture = false + records = MemoryRecords.withRecords( + Compression.NONE, 0, + new SimpleRecord("first message".getBytes) + ), + isFuture = false, + partitionLeaderEpoch = 0 ) case _ => diff --git a/core/src/test/scala/unit/kafka/server/SyncGroupRequestTest.scala b/core/src/test/scala/unit/kafka/server/SyncGroupRequestTest.scala index 3a53fbf144a..f89defd3f19 100644 --- 
a/core/src/test/scala/unit/kafka/server/SyncGroupRequestTest.scala +++ b/core/src/test/scala/unit/kafka/server/SyncGroupRequestTest.scala @@ -76,6 +76,17 @@ class SyncGroupRequestTest(cluster: ClusterInstance) extends GroupCoordinatorBas version = version.toShort ) + // Sync with empty group id. + verifySyncGroupWithOldProtocol( + groupId = "", + memberId = "member-id", + generationId = -1, + expectedProtocolType = null, + expectedProtocolName = null, + expectedError = Errors.INVALID_GROUP_ID, + version = version.toShort + ) + val metadata = ConsumerProtocol.serializeSubscription( new ConsumerPartitionAssignor.Subscription(Collections.singletonList("foo")) ).array diff --git a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala index 1978018f05f..0d2df4ae608 100644 --- a/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala +++ b/core/src/test/scala/unit/kafka/server/metadata/BrokerMetadataPublisherTest.scala @@ -18,13 +18,18 @@ package kafka.server.metadata import kafka.coordinator.transaction.TransactionCoordinator + +import java.util.OptionalInt import kafka.log.LogManager import kafka.server.{BrokerServer, KafkaConfig, ReplicaManager} import kafka.utils.TestUtils import org.apache.kafka.clients.admin.AlterConfigOp.OpType.SET import org.apache.kafka.clients.admin.{Admin, AlterConfigOp, ConfigEntry, NewTopic} +import org.apache.kafka.common.Uuid import org.apache.kafka.common.config.ConfigResource import org.apache.kafka.common.config.ConfigResource.Type.BROKER +import org.apache.kafka.common.internals.Topic +import org.apache.kafka.common.metadata.{PartitionRecord, RemoveTopicRecord, TopicRecord} import org.apache.kafka.common.test.{KafkaClusterTestKit, TestKitNodes} import org.apache.kafka.common.utils.Exit import org.apache.kafka.coordinator.group.GroupCoordinator @@ -178,6 +183,69 @@ class BrokerMetadataPublisherTest { } } + @Test + def testGroupCoordinatorTopicDeletion(): Unit = { + val config = KafkaConfig.fromProps(TestUtils.createBrokerConfig(0)) + val metadataCache = new KRaftMetadataCache(0, () => KRaftVersion.KRAFT_VERSION_1) + val logManager = mock(classOf[LogManager]) + val replicaManager = mock(classOf[ReplicaManager]) + val groupCoordinator = mock(classOf[GroupCoordinator]) + val faultHandler = mock(classOf[FaultHandler]) + + val metadataPublisher = new BrokerMetadataPublisher( + config, + metadataCache, + logManager, + replicaManager, + groupCoordinator, + mock(classOf[TransactionCoordinator]), + Some(mock(classOf[ShareCoordinator])), + mock(classOf[DynamicConfigPublisher]), + mock(classOf[DynamicClientQuotaPublisher]), + mock(classOf[DynamicTopicClusterQuotaPublisher]), + mock(classOf[ScramPublisher]), + mock(classOf[DelegationTokenPublisher]), + mock(classOf[AclPublisher]), + faultHandler, + faultHandler + ) + + val topicId = Uuid.randomUuid() + var delta = new MetadataDelta(MetadataImage.EMPTY) + delta.replay(new TopicRecord() + .setName(Topic.GROUP_METADATA_TOPIC_NAME) + .setTopicId(topicId) + ) + delta.replay(new PartitionRecord() + .setTopicId(topicId) + .setPartitionId(0) + .setLeader(config.brokerId) + ) + delta.replay(new PartitionRecord() + .setTopicId(topicId) + .setPartitionId(1) + .setLeader(config.brokerId) + ) + val image = delta.apply(MetadataProvenance.EMPTY) + + delta = new MetadataDelta(image) + delta.replay(new RemoveTopicRecord() + .setTopicId(topicId) + ) + + metadataPublisher.onMetadataUpdate(delta, 
delta.apply(MetadataProvenance.EMPTY), + LogDeltaManifest.newBuilder() + .provenance(MetadataProvenance.EMPTY) + .leaderAndEpoch(LeaderAndEpoch.UNKNOWN) + .numBatches(1) + .elapsedNs(100) + .numBytes(42) + .build()) + + verify(groupCoordinator).onResignation(0, OptionalInt.empty()) + verify(groupCoordinator).onResignation(1, OptionalInt.empty()) + } + @Test def testNewImagePushedToGroupCoordinator(): Unit = { val config = KafkaConfig.fromProps(TestUtils.createBrokerConfig(0)) diff --git a/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala b/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala index b86a5608c3d..bf8cafac629 100644 --- a/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala +++ b/core/src/test/scala/unit/kafka/tools/DumpLogSegmentsTest.scala @@ -36,7 +36,7 @@ import org.apache.kafka.common.compress.Compression import org.apache.kafka.common.config.TopicConfig import org.apache.kafka.common.metadata.{PartitionChangeRecord, RegisterBrokerRecord, TopicRecord} import org.apache.kafka.common.protocol.{ByteBufferAccessor, ObjectSerializationCache} -import org.apache.kafka.common.record.{ControlRecordType, EndTransactionMarker, MemoryRecords, Record, RecordBatch, RecordVersion, SimpleRecord} +import org.apache.kafka.common.record.{ControlRecordType, EndTransactionMarker, MemoryRecords, Record, RecordVersion, SimpleRecord} import org.apache.kafka.common.utils.{Exit, Utils} import org.apache.kafka.coordinator.common.runtime.CoordinatorRecord import org.apache.kafka.coordinator.group.GroupCoordinatorRecordSerde @@ -402,7 +402,7 @@ class DumpLogSegmentsTest { log = LogTestUtils.createLog(logDir, logConfig, new BrokerTopicStats, time.scheduler, time) log.appendAsLeader(MemoryRecords.withRecords(Compression.NONE, metadataRecords:_*), leaderEpoch = 0) val secondSegment = log.roll() - secondSegment.append(1L, RecordBatch.NO_TIMESTAMP, 1L, MemoryRecords.withRecords(Compression.NONE, metadataRecords:_*)) + secondSegment.append(1L, MemoryRecords.withRecords(Compression.NONE, metadataRecords: _*)) secondSegment.flush() log.flush(true) diff --git a/docs/js/templateData.js b/docs/js/templateData.js index 4009b347a4e..28cc698af4d 100644 --- a/docs/js/templateData.js +++ b/docs/js/templateData.js @@ -19,6 +19,6 @@ limitations under the License. var context={ "version": "40inkless", "dotVersion": "4.0-inkless", - "fullDotVersion": "4.0.0-inkless", + "fullDotVersion": "4.0.1-inkless", "scalaVersion": "2.13" }; diff --git a/docs/streams/core-concepts.html b/docs/streams/core-concepts.html index d9a2851e271..e82d5fcbb61 100644 --- a/docs/streams/core-concepts.html +++ b/docs/streams/core-concepts.html @@ -279,7 +279,7 @@

Lambda Architecture. + to the stream processing pipeline, known as the Lambda Architecture. Prior to 0.11.0.0, Kafka only provides at-least-once delivery guarantees and hence any stream processing systems that leverage it as the backend storage could not guarantee end-to-end exactly-once semantics. In fact, even for those stream processing systems that claim to support exactly-once processing, as long as they are reading from / writing to Kafka as the source / sink, their applications cannot actually guarantee that no duplicates will be generated throughout the pipeline.
diff --git a/docs/streams/developer-guide/config-streams.html b/docs/streams/developer-guide/config-streams.html index a7d82b97ccd..b14bd04272b 100644 --- a/docs/streams/developer-guide/config-streams.html +++ b/docs/streams/developer-guide/config-streams.html @@ -296,12 +296,12 @@

num.standby.replicas commit.interval.ms Low The frequency in milliseconds with which to save the position (offsets in source topics) of tasks. - 30000 (30 seconds) + 30000 (30 seconds) (at-least-once) / 100 (exactly-once) default.deserialization.exception.handler (Deprecated. Use deserialization.exception.handler instead.) Medium Exception handling class that implements the DeserializationExceptionHandler interface. - LogAndContinueExceptionHandler + LogAndFailExceptionHandler default.key.serde Medium @@ -326,11 +326,10 @@

num.standby.replicas null - default.dsl.store + default.dsl.store (Deprecated. Use dsl.store.suppliers.class instead.) Low - [DEPRECATED] The default state store type used by DSL operators. Deprecated in - favor of dsl.store.suppliers.class + The default state store type used by DSL operators. "ROCKS_DB" @@ -481,54 +480,59 @@

num.standby.replicas-1 - retry.backoff.ms + repartition.purge.interval.ms + Low + The frequency in milliseconds with which to delete fully consumed records from repartition topics. Purging will occur after at least this value since the last purge, but may be delayed until later. + 30000 (30 seconds) + + retry.backoff.ms Low The amount of time in milliseconds, before a request is retried. 100 - rocksdb.config.setter + rocksdb.config.setter Medium The RocksDB configuration. null - state.cleanup.delay.ms + state.cleanup.delay.ms Low The amount of time in milliseconds to wait before deleting state when a partition has migrated. 600000 (10 minutes) - state.dir + state.dir High Directory location for state stores. /${java.io.tmpdir}/kafka-streams - task.assignor.class + task.assignor.class Medium A task assignor class or class name implementing the TaskAssignor interface. The high-availability task assignor. - task.timeout.ms + task.timeout.ms Medium The maximum amount of time in milliseconds a task might stall due to internal errors and retries until an error is raised. For a timeout of 0 ms, a task would raise an error for the first internal error. For any timeout larger than 0 ms, a task will retry at least once before an error is raised. 300000 (5 minutes) - topology.optimization + topology.optimization Medium A configuration telling Kafka Streams if it should optimize the topology and what optimizations to apply. Acceptable values are: StreamsConfig.NO_OPTIMIZATION (none), StreamsConfig.OPTIMIZE (all) or a comma separated list of specific optimizations: StreamsConfig.REUSE_KTABLE_SOURCE_TOPICS (reuse.ktable.source.topics), StreamsConfig.MERGE_REPARTITION_TOPICS (merge.repartition.topics), StreamsConfig.SINGLE_STORE_SELF_JOIN (single.store.self.join). "NO_OPTIMIZATION" - upgrade.from + upgrade.from Medium The version you are upgrading from during a rolling upgrade. See Upgrade From null - windowstore.changelog.additional.retention.ms + windowstore.changelog.additional.retention.ms Low Added to a windows maintainMs to ensure data is not deleted from the log prematurely. Allows for clock drift. 86400000 (1 day) - window.size.ms + window.size.ms Low Sets window size for the deserializer in order to calculate window end times. null diff --git a/docs/streams/developer-guide/dsl-api.html b/docs/streams/developer-guide/dsl-api.html index b59ac764f32..4de5389ac75 100644 --- a/docs/streams/developer-guide/dsl-api.html +++ b/docs/streams/developer-guide/dsl-api.html @@ -764,10 +764,10 @@
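    For illustration only (not part of this patch), the entries in the table above are ordinary StreamsConfig properties; a minimal sketch of setting a few of them, with placeholder application id, bootstrap servers, and values:

    import java.util.Properties;
    import org.apache.kafka.streams.StreamsConfig;

    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-streams-app");        // placeholder
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // placeholder
    // commit.interval.ms: 30000 by default (at-least-once), 100 under exactly-once
    props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 10_000L);
    // repartition.purge.interval.ms: how often fully consumed repartition records are deleted
    props.put("repartition.purge.interval.ms", 60_000L);
    // state.dir: directory for local state stores
    props.put(StreamsConfig.STATE_DIR_CONFIG, "/var/lib/kafka-streams");     // placeholder path

    The props object would then be passed to the KafkaStreams constructor as usual.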

Manually trigger repartitioning of the stream with desired number of partitions. (details)

- repartition() is similar to through() however Kafka Streams will manage the topic for you. + Kafka Streams will manage the topic for repartition(). Generated topic is treated as internal topic, as a result data will be purged automatically as any other internal repartition topic. In addition, you can specify the desired number of partitions, which allows to easily scale in/out downstream sub-topologies. - repartition() operation always triggers repartitioning of the stream, as a result it can be used with embedded Processor API methods (like transform() et al.) that do not trigger auto repartitioning when key changing operation is performed beforehand. + The repartition() operation always triggers repartitioning of the stream; as a result, it can be used with embedded Processor API methods (like process() et al.) that do not trigger auto-repartitioning when a key-changing operation is performed beforehand.
KStream<byte[], String> stream = ... ;
 KStream<byte[], String> repartitionedStream = stream.repartition(Repartitioned.numberOfPartitions(10));
@@ -3130,15 +3130,20 @@

Operations and concepts

Processor (provided by a given ProcessorSupplier);
  • KStream#processValues: Process all records in a stream, one record at a time, by applying a - FixedKeyProcessor (provided by a given FixedKeyProcessorSupplier); + FixedKeyProcessor (provided by a given FixedKeyProcessorSupplier) + [CAUTION: If you are deploying a new Kafka Streams application and you are using the + "merge repartition topics" optimization, you should enable the fix for + KAFKA-19668 to avoid compatibility + issues with future upgrades to newer versions of Kafka Streams; + for more details, see the migration guide below];
  • Processor: A processor of key-value pair records;
  • ContextualProcessor: An abstract implementation of Processor that manages the - ProcessorContext instance. + ProcessorContext instance;
  • FixedKeyProcessor: A processor of key-value pair records where keys are immutable;
  • ContextualFixedKeyProcessor: An abstract implementation of FixedKeyProcessor that - manages the FixedKeyProcessorContext instance. + manages the FixedKeyProcessorContext instance;
  • ProcessorSupplier: A processor supplier that can create one or more Processor instances; and
  • @@ -3456,6 +3461,25 @@

    The Processor API now serves as a unified replacement for all these methods. It simplifies the API surface while maintaining support for both stateless and stateful operations.

    + +

    CAUTION: If you are using KStream.transformValues() and you have the "merge repartition topics" + optimization enabled, rewriting your program to KStream.processValues() might not be safe due to + KAFKA-19668. In this case, you should not upgrade + to Kafka Streams 4.0.0 or 4.1.0, but use Kafka Streams 4.0.1 instead, which contains a fix. + Note that the fix is not enabled by default for backward compatibility reasons, and you would need to + enable it by setting the config __enable.process.processValue.fix__ = true and passing it + into the StreamsBuilder() constructor.

    +
    final Properties properties = new Properties();
    +properties.put(StreamsConfig.APPLICATION_ID_CONFIG, ...);
    +properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, ...);
    +properties.put(TopologyConfig.InternalConfig.ENABLE_PROCESS_PROCESSVALUE_FIX, true);
    +
    +final StreamsBuilder builder = new StreamsBuilder(new TopologyConfig(new StreamsConfig(properties)));
    + +

    It is recommended that you compare the output of Topology.describe() for the old and new topology, + to verify that the rewrite to processValues() is correct and does not introduce any incompatibilities. + You should also test the upgrade in a non-production environment.
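    A rough sketch of such a check (not part of this patch; topic names, the uppercase logic, and the stored old description are placeholders). It builds the rewritten topology with the fix enabled, mirroring the snippet above, and prints Topology.describe() so it can be diffed against the description captured from the transformValues()-based version:

    import java.util.Properties;
    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.Topology;
    import org.apache.kafka.streams.TopologyConfig;
    import org.apache.kafka.streams.kstream.Consumed;
    import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
    import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
    import org.apache.kafka.streams.processor.api.FixedKeyRecord;

    final Properties properties = new Properties();
    properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "migration-check");      // placeholder
    properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");    // placeholder
    properties.put(TopologyConfig.InternalConfig.ENABLE_PROCESS_PROCESSVALUE_FIX, true);

    final FixedKeyProcessorSupplier<String, String, String> upperCaser =
        () -> new ContextualFixedKeyProcessor<String, String, String>() {
            @Override
            public void process(final FixedKeyRecord<String, String> record) {
                context().forward(record.withValue(record.value().toUpperCase()));
            }
        };

    final StreamsBuilder builder = new StreamsBuilder(new TopologyConfig(new StreamsConfig(properties)));
    builder.stream("input", Consumed.with(Serdes.String(), Serdes.String()))
        .processValues(upperCaser)   // was: transformValues(...) before the rewrite
        .to("output");

    final Topology topology = builder.build();
    // Diff this against the Topology.describe() output captured from the old,
    // transformValues()-based application before rolling out the rewrite.
    System.out.println(topology.describe());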

    +

    Migration Examples

    To migrate from the deprecated transform, transformValues, flatTransform, and flatTransformValues methods to the Processor API (PAPI) in Kafka Streams, let's resume the diff --git a/docs/streams/upgrade-guide.html b/docs/streams/upgrade-guide.html index 1c6de66ab80..f3d9713d927 100644 --- a/docs/streams/upgrade-guide.html +++ b/docs/streams/upgrade-guide.html @@ -148,7 +148,15 @@

    Streams API
    • Old processor APIs
    • KStream#through() in both Java and Scala
    • -
    • "transformer" methods and classes in both Java and Scala
    • +
    • + "transformer" methods and classes in both Java and Scala +
        +
      • migrating from KStream#transformValues() to KStream#processValues() might not be safe + due to KAFKA-19668. + Please refer to the migration guide for more details.
      • +
      +
    • kstream.KStream#branch in both Java and Scala
    • builder methods for Time/Session/Join/SlidingWindows
    • KafkaStreams#setUncaughtExceptionHandler()
    • @@ -231,22 +239,22 @@

      Streams API

      - You can now configure your topology with a ProcessorWrapper, which allows you to access and optionally wrap/replace - any processor in the topology by injecting an alternative ProcessorSupplier in its place. This can be used to peek - records and access the processor context even for DSL operators, for example to implement a logging or tracing framework, or to - aid in testing or debugging scenarios. You must implement the ProcessorWrapper interface and then pass the class - or class name into the configs via the new StreamsConfig#PROCESSOR_WRAPPER_CLASS_CONFIG config. NOTE: this config is - applied during the topology building phase, and therefore will not take effect unless the config is passed in when creating - the StreamsBuilder (DSL) or Topology(PAPI) objects. You MUST use the StreamsBuilder/Topology constructor overload that - accepts a TopologyConfig parameter for the StreamsConfig#PROCESSOR_WRAPPER_CLASS_CONFIG to be picked up. - See KIP-1112 for more details. + You can now configure your topology with a ProcessorWrapper, which allows you to access and optionally wrap/replace + any processor in the topology by injecting an alternative ProcessorSupplier in its place. This can be used to peek + records and access the processor context even for DSL operators, for example to implement a logging or tracing framework, or to + aid in testing or debugging scenarios. You must implement the ProcessorWrapper interface and then pass the class + or class name into the configs via the new StreamsConfig#PROCESSOR_WRAPPER_CLASS_CONFIG config. NOTE: this config is + applied during the topology building phase, and therefore will not take effect unless the config is passed in when creating + the StreamsBuilder (DSL) or Topology(PAPI) objects. You MUST use the StreamsBuilder/Topology constructor overload that + accepts a TopologyConfig parameter for the StreamsConfig#PROCESSOR_WRAPPER_CLASS_CONFIG to be picked up. + See KIP-1112 for more details.
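      As a minimal sketch of that wiring (not part of this patch; com.example.LoggingProcessorWrapper stands in for a user-supplied ProcessorWrapper implementation, and the application id and bootstrap servers are placeholders):

    import java.util.Properties;
    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.TopologyConfig;

    final Properties props = new Properties();
    props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wrapped-app");            // placeholder
    props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");      // placeholder
    // Hypothetical class implementing the ProcessorWrapper interface from KIP-1112.
    props.put(StreamsConfig.PROCESSOR_WRAPPER_CLASS_CONFIG, "com.example.LoggingProcessorWrapper");

    // The wrapper is applied while the topology is built, so the TopologyConfig-accepting
    // constructor must be used; a plain `new StreamsBuilder()` would not pick up the config.
    final StreamsBuilder builder = new StreamsBuilder(new TopologyConfig(new StreamsConfig(props)));

      As the paragraph above notes, the equivalent Topology constructor overload taking a TopologyConfig must be used when building a Processor API topology directly.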

      Upgraded RocksDB dependency to version 9.7.3 (from 7.9.2). This upgrade incorporates various improvements and optimizations within RocksDB. However, it also introduces some API changes. The org.rocksdb.AccessHint class, along with its associated methods, has been removed. Several methods related to compressed block cache configuration in the BlockBasedTableConfig class have been removed, including blockCacheCompressedNumShardBits, blockCacheCompressedSize, and their corresponding setters. These functionalities are now consolidated under the cache option, and developers should configure their compressed block cache using the setCache method instead. - The NO_FILE_CLOSES field has been removed from the org.rocksdb.TickerTypeenum as a result the number-open-files metrics does not work as expected. Metric number-open-files returns constant -1 from now on until it will officially be removed. + The NO_FILE_CLOSES field has been removed from the org.rocksdb.TickerTypeenum as a result the number-open-files metrics does not work as expected. Metric number-open-files returns constant -1 from now on until it will officially be removed. The org.rocksdb.Options.setLogger() method now accepts a LoggerInterface as a parameter instead of the previous Logger. Some data types used in RocksDB's Java API have been modified. These changes, along with the removed class, field, and new methods, are primarily relevant to users implementing custom RocksDB configurations. These changes are expected to be largely transparent to most Kafka Streams users. However, those employing advanced RocksDB customizations within their Streams applications, particularly through the rocksdb.config.setter, are advised to consult the detailed RocksDB 9.7.3 changelog to ensure a smooth transition and adapt their configurations as needed. Specifically, users leveraging the removed AccessHint class, the removed methods from the BlockBasedTableConfig class, the NO_FILE_CLOSES field from TickerType, or relying on the previous signature of setLogger() will need to update their implementations. @@ -525,6 +533,11 @@
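      For readers affected by the removed compressed-block-cache setters mentioned above, here is a hedged sketch of a rocksdb.config.setter implementation that routes caching through a single shared org.rocksdb.Cache; the cache size is a placeholder, and the exact RocksDB methods should be double-checked against the bundled 9.7.3 API:

    import java.util.Map;
    import org.apache.kafka.streams.state.RocksDBConfigSetter;
    import org.rocksdb.BlockBasedTableConfig;
    import org.rocksdb.Cache;
    import org.rocksdb.LRUCache;
    import org.rocksdb.Options;

    public class CustomRocksDBConfig implements RocksDBConfigSetter {
        // One cache shared by all store instances; sized arbitrarily for the example.
        private static final Cache CACHE = new LRUCache(16 * 1024 * 1024L);

        @Override
        public void setConfig(final String storeName, final Options options, final Map<String, Object> configs) {
            final BlockBasedTableConfig tableConfig = (BlockBasedTableConfig) options.tableFormatConfig();
            tableConfig.setBlockCache(CACHE);          // consolidated cache option
            options.setTableFormatConfig(tableConfig);
        }

        @Override
        public void close(final String storeName, final Options options) {
            // Do not close the shared cache here; it is reused by every store instance.
        }
    }

      Such a class would be registered via StreamsConfig.ROCKSDB_CONFIG_SETTER_CLASS_CONFIG in the application's properties.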

      Streams API FixedKeyProcessorContext, and ContextualFixedKeyProcessor are introduced to guard against disallowed key modification inside processValues(). Furthermore, ProcessingContext is added for a better interface hierarchy. + CAUTION: The newly added KStream.processValues() method introduced a regression bug + (KAFKA-19668). + If you have the "merge repartition topics" optimization enabled, it is not safe to migrate from transformValues() + to processValues() in the 3.3.0 release. The bug is only fixed in Kafka Streams 4.0.1, 4.1.1, and 4.2.0. + For more details, please refer to the migration guide.
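      For illustration only (not part of this patch), the guard works at the type level: processValues() hands the processor a FixedKeyRecord, which lets you replace the value, headers, or timestamp but offers no way to replace the key. A minimal sketch assuming String keys and values:

    import org.apache.kafka.streams.processor.api.ContextualFixedKeyProcessor;
    import org.apache.kafka.streams.processor.api.FixedKeyRecord;

    public class ValueTrimmingProcessor extends ContextualFixedKeyProcessor<String, String, String> {
        @Override
        public void process(final FixedKeyRecord<String, String> record) {
            // The value can be rewritten, but FixedKeyRecord has no withKey(), so the key
            // (and therefore the partitioning) cannot be changed inside processValues().
            context().forward(record.withValue(record.value().trim()));
        }
    }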

      diff --git a/docs/toc.html b/docs/toc.html index c42961cf7fb..f0bd3e84e47 100644 --- a/docs/toc.html +++ b/docs/toc.html @@ -148,6 +148,7 @@

    • 6.7 Monitoring
      • Security Considerations for Remote Monitoring using JMX +
      • Group Coordinator Monitoring
      • Tiered Storage Monitoring
      • KRaft Monitoring
      • Selector Monitoring diff --git a/docs/upgrade.html b/docs/upgrade.html index eba558f39ce..86507f66e9d 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -19,9 +19,9 @@