[exporter/kafka] Enable Trace batch chunking before exporting to kafka #37176
Draft
shivanshuraj1333 wants to merge 8 commits into open-telemetry:main from shivanshuraj1333:fix/issues/36982 (base: main)
Changes from all 8 commits:
- 691860f feat: process message chunks for kafka exporter (shivanshuraj1333)
- 45c15b8 feat: implement size based chunking for jaeger marshaler (shivanshuraj1333)
- 52a48bc test: add unit tests for jaeger marshaler (shivanshuraj1333)
- 09bdc64 feat: implement size based chunking with binary search for marshaler (shivanshuraj1333)
- 8a6f66f test: add unit tests for marshaler (shivanshuraj1333)
- 9e06f7d feat: implement size based chunking with binary search for pdata mars… (shivanshuraj1333)
- 0465509 test: add unit tests for pdata marshaler (shivanshuraj1333)
- 90814f2 chore: refactor (shivanshuraj1333)
@@ -5,6 +5,7 @@ package kafkaexporter // import "github.com/open-telemetry/opentelemetry-collect
 import (
 	"bytes"
+	"fmt"
 
 	"github.com/IBM/sarama"
 	"github.com/gogo/protobuf/jsonpb"
@@ -16,34 +17,78 @@ import (
 )
 
 type jaegerMarshaler struct {
-	marshaler jaegerSpanMarshaler
+	marshaler            jaegerSpanMarshaler
+	partitionedByTraceID bool
+	maxMessageBytes      int
 }
 
 var _ TracesMarshaler = (*jaegerMarshaler)(nil)
 
-func (j jaegerMarshaler) Marshal(traces ptrace.Traces, topic string) ([]*sarama.ProducerMessage, error) {
+func (j jaegerMarshaler) Marshal(traces ptrace.Traces, topic string) ([]*ProducerMessageChunks, error) {
+	if j.maxMessageBytes <= 0 {
+		return nil, fmt.Errorf("maxMessageBytes must be positive, got %d", j.maxMessageBytes)
+	}
+
 	batches := jaeger.ProtoFromTraces(traces)
-	var messages []*sarama.ProducerMessage
+	if len(batches) == 0 {
+		return []*ProducerMessageChunks{}, nil
+	}
 
+	var messages []*sarama.ProducerMessage
+	var messageChunks []*ProducerMessageChunks
+	var packetSize int
 	var errs error
+
 	for _, batch := range batches {
 		for _, span := range batch.Spans {
 			span.Process = batch.Process
+
 			bts, err := j.marshaler.marshal(span)
 			// continue to process spans that can be serialized
 			if err != nil {
-				errs = multierr.Append(errs, err)
+				errs = multierr.Append(errs, fmt.Errorf("failed to marshal span %s: %w", span.SpanID.String(), err))
 				continue
 			}
+
 			key := []byte(span.TraceID.String())
-			messages = append(messages, &sarama.ProducerMessage{
+			msg := &sarama.ProducerMessage{
 				Topic: topic,
 				Value: sarama.ByteEncoder(bts),
 				Key:   sarama.ByteEncoder(key),
-			})
+			}
+
+			// Deriving version from sarama library
+			// https://github.com/IBM/sarama/blob/main/async_producer.go#L454
+			currentMsgSize := msg.ByteSize(2)
+
+			// Check if message itself exceeds size limit
+			if currentMsgSize > j.maxMessageBytes {
+				errs = multierr.Append(errs, fmt.Errorf("span %s exceeds maximum message size: %d > %d",
+					span.SpanID.String(), currentMsgSize, j.maxMessageBytes))
+				continue
+			}
+
+			// Check if adding this message would exceed the chunk size limit
+			if (packetSize + currentMsgSize) <= j.maxMessageBytes {
+				packetSize += currentMsgSize
+				messages = append(messages, msg)
+			} else {
+				// Current chunk is full, create new chunk
+				if len(messages) > 0 {
+					messageChunks = append(messageChunks, &ProducerMessageChunks{messages})
+					messages = make([]*sarama.ProducerMessage, 0, len(messages)) // Preallocate with previous size
+				}
+				messages = append(messages, msg)
+				packetSize = currentMsgSize
+			}
 		}
 	}
-	return messages, errs
+
+	// Add final chunk if there are remaining messages
+	if len(messages) > 0 {
+		messageChunks = append(messageChunks, &ProducerMessageChunks{messages})
+	}
+
+	return messageChunks, errs
 }
 
 func (j jaegerMarshaler) Encoding() string {

Review comment on the new `maxMessageBytes` guard: this can be checked once in the `Config.Validate` method instead.
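A minimal sketch of what that suggestion could look like, assuming the exporter's `Config` carries the producer's `max_message_bytes` setting; the field and type shapes below are illustrative assumptions, not copied from this PR:

```go
// Illustrative sketch only: the Config/Producer shapes are assumptions, not
// the actual kafkaexporter types. The point is to reject a non-positive
// limit once at config validation time rather than on every Marshal call.
package kafkaexporter

import "fmt"

type Producer struct {
	// MaxMessageBytes is the maximum permitted message size (assumed field).
	MaxMessageBytes int `mapstructure:"max_message_bytes"`
}

type Config struct {
	Producer Producer `mapstructure:"producer"`
}

func (cfg *Config) Validate() error {
	if cfg.Producer.MaxMessageBytes <= 0 {
		return fmt.Errorf("producer.max_message_bytes must be positive, got %d", cfg.Producer.MaxMessageBytes)
	}
	return nil
}
```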
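For readers following the chunking loop in the diff above, here is a minimal, self-contained sketch of the same greedy size-based packing, factored out as a standalone helper. `chunkBySize` and the `[][]*sarama.ProducerMessage` return shape are illustrative stand-ins for the PR's `ProducerMessageChunks` wrapper; only `ProducerMessage.ByteSize(2)` is taken from the diff itself.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/IBM/sarama"
)

// chunkBySize greedily packs messages into chunks whose summed ByteSize(2)
// stays within maxBytes; messages that are individually too large are skipped
// and reported. It mirrors the loop in the diff above, minus span marshaling.
func chunkBySize(msgs []*sarama.ProducerMessage, maxBytes int) ([][]*sarama.ProducerMessage, error) {
	var (
		chunks      [][]*sarama.ProducerMessage
		current     []*sarama.ProducerMessage
		currentSize int
		errs        error
	)
	for _, msg := range msgs {
		size := msg.ByteSize(2) // record format v2, as referenced in the diff
		if size > maxBytes {
			errs = errors.Join(errs, fmt.Errorf("message of %d bytes exceeds limit %d", size, maxBytes))
			continue
		}
		if currentSize+size > maxBytes && len(current) > 0 {
			chunks = append(chunks, current) // current chunk is full, start a new one
			current = nil
			currentSize = 0
		}
		current = append(current, msg)
		currentSize += size
	}
	if len(current) > 0 {
		chunks = append(chunks, current) // flush the final partial chunk
	}
	return chunks, errs
}

func main() {
	msgs := []*sarama.ProducerMessage{
		{Topic: "otlp_spans", Value: sarama.StringEncoder("span-1")},
		{Topic: "otlp_spans", Value: sarama.StringEncoder("span-2")},
	}
	chunks, err := chunkBySize(msgs, 1_000_000)
	fmt.Println(len(chunks), err)
}
```

The greedy split keeps message order intact and, because each message is pre-checked against the limit, every emitted chunk fits within `maxBytes`.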
Review comment:
Why does this type need to change? The exporter gets an array of messages and calls `SendMessages(msgs []*ProducerMessage)` on `sarama.Producer`. At that point, is the max message size applied to the whole batch or to each individual message? Intuitively it's the latter (I don't see evidence to the contrary), and since each message contains exactly one span per the previous logic, there's really nothing else to do here. And if my assumption is correct, I don't think the introduction of the `ProducerMessageChunks` struct is necessary; just make sure that each message is not larger than the max.
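If the reviewer's reading is correct (the size limit applies per message, and each message holds exactly one span), the simpler alternative could look roughly like this sketch; `filterOversized` and the package name are hypothetical, not part of the PR:

```go
// Hypothetical sketch of the reviewer's alternative: keep returning a flat
// []*sarama.ProducerMessage and only drop (and report) messages whose
// individual size exceeds the configured maximum, with no chunk wrapper.
package kafkautil

import (
	"errors"
	"fmt"

	"github.com/IBM/sarama"
)

func filterOversized(msgs []*sarama.ProducerMessage, maxBytes int) ([]*sarama.ProducerMessage, error) {
	kept := make([]*sarama.ProducerMessage, 0, len(msgs))
	var errs error
	for _, msg := range msgs {
		if size := msg.ByteSize(2); size > maxBytes {
			errs = errors.Join(errs, fmt.Errorf("dropping message: %d bytes exceeds limit %d", size, maxBytes))
			continue
		}
		kept = append(kept, msg)
	}
	// The surviving messages could then be passed to SyncProducer.SendMessages
	// in a single call, as the reviewer describes.
	return kept, errs
}
```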