From 9946b110f95ae45860e6f282fae7aeb71e15cca4 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Tue, 25 Jul 2023 21:22:47 +0800 Subject: [PATCH 01/13] Optimize proto translation for merger --- aggregators/aggregator.go | 5 +- aggregators/aggregator_test.go | 1180 ++++++++++++------------ aggregators/codec.go | 144 ++- aggregators/codec_test.go | 45 +- aggregators/combined_metrics_test.go | 399 +++++++++ aggregators/converter.go | 28 +- aggregators/converter_test.go | 1246 +++++++++++++------------- aggregators/merger.go | 445 ++++++--- aggregators/merger_test.go | 1244 +++++++++++++------------ aggregators/models.go | 204 ++--- 10 files changed, 2718 insertions(+), 2222 deletions(-) create mode 100644 aggregators/combined_metrics_test.go diff --git a/aggregators/aggregator.go b/aggregators/aggregator.go index e9c6a57..bccc0bd 100644 --- a/aggregators/aggregator.go +++ b/aggregators/aggregator.go @@ -23,6 +23,7 @@ import ( "github.com/elastic/apm-aggregation/aggregationpb" "github.com/elastic/apm-aggregation/aggregators/internal/telemetry" + "github.com/elastic/apm-aggregation/aggregators/internal/timestamppb" "github.com/elastic/apm-data/model/modelpb" ) @@ -608,7 +609,7 @@ func (a *Aggregator) processHarvest( if err := a.cfg.Processor(ctx, cmk, cm, aggIvl); err != nil { return hs, fmt.Errorf("failed to process combined metrics ID %s: %w", cmk.ID, err) } - hs.eventsTotal = cm.eventsTotal - hs.youngestEventTimestamp = cm.youngestEventTimestamp + hs.eventsTotal = cm.EventsTotal + hs.youngestEventTimestamp = timestamppb.PBTimestampToTime(cm.YoungestEventTimestamp) return hs, nil } diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go index bfc8bb6..dc2b24d 100644 --- a/aggregators/aggregator_test.go +++ b/aggregators/aggregator_test.go @@ -9,7 +9,6 @@ import ( "fmt" "math/rand" "net/netip" - "sort" "strings" "sync/atomic" "testing" @@ -28,14 +27,11 @@ import ( apmmodel "go.elastic.co/apm/v2/model" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric" - sdktrace "go.opentelemetry.io/otel/sdk/trace" - "go.opentelemetry.io/otel/sdk/trace/tracetest" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" "github.com/elastic/apm-data/model/modelpb" ) @@ -45,581 +41,581 @@ func TestNew(t *testing.T) { assert.NotNil(t, agg) } -func TestAggregateBatch(t *testing.T) { - exp := tracetest.NewInMemoryExporter() - tp := sdktrace.NewTracerProvider( - sdktrace.WithSyncer(exp), - ) - gatherer, err := apmotel.NewGatherer() - require.NoError(t, err) - mp := metric.NewMeterProvider(metric.WithReader(gatherer)) +// func TestAggregateBatch(t *testing.T) { +// exp := tracetest.NewInMemoryExporter() +// tp := sdktrace.NewTracerProvider( +// sdktrace.WithSyncer(exp), +// ) +// gatherer, err := apmotel.NewGatherer() +// require.NoError(t, err) +// mp := metric.NewMeterProvider(metric.WithReader(gatherer)) +// +// cmID := EncodeToCombinedMetricsKeyID(t, "ab01") +// txnDuration := 100 * time.Millisecond +// uniqueEventCount := 100 // for each of txns and spans +// uniqueServices := 10 +// repCount := 5 +// ts := time.Date(2022, 12, 31, 0, 0, 0, 0, time.UTC) +// batch := make(modelpb.Batch, 0, uniqueEventCount*repCount*2) +// // Distribute the total unique transaction count amongst the total +// // unique services uniformly. +// for i := 0; i < uniqueEventCount*repCount; i++ { +// batch = append(batch, &modelpb.APMEvent{ +// Event: &modelpb.Event{ +// Outcome: "success", +// Duration: durationpb.New(txnDuration), +// Received: timestamppb.New(ts), +// }, +// Transaction: &modelpb.Transaction{ +// Name: fmt.Sprintf("foo%d", i%uniqueEventCount), +// Type: fmt.Sprintf("txtype%d", i%uniqueEventCount), +// RepresentativeCount: 1, +// DroppedSpansStats: []*modelpb.DroppedSpanStats{ +// { +// DestinationServiceResource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), +// Outcome: "success", +// Duration: &modelpb.AggregatedDuration{ +// Count: 1, +// Sum: durationpb.New(10 * time.Millisecond), +// }, +// }, +// }, +// }, +// Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, +// }) +// batch = append(batch, &modelpb.APMEvent{ +// Event: &modelpb.Event{ +// Received: timestamppb.New(ts), +// }, +// Span: &modelpb.Span{ +// Name: fmt.Sprintf("bar%d", i%uniqueEventCount), +// Type: "type", +// RepresentativeCount: 1, +// DestinationService: &modelpb.DestinationService{ +// Resource: "test_dest", +// }, +// }, +// Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, +// }) +// } +// +// out := make(chan CombinedMetrics, 1) +// aggIvl := time.Minute +// agg, err := New( +// WithDataDir(t.TempDir()), +// WithLimits(Limits{ +// MaxSpanGroups: 1000, +// MaxSpanGroupsPerService: 100, +// MaxTransactionGroups: 100, +// MaxTransactionGroupsPerService: 10, +// MaxServiceTransactionGroups: 100, +// MaxServiceTransactionGroupsPerService: 10, +// MaxServices: 10, +// MaxServiceInstanceGroupsPerService: 10, +// }), +// WithProcessor(combinedMetricsProcessor(out)), +// WithAggregationIntervals([]time.Duration{aggIvl}), +// WithHarvestDelay(time.Hour), // disable auto harvest +// WithTracer(tp.Tracer("test")), +// WithMeter(mp.Meter("test")), +// WithCombinedMetricsIDToKVs(func(id [16]byte) []attribute.KeyValue { +// return []attribute.KeyValue{attribute.String("id_key", string(id[:]))} +// }), +// ) +// require.NoError(t, err) +// +// require.NoError(t, agg.AggregateBatch(context.Background(), cmID, &batch)) +// require.NoError(t, agg.Stop(context.Background())) +// var cm CombinedMetrics +// select { +// case cm = <-out: +// default: +// t.Error("failed to get aggregated metrics") +// t.FailNow() +// } +// +// var span tracetest.SpanStub +// for _, s := range exp.GetSpans() { +// if s.Name == "AggregateBatch" { +// span = s +// } +// } +// assert.NotNil(t, span) +// +// expectedCombinedMetrics := CombinedMetrics{ +// Services: make(map[ServiceAggregationKey]ServiceMetrics), +// eventsTotal: float64(len(batch)), +// youngestEventTimestamp: ts, +// } +// expectedMeasurements := []apmmodel.Metrics{ +// { +// Samples: map[string]apmmodel.Metric{ +// "aggregator.requests.total": {Value: 1}, +// "aggregator.bytes.ingested": {Value: 133750}, +// }, +// Labels: apmmodel.StringMap{ +// apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, +// }, +// }, +// { +// Samples: map[string]apmmodel.Metric{ +// "aggregator.events.total": {Value: float64(len(batch))}, +// "aggregator.events.processed": {Value: float64(len(batch))}, +// "events.processing-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, +// "events.queued-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, +// }, +// Labels: apmmodel.StringMap{ +// apmmodel.StringMapItem{Key: aggregationIvlKey, Value: formatDuration(aggIvl)}, +// apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, +// }, +// }, +// } +// sik := ServiceInstanceAggregationKey{GlobalLabelsStr: ""} +// for i := 0; i < uniqueEventCount*repCount; i++ { +// svcKey := ServiceAggregationKey{ +// Timestamp: time.Unix(0, 0).UTC(), +// ServiceName: fmt.Sprintf("svc%d", i%uniqueServices), +// } +// txKey := TransactionAggregationKey{ +// TraceRoot: true, +// TransactionName: fmt.Sprintf("foo%d", i%uniqueEventCount), +// TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), +// EventOutcome: "success", +// } +// stxKey := ServiceTransactionAggregationKey{ +// TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), +// } +// spanKey := SpanAggregationKey{ +// SpanName: fmt.Sprintf("bar%d", i%uniqueEventCount), +// Resource: "test_dest", +// } +// if _, ok := expectedCombinedMetrics.Services[svcKey]; !ok { +// expectedCombinedMetrics.Services[svcKey] = newServiceMetrics() +// } +// if _, ok := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik]; !ok { +// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik] = newServiceInstanceMetrics() +// } +// var ok bool +// var tm TransactionMetrics +// if tm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey]; !ok { +// tm = newTransactionMetrics() +// } +// tm.Histogram.RecordDuration(txnDuration, 1) +// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey] = tm +// var stm ServiceTransactionMetrics +// if stm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey]; !ok { +// stm = newServiceTransactionMetrics() +// } +// stm.Histogram.RecordDuration(txnDuration, 1) +// stm.SuccessCount++ +// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey] = stm +// sm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] +// sm.Count++ +// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] = sm +// +// droppedSpanStatsKey := SpanAggregationKey{ +// SpanName: "", +// Resource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), +// Outcome: "success", +// } +// dssm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] +// dssm.Count++ +// dssm.Sum += float64(10 * time.Millisecond) +// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] = dssm +// } +// assert.Empty(t, cmp.Diff( +// expectedCombinedMetrics, cm, +// cmpopts.EquateEmpty(), +// cmpopts.EquateApprox(0, 0.01), +// cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { +// return a.Equal(&b) +// }), +// cmp.AllowUnexported(CombinedMetrics{}), +// )) +// assert.Empty(t, cmp.Diff( +// expectedMeasurements, +// gatherMetrics( +// gatherer, +// withIgnoreMetricPrefix("pebble."), +// withZeroHistogramValues(true), +// ), +// cmpopts.IgnoreUnexported(apmmodel.Time{}), +// cmpopts.EquateApprox(0, 0.01), +// )) +// } - cmID := EncodeToCombinedMetricsKeyID(t, "ab01") - txnDuration := 100 * time.Millisecond - uniqueEventCount := 100 // for each of txns and spans - uniqueServices := 10 - repCount := 5 - ts := time.Date(2022, 12, 31, 0, 0, 0, 0, time.UTC) - batch := make(modelpb.Batch, 0, uniqueEventCount*repCount*2) - // Distribute the total unique transaction count amongst the total - // unique services uniformly. - for i := 0; i < uniqueEventCount*repCount; i++ { - batch = append(batch, &modelpb.APMEvent{ - Event: &modelpb.Event{ - Outcome: "success", - Duration: durationpb.New(txnDuration), - Received: timestamppb.New(ts), - }, - Transaction: &modelpb.Transaction{ - Name: fmt.Sprintf("foo%d", i%uniqueEventCount), - Type: fmt.Sprintf("txtype%d", i%uniqueEventCount), - RepresentativeCount: 1, - DroppedSpansStats: []*modelpb.DroppedSpanStats{ - { - DestinationServiceResource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), - Outcome: "success", - Duration: &modelpb.AggregatedDuration{ - Count: 1, - Sum: durationpb.New(10 * time.Millisecond), - }, - }, - }, - }, - Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, - }) - batch = append(batch, &modelpb.APMEvent{ - Event: &modelpb.Event{ - Received: timestamppb.New(ts), - }, - Span: &modelpb.Span{ - Name: fmt.Sprintf("bar%d", i%uniqueEventCount), - Type: "type", - RepresentativeCount: 1, - DestinationService: &modelpb.DestinationService{ - Resource: "test_dest", - }, - }, - Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, - }) - } - - out := make(chan CombinedMetrics, 1) - aggIvl := time.Minute - agg, err := New( - WithDataDir(t.TempDir()), - WithLimits(Limits{ - MaxSpanGroups: 1000, - MaxSpanGroupsPerService: 100, - MaxTransactionGroups: 100, - MaxTransactionGroupsPerService: 10, - MaxServiceTransactionGroups: 100, - MaxServiceTransactionGroupsPerService: 10, - MaxServices: 10, - MaxServiceInstanceGroupsPerService: 10, - }), - WithProcessor(combinedMetricsProcessor(out)), - WithAggregationIntervals([]time.Duration{aggIvl}), - WithHarvestDelay(time.Hour), // disable auto harvest - WithTracer(tp.Tracer("test")), - WithMeter(mp.Meter("test")), - WithCombinedMetricsIDToKVs(func(id [16]byte) []attribute.KeyValue { - return []attribute.KeyValue{attribute.String("id_key", string(id[:]))} - }), - ) - require.NoError(t, err) - - require.NoError(t, agg.AggregateBatch(context.Background(), cmID, &batch)) - require.NoError(t, agg.Stop(context.Background())) - var cm CombinedMetrics - select { - case cm = <-out: - default: - t.Error("failed to get aggregated metrics") - t.FailNow() - } - - var span tracetest.SpanStub - for _, s := range exp.GetSpans() { - if s.Name == "AggregateBatch" { - span = s - } - } - assert.NotNil(t, span) - - expectedCombinedMetrics := CombinedMetrics{ - Services: make(map[ServiceAggregationKey]ServiceMetrics), - eventsTotal: float64(len(batch)), - youngestEventTimestamp: ts, - } - expectedMeasurements := []apmmodel.Metrics{ - { - Samples: map[string]apmmodel.Metric{ - "aggregator.requests.total": {Value: 1}, - "aggregator.bytes.ingested": {Value: 133750}, - }, - Labels: apmmodel.StringMap{ - apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, - }, - }, - { - Samples: map[string]apmmodel.Metric{ - "aggregator.events.total": {Value: float64(len(batch))}, - "aggregator.events.processed": {Value: float64(len(batch))}, - "events.processing-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, - "events.queued-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, - }, - Labels: apmmodel.StringMap{ - apmmodel.StringMapItem{Key: aggregationIvlKey, Value: formatDuration(aggIvl)}, - apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, - }, - }, - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: ""} - for i := 0; i < uniqueEventCount*repCount; i++ { - svcKey := ServiceAggregationKey{ - Timestamp: time.Unix(0, 0).UTC(), - ServiceName: fmt.Sprintf("svc%d", i%uniqueServices), - } - txKey := TransactionAggregationKey{ - TraceRoot: true, - TransactionName: fmt.Sprintf("foo%d", i%uniqueEventCount), - TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), - EventOutcome: "success", - } - stxKey := ServiceTransactionAggregationKey{ - TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), - } - spanKey := SpanAggregationKey{ - SpanName: fmt.Sprintf("bar%d", i%uniqueEventCount), - Resource: "test_dest", - } - if _, ok := expectedCombinedMetrics.Services[svcKey]; !ok { - expectedCombinedMetrics.Services[svcKey] = newServiceMetrics() - } - if _, ok := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik]; !ok { - expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik] = newServiceInstanceMetrics() - } - var ok bool - var tm TransactionMetrics - if tm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey]; !ok { - tm = newTransactionMetrics() - } - tm.Histogram.RecordDuration(txnDuration, 1) - expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey] = tm - var stm ServiceTransactionMetrics - if stm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey]; !ok { - stm = newServiceTransactionMetrics() - } - stm.Histogram.RecordDuration(txnDuration, 1) - stm.SuccessCount++ - expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey] = stm - sm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] - sm.Count++ - expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] = sm - - droppedSpanStatsKey := SpanAggregationKey{ - SpanName: "", - Resource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), - Outcome: "success", - } - dssm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] - dssm.Count++ - dssm.Sum += float64(10 * time.Millisecond) - expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] = dssm - } - assert.Empty(t, cmp.Diff( - expectedCombinedMetrics, cm, - cmpopts.EquateEmpty(), - cmpopts.EquateApprox(0, 0.01), - cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { - return a.Equal(&b) - }), - cmp.AllowUnexported(CombinedMetrics{}), - )) - assert.Empty(t, cmp.Diff( - expectedMeasurements, - gatherMetrics( - gatherer, - withIgnoreMetricPrefix("pebble."), - withZeroHistogramValues(true), - ), - cmpopts.IgnoreUnexported(apmmodel.Time{}), - cmpopts.EquateApprox(0, 0.01), - )) -} - -func TestAggregateSpanMetrics(t *testing.T) { - type input struct { - serviceName string - agentName string - destination string - targetType string - targetName string - outcome string - representativeCount float64 - } - - destinationX := "destination-X" - destinationZ := "destination-Z" - trgTypeX := "trg-type-X" - trgNameX := "trg-name-X" - trgTypeZ := "trg-type-Z" - trgNameZ := "trg-name-Z" - defaultLabels := modelpb.Labels{ - "department_name": &modelpb.LabelValue{Global: true, Value: "apm"}, - "organization": &modelpb.LabelValue{Global: true, Value: "observability"}, - "company": &modelpb.LabelValue{Global: true, Value: "elastic"}, - } - defaultNumericLabels := modelpb.NumericLabels{ - "user_id": &modelpb.NumericLabelValue{Global: true, Value: 100}, - "cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10}, - } - - for _, tt := range []struct { - name string - inputs []input - getExpectedEvents func(time.Time, time.Duration, time.Duration, int) []*modelpb.APMEvent - }{ - { - name: "with destination and service targets", - inputs: []input{ - {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 2}, - {serviceName: "service-A", agentName: "java", destination: destinationX, targetType: trgTypeX, targetName: trgNameX, outcome: "success", representativeCount: 1}, - {serviceName: "service-B", agentName: "python", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, - {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, - {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 0}, - {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "failure", representativeCount: 1}, - }, - getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { - return []*modelpb.APMEvent{ - { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - }, - Metricset: &modelpb.Metricset{ - Name: "service_summary", - Interval: formatDuration(ivl), - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "python"}, - Service: &modelpb.Service{ - Name: "service-B", - }, - Metricset: &modelpb.Metricset{ - Name: "service_summary", - Interval: formatDuration(ivl), - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - Target: &modelpb.ServiceTarget{ - Type: trgTypeX, - Name: trgNameX, - }, - }, - Event: &modelpb.Event{Outcome: "success"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(count), - }, - Span: &modelpb.Span{ - Name: "service-A:" + destinationX, - DestinationService: &modelpb.DestinationService{ - Resource: destinationX, - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(count), - Sum: durationpb.New(time.Duration(count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - Target: &modelpb.ServiceTarget{ - Type: trgTypeZ, - Name: trgNameZ, - }, - }, - Event: &modelpb.Event{Outcome: "failure"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(count), - }, - Span: &modelpb.Span{ - Name: "service-A:" + destinationZ, - DestinationService: &modelpb.DestinationService{ - Resource: destinationZ, - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(count), - Sum: durationpb.New(time.Duration(count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - Target: &modelpb.ServiceTarget{ - Type: trgTypeZ, - Name: trgNameZ, - }, - }, - Event: &modelpb.Event{Outcome: "success"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(3 * count), - }, - Span: &modelpb.Span{ - Name: "service-A:" + destinationZ, - DestinationService: &modelpb.DestinationService{ - Resource: destinationZ, - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(3 * count), - Sum: durationpb.New(time.Duration(3*count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "python"}, - Service: &modelpb.Service{ - Name: "service-B", - Target: &modelpb.ServiceTarget{ - Type: trgTypeZ, - Name: trgNameZ, - }, - }, - Event: &modelpb.Event{Outcome: "success"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(count), - }, - Span: &modelpb.Span{ - Name: "service-B:" + destinationZ, - DestinationService: &modelpb.DestinationService{ - Resource: destinationZ, - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(count), - Sum: durationpb.New(time.Duration(count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, - } - }, - }, { - name: "with_no_destination_and_no_service_target", - inputs: []input{ - {serviceName: "service-A", agentName: "java", outcome: "success", representativeCount: 1}, - }, - getExpectedEvents: func(_ time.Time, _, _ time.Duration, _ int) []*modelpb.APMEvent { - return nil - }, - }, { - name: "with no destination and a service target", - inputs: []input{ - {serviceName: "service-A", agentName: "java", targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, - }, - getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { - return []*modelpb.APMEvent{ - { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - }, - Metricset: &modelpb.Metricset{ - Name: "service_summary", - Interval: formatDuration(ivl), - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - Target: &modelpb.ServiceTarget{ - Type: trgTypeZ, - Name: trgNameZ, - }, - }, - Event: &modelpb.Event{Outcome: "success"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(count), - }, - Span: &modelpb.Span{ - Name: "service-A:", - DestinationService: &modelpb.DestinationService{ - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(count), - Sum: durationpb.New(time.Duration(count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, - } - }, - }, { - name: "with a destination and no service target", - inputs: []input{ - {serviceName: "service-A", agentName: "java", destination: destinationZ, outcome: "success", representativeCount: 1}, - }, - getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { - return []*modelpb.APMEvent{ - { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - }, - Metricset: &modelpb.Metricset{ - Name: "service_summary", - Interval: formatDuration(ivl), - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, { - Timestamp: timestamppb.New(ts.Truncate(ivl)), - Agent: &modelpb.Agent{Name: "java"}, - Service: &modelpb.Service{ - Name: "service-A", - }, - Event: &modelpb.Event{Outcome: "success"}, - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - DocCount: int64(count), - }, - Span: &modelpb.Span{ - Name: "service-A:" + destinationZ, - DestinationService: &modelpb.DestinationService{ - Resource: destinationZ, - ResponseTime: &modelpb.AggregatedDuration{ - Count: int64(count), - Sum: durationpb.New(time.Duration(count) * duration), - }, - }, - }, - Labels: defaultLabels, - NumericLabels: defaultNumericLabels, - }, - } - }, - }, - } { - t.Run(tt.name, func(t *testing.T) { - var actualEvents []*modelpb.APMEvent - aggregationIvls := []time.Duration{time.Minute, 10 * time.Minute, time.Hour} - agg, err := New( - WithLimits(Limits{ - MaxSpanGroups: 1000, - MaxSpanGroupsPerService: 100, - MaxTransactionGroups: 100, - MaxTransactionGroupsPerService: 10, - MaxServiceTransactionGroups: 100, - MaxServiceTransactionGroupsPerService: 10, - MaxServices: 10, - MaxServiceInstanceGroupsPerService: 10, - }), - WithAggregationIntervals(aggregationIvls), - WithProcessor(sliceProcessor(&actualEvents)), - WithDataDir(t.TempDir()), - ) - require.NoError(t, err) - - count := 100 - now := time.Now() - duration := 100 * time.Millisecond - for _, in := range tt.inputs { - span := makeSpan( - now, - in.serviceName, - in.agentName, - in.destination, - in.targetType, - in.targetName, - in.outcome, - duration, - in.representativeCount, - defaultLabels, - defaultNumericLabels, - ) - for i := 0; i < count; i++ { - err := agg.AggregateBatch( - context.Background(), - EncodeToCombinedMetricsKeyID(t, "ab01"), - &modelpb.Batch{span}, - ) - require.NoError(t, err) - } - } - require.NoError(t, agg.Stop(context.Background())) - var expectedEvents []*modelpb.APMEvent - for _, ivl := range aggregationIvls { - expectedEvents = append(expectedEvents, tt.getExpectedEvents(now, duration, ivl, count)...) - } - sortKey := func(e *modelpb.APMEvent) string { - var sb strings.Builder - sb.WriteString(e.GetService().GetName()) - sb.WriteString(e.GetAgent().GetName()) - sb.WriteString(e.GetMetricset().GetName()) - sb.WriteString(e.GetMetricset().GetInterval()) - destSvc := e.GetSpan().GetDestinationService() - if destSvc != nil { - sb.WriteString(destSvc.GetResource()) - } - target := e.GetService().GetTarget() - if target != nil { - sb.WriteString(target.GetName()) - sb.WriteString(target.GetType()) - } - sb.WriteString(e.GetEvent().GetOutcome()) - return sb.String() - } - sort.Slice(expectedEvents, func(i, j int) bool { - return sortKey(expectedEvents[i]) < sortKey(expectedEvents[j]) - }) - sort.Slice(actualEvents, func(i, j int) bool { - return sortKey(actualEvents[i]) < sortKey(actualEvents[j]) - }) - assert.Empty(t, cmp.Diff( - expectedEvents, actualEvents, - cmpopts.EquateEmpty(), - cmpopts.IgnoreTypes(netip.Addr{}), - protocmp.Transform(), - )) - }) - } -} +// func TestAggregateSpanMetrics(t *testing.T) { +// type input struct { +// serviceName string +// agentName string +// destination string +// targetType string +// targetName string +// outcome string +// representativeCount float64 +// } +// +// destinationX := "destination-X" +// destinationZ := "destination-Z" +// trgTypeX := "trg-type-X" +// trgNameX := "trg-name-X" +// trgTypeZ := "trg-type-Z" +// trgNameZ := "trg-name-Z" +// defaultLabels := modelpb.Labels{ +// "department_name": &modelpb.LabelValue{Global: true, Value: "apm"}, +// "organization": &modelpb.LabelValue{Global: true, Value: "observability"}, +// "company": &modelpb.LabelValue{Global: true, Value: "elastic"}, +// } +// defaultNumericLabels := modelpb.NumericLabels{ +// "user_id": &modelpb.NumericLabelValue{Global: true, Value: 100}, +// "cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10}, +// } +// +// for _, tt := range []struct { +// name string +// inputs []input +// getExpectedEvents func(time.Time, time.Duration, time.Duration, int) []*modelpb.APMEvent +// }{ +// { +// name: "with destination and service targets", +// inputs: []input{ +// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 2}, +// {serviceName: "service-A", agentName: "java", destination: destinationX, targetType: trgTypeX, targetName: trgNameX, outcome: "success", representativeCount: 1}, +// {serviceName: "service-B", agentName: "python", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, +// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, +// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 0}, +// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "failure", representativeCount: 1}, +// }, +// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { +// return []*modelpb.APMEvent{ +// { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// }, +// Metricset: &modelpb.Metricset{ +// Name: "service_summary", +// Interval: formatDuration(ivl), +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "python"}, +// Service: &modelpb.Service{ +// Name: "service-B", +// }, +// Metricset: &modelpb.Metricset{ +// Name: "service_summary", +// Interval: formatDuration(ivl), +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// Target: &modelpb.ServiceTarget{ +// Type: trgTypeX, +// Name: trgNameX, +// }, +// }, +// Event: &modelpb.Event{Outcome: "success"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(count), +// }, +// Span: &modelpb.Span{ +// Name: "service-A:" + destinationX, +// DestinationService: &modelpb.DestinationService{ +// Resource: destinationX, +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(count), +// Sum: durationpb.New(time.Duration(count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// Target: &modelpb.ServiceTarget{ +// Type: trgTypeZ, +// Name: trgNameZ, +// }, +// }, +// Event: &modelpb.Event{Outcome: "failure"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(count), +// }, +// Span: &modelpb.Span{ +// Name: "service-A:" + destinationZ, +// DestinationService: &modelpb.DestinationService{ +// Resource: destinationZ, +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(count), +// Sum: durationpb.New(time.Duration(count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// Target: &modelpb.ServiceTarget{ +// Type: trgTypeZ, +// Name: trgNameZ, +// }, +// }, +// Event: &modelpb.Event{Outcome: "success"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(3 * count), +// }, +// Span: &modelpb.Span{ +// Name: "service-A:" + destinationZ, +// DestinationService: &modelpb.DestinationService{ +// Resource: destinationZ, +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(3 * count), +// Sum: durationpb.New(time.Duration(3*count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "python"}, +// Service: &modelpb.Service{ +// Name: "service-B", +// Target: &modelpb.ServiceTarget{ +// Type: trgTypeZ, +// Name: trgNameZ, +// }, +// }, +// Event: &modelpb.Event{Outcome: "success"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(count), +// }, +// Span: &modelpb.Span{ +// Name: "service-B:" + destinationZ, +// DestinationService: &modelpb.DestinationService{ +// Resource: destinationZ, +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(count), +// Sum: durationpb.New(time.Duration(count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, +// } +// }, +// }, { +// name: "with_no_destination_and_no_service_target", +// inputs: []input{ +// {serviceName: "service-A", agentName: "java", outcome: "success", representativeCount: 1}, +// }, +// getExpectedEvents: func(_ time.Time, _, _ time.Duration, _ int) []*modelpb.APMEvent { +// return nil +// }, +// }, { +// name: "with no destination and a service target", +// inputs: []input{ +// {serviceName: "service-A", agentName: "java", targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, +// }, +// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { +// return []*modelpb.APMEvent{ +// { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// }, +// Metricset: &modelpb.Metricset{ +// Name: "service_summary", +// Interval: formatDuration(ivl), +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// Target: &modelpb.ServiceTarget{ +// Type: trgTypeZ, +// Name: trgNameZ, +// }, +// }, +// Event: &modelpb.Event{Outcome: "success"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(count), +// }, +// Span: &modelpb.Span{ +// Name: "service-A:", +// DestinationService: &modelpb.DestinationService{ +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(count), +// Sum: durationpb.New(time.Duration(count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, +// } +// }, +// }, { +// name: "with a destination and no service target", +// inputs: []input{ +// {serviceName: "service-A", agentName: "java", destination: destinationZ, outcome: "success", representativeCount: 1}, +// }, +// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { +// return []*modelpb.APMEvent{ +// { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// }, +// Metricset: &modelpb.Metricset{ +// Name: "service_summary", +// Interval: formatDuration(ivl), +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, { +// Timestamp: timestamppb.New(ts.Truncate(ivl)), +// Agent: &modelpb.Agent{Name: "java"}, +// Service: &modelpb.Service{ +// Name: "service-A", +// }, +// Event: &modelpb.Event{Outcome: "success"}, +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// DocCount: int64(count), +// }, +// Span: &modelpb.Span{ +// Name: "service-A:" + destinationZ, +// DestinationService: &modelpb.DestinationService{ +// Resource: destinationZ, +// ResponseTime: &modelpb.AggregatedDuration{ +// Count: int64(count), +// Sum: durationpb.New(time.Duration(count) * duration), +// }, +// }, +// }, +// Labels: defaultLabels, +// NumericLabels: defaultNumericLabels, +// }, +// } +// }, +// }, +// } { +// t.Run(tt.name, func(t *testing.T) { +// var actualEvents []*modelpb.APMEvent +// aggregationIvls := []time.Duration{time.Minute, 10 * time.Minute, time.Hour} +// agg, err := New( +// WithLimits(Limits{ +// MaxSpanGroups: 1000, +// MaxSpanGroupsPerService: 100, +// MaxTransactionGroups: 100, +// MaxTransactionGroupsPerService: 10, +// MaxServiceTransactionGroups: 100, +// MaxServiceTransactionGroupsPerService: 10, +// MaxServices: 10, +// MaxServiceInstanceGroupsPerService: 10, +// }), +// WithAggregationIntervals(aggregationIvls), +// WithProcessor(sliceProcessor(&actualEvents)), +// WithDataDir(t.TempDir()), +// ) +// require.NoError(t, err) +// +// count := 100 +// now := time.Now() +// duration := 100 * time.Millisecond +// for _, in := range tt.inputs { +// span := makeSpan( +// now, +// in.serviceName, +// in.agentName, +// in.destination, +// in.targetType, +// in.targetName, +// in.outcome, +// duration, +// in.representativeCount, +// defaultLabels, +// defaultNumericLabels, +// ) +// for i := 0; i < count; i++ { +// err := agg.AggregateBatch( +// context.Background(), +// EncodeToCombinedMetricsKeyID(t, "ab01"), +// &modelpb.Batch{span}, +// ) +// require.NoError(t, err) +// } +// } +// require.NoError(t, agg.Stop(context.Background())) +// var expectedEvents []*modelpb.APMEvent +// for _, ivl := range aggregationIvls { +// expectedEvents = append(expectedEvents, tt.getExpectedEvents(now, duration, ivl, count)...) +// } +// sortKey := func(e *modelpb.APMEvent) string { +// var sb strings.Builder +// sb.WriteString(e.GetService().GetName()) +// sb.WriteString(e.GetAgent().GetName()) +// sb.WriteString(e.GetMetricset().GetName()) +// sb.WriteString(e.GetMetricset().GetInterval()) +// destSvc := e.GetSpan().GetDestinationService() +// if destSvc != nil { +// sb.WriteString(destSvc.GetResource()) +// } +// target := e.GetService().GetTarget() +// if target != nil { +// sb.WriteString(target.GetName()) +// sb.WriteString(target.GetType()) +// } +// sb.WriteString(e.GetEvent().GetOutcome()) +// return sb.String() +// } +// sort.Slice(expectedEvents, func(i, j int) bool { +// return sortKey(expectedEvents[i]) < sortKey(expectedEvents[j]) +// }) +// sort.Slice(actualEvents, func(i, j int) bool { +// return sortKey(actualEvents[i]) < sortKey(actualEvents[j]) +// }) +// assert.Empty(t, cmp.Diff( +// expectedEvents, actualEvents, +// cmpopts.EquateEmpty(), +// cmpopts.IgnoreTypes(netip.Addr{}), +// protocmp.Transform(), +// )) +// }) +// } +// } func TestCombinedMetricsKeyOrdered(t *testing.T) { // To Allow for retrieving combined metrics by time range, the metrics should @@ -1116,20 +1112,20 @@ func BenchmarkAggregateCombinedMetrics(b *testing.B) { ProcessingTime: time.Now().Truncate(aggIvl), ID: EncodeToCombinedMetricsKeyID(b, "ab01"), } - cm := (*CombinedMetrics)(createTestCombinedMetrics(withEventsTotal(1)). - addServiceTransaction( - time.Now(), - "test-svc", - "", - testServiceTransaction{txnType: "txntype", count: 1}, - ). - addTransaction( - time.Now(), - "test-svc", - "", - testTransaction{txnName: "txntest", txnType: "txntype", count: 1}, - ), - ).ToProto() + cm := NewTestCombinedMetrics(WithEventsTotal(1)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: time.Now(), + ServiceName: "test-svc", + }). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddTransaction(TransactionAggregationKey{ + TransactionName: "txntest", + TransactionType: "txntype", + }). + AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: "txntype", + }). + GetProto() b.Cleanup(func() { cm.ReturnToVTPool() }) ctx, cancel := context.WithCancel(context.Background()) b.Cleanup(func() { cancel() }) diff --git a/aggregators/codec.go b/aggregators/codec.go index 4180de4..ccf4813 100644 --- a/aggregators/codec.go +++ b/aggregators/codec.go @@ -95,8 +95,8 @@ func (m *CombinedMetrics) ToProto() *aggregationpb.CombinedMetrics { pb.OverflowServices = m.OverflowServices.ToProto() pb.OverflowServiceInstancesEstimator = hllBytes(m.OverflowServiceInstancesEstimator) } - pb.EventsTotal = m.eventsTotal - pb.YoungestEventTimestamp = timestamppb.TimeToPBTimestamp(m.youngestEventTimestamp) + pb.EventsTotal = m.EventsTotal + pb.YoungestEventTimestamp = m.YoungestEventTimestamp return pb } @@ -114,8 +114,8 @@ func (m *CombinedMetrics) FromProto(pb *aggregationpb.CombinedMetrics) { m.OverflowServices.FromProto(pb.OverflowServices) m.OverflowServiceInstancesEstimator = hllSketch(pb.OverflowServiceInstancesEstimator) } - m.eventsTotal = pb.EventsTotal - m.youngestEventTimestamp = timestamppb.PBTimestampToTime(pb.YoungestEventTimestamp) + m.EventsTotal = pb.EventsTotal + m.YoungestEventTimestamp = pb.YoungestEventTimestamp } // MarshalBinary marshals CombinedMetrics to binary using protobuf. @@ -205,60 +205,67 @@ func (m *ServiceInstanceMetrics) ToProto() *aggregationpb.ServiceInstanceMetrics if len(m.TransactionGroups) > cap(pb.TransactionMetrics) { pb.TransactionMetrics = make([]*aggregationpb.KeyedTransactionMetrics, 0, len(m.TransactionGroups)) } - for k, m := range m.TransactionGroups { - ktm := aggregationpb.KeyedTransactionMetricsFromVTPool() - ktm.Key = k.ToProto() - ktm.Metrics = m.ToProto() - pb.TransactionMetrics = append(pb.TransactionMetrics, ktm) + for _, m := range m.TransactionGroups { + pb.TransactionMetrics = append(pb.TransactionMetrics, m) } if len(m.ServiceTransactionGroups) > cap(pb.ServiceTransactionMetrics) { pb.ServiceTransactionMetrics = make([]*aggregationpb.KeyedServiceTransactionMetrics, 0, len(m.ServiceTransactionGroups)) } - for k, m := range m.ServiceTransactionGroups { - kstm := aggregationpb.KeyedServiceTransactionMetricsFromVTPool() - kstm.Key = k.ToProto() - kstm.Metrics = m.ToProto() - pb.ServiceTransactionMetrics = append(pb.ServiceTransactionMetrics, kstm) + for _, m := range m.ServiceTransactionGroups { + pb.ServiceTransactionMetrics = append(pb.ServiceTransactionMetrics, m) } if len(m.SpanGroups) > cap(pb.SpanMetrics) { pb.SpanMetrics = make([]*aggregationpb.KeyedSpanMetrics, 0, len(m.SpanGroups)) } - for k, m := range m.SpanGroups { - ksm := aggregationpb.KeyedSpanMetricsFromVTPool() - ksm.Key = k.ToProto() - ksm.Metrics = m.ToProto() - pb.SpanMetrics = append(pb.SpanMetrics, ksm) + for _, m := range m.SpanGroups { + pb.SpanMetrics = append(pb.SpanMetrics, m) } return pb } // FromProto converts protobuf representation to ServiceInstanceMetrics. func (m *ServiceInstanceMetrics) FromProto(pb *aggregationpb.ServiceInstanceMetrics) { - m.TransactionGroups = make(map[TransactionAggregationKey]TransactionMetrics, len(pb.TransactionMetrics)) - for _, ktm := range pb.TransactionMetrics { + m.TransactionGroups = make( + map[TransactionAggregationKey]*aggregationpb.KeyedTransactionMetrics, + len(pb.TransactionMetrics), + ) + for i := range pb.TransactionMetrics { + ktm := pb.TransactionMetrics[i] var k TransactionAggregationKey - var v TransactionMetrics k.FromProto(ktm.Key) - v.FromProto(ktm.Metrics) - m.TransactionGroups[k] = v - } - m.ServiceTransactionGroups = make(map[ServiceTransactionAggregationKey]ServiceTransactionMetrics, - len(pb.ServiceTransactionMetrics)) - for _, kstm := range pb.ServiceTransactionMetrics { + m.TransactionGroups[k] = ktm + // TODO: Either clone proto or add a comment that we change the input + pb.TransactionMetrics[i] = nil + } + pb.TransactionMetrics = pb.TransactionMetrics[:0] + + m.ServiceTransactionGroups = make( + map[ServiceTransactionAggregationKey]*aggregationpb.KeyedServiceTransactionMetrics, + len(pb.ServiceTransactionMetrics), + ) + for i := range pb.ServiceTransactionMetrics { + kstm := pb.ServiceTransactionMetrics[i] var k ServiceTransactionAggregationKey - var v ServiceTransactionMetrics k.FromProto(kstm.Key) - v.FromProto(kstm.Metrics) - m.ServiceTransactionGroups[k] = v - } - m.SpanGroups = make(map[SpanAggregationKey]SpanMetrics, len(pb.SpanMetrics)) - for _, ksm := range pb.SpanMetrics { + m.ServiceTransactionGroups[k] = kstm + // TODO: Either clone proto or add a comment that we change the input + pb.ServiceTransactionMetrics[i] = nil + } + pb.ServiceTransactionMetrics = pb.ServiceTransactionMetrics[:0] + + m.SpanGroups = make( + map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics, + len(pb.SpanMetrics), + ) + for i := range pb.SpanMetrics { + ksm := pb.SpanMetrics[i] var k SpanAggregationKey - var v SpanMetrics k.FromProto(ksm.Key) - v.FromProto(ksm.Metrics) - m.SpanGroups[k] = v + m.SpanGroups[k] = ksm + // TODO: Either clone proto or add a comment that we change the input + pb.SpanMetrics[i] = nil } + pb.SpanMetrics = pb.SpanMetrics[:0] } // ToProto converts TransactionAggregationKey to its protobuf representation. @@ -345,21 +352,6 @@ func (k *TransactionAggregationKey) FromProto(pb *aggregationpb.TransactionAggre k.CloudProjectName = pb.CloudProjectName } -// ToProto converts the TransactionMetrics to its protobuf representation. -func (m *TransactionMetrics) ToProto() *aggregationpb.TransactionMetrics { - pb := aggregationpb.TransactionMetricsFromVTPool() - pb.Histogram = HistogramToProto(m.Histogram) - return pb -} - -// FromProto converts protobuf representation to TransactionMetrics. -func (m *TransactionMetrics) FromProto(pb *aggregationpb.TransactionMetrics) { - if m.Histogram == nil && pb.Histogram != nil { - m.Histogram = hdrhistogram.New() - } - HistogramFromProto(m.Histogram, pb.Histogram) -} - // ToProto converts ServiceTransactionAggregationKey to its protobuf representation. func (k *ServiceTransactionAggregationKey) ToProto() *aggregationpb.ServiceTransactionAggregationKey { pb := aggregationpb.ServiceTransactionAggregationKeyFromVTPool() @@ -372,25 +364,6 @@ func (k *ServiceTransactionAggregationKey) FromProto(pb *aggregationpb.ServiceTr k.TransactionType = pb.TransactionType } -// ToProto converts the ServiceTransactionMetrics to its protobuf representation. -func (m *ServiceTransactionMetrics) ToProto() *aggregationpb.ServiceTransactionMetrics { - pb := aggregationpb.ServiceTransactionMetricsFromVTPool() - pb.Histogram = HistogramToProto(m.Histogram) - pb.FailureCount = m.FailureCount - pb.SuccessCount = m.SuccessCount - return pb -} - -// FromProto converts protobuf representation to ServiceTransactionMetrics. -func (m *ServiceTransactionMetrics) FromProto(pb *aggregationpb.ServiceTransactionMetrics) { - m.FailureCount = pb.FailureCount - m.SuccessCount = pb.SuccessCount - if m.Histogram == nil && pb.Histogram != nil { - m.Histogram = hdrhistogram.New() - } - HistogramFromProto(m.Histogram, pb.Histogram) -} - // HistogramToProto converts the histogram representation to protobuf. func HistogramToProto(h *hdrhistogram.HistogramRepresentation) *aggregationpb.HDRHistogram { if h == nil { @@ -455,33 +428,19 @@ func (k *SpanAggregationKey) FromProto(pb *aggregationpb.SpanAggregationKey) { k.Resource = pb.Resource } -// ToProto converts the SpanMetrics to its protobuf representation. -func (m *SpanMetrics) ToProto() *aggregationpb.SpanMetrics { - pb := aggregationpb.SpanMetricsFromVTPool() - pb.Count = m.Count - pb.Sum = m.Sum - return pb -} - -// FromProto converts protobuf representation to SpanMetrics. -func (m *SpanMetrics) FromProto(pb *aggregationpb.SpanMetrics) { - m.Count = pb.Count - m.Sum = pb.Sum -} - // ToProto converts Overflow to its protobuf representation. func (o *Overflow) ToProto() *aggregationpb.Overflow { pb := aggregationpb.OverflowFromVTPool() if !o.OverflowTransaction.Empty() { - pb.OverflowTransactions = o.OverflowTransaction.Metrics.ToProto() + pb.OverflowTransactions = o.OverflowTransaction.Metrics pb.OverflowTransactionsEstimator = hllBytes(o.OverflowTransaction.Estimator) } if !o.OverflowServiceTransaction.Empty() { - pb.OverflowServiceTransactions = o.OverflowServiceTransaction.Metrics.ToProto() + pb.OverflowServiceTransactions = o.OverflowServiceTransaction.Metrics pb.OverflowServiceTransactionsEstimator = hllBytes(o.OverflowServiceTransaction.Estimator) } if !o.OverflowSpan.Empty() { - pb.OverflowSpans = o.OverflowSpan.Metrics.ToProto() + pb.OverflowSpans = o.OverflowSpan.Metrics pb.OverflowSpansEstimator = hllBytes(o.OverflowSpan.Estimator) } return pb @@ -490,16 +449,19 @@ func (o *Overflow) ToProto() *aggregationpb.Overflow { // FromProto converts protobuf representation to Overflow. func (o *Overflow) FromProto(pb *aggregationpb.Overflow) { if pb.OverflowTransactions != nil { - o.OverflowTransaction.Metrics.FromProto(pb.OverflowTransactions) o.OverflowTransaction.Estimator = hllSketch(pb.OverflowTransactionsEstimator) + o.OverflowTransaction.Metrics = pb.OverflowTransactions + pb.OverflowTransactions = nil } if pb.OverflowServiceTransactions != nil { - o.OverflowServiceTransaction.Metrics.FromProto(pb.OverflowServiceTransactions) o.OverflowServiceTransaction.Estimator = hllSketch(pb.OverflowServiceTransactionsEstimator) + o.OverflowServiceTransaction.Metrics = pb.OverflowServiceTransactions + pb.OverflowServiceTransactions = nil } if pb.OverflowSpans != nil { - o.OverflowSpan.Metrics.FromProto(pb.OverflowSpans) o.OverflowSpan.Estimator = hllSketch(pb.OverflowSpansEstimator) + o.OverflowSpan.Metrics = pb.OverflowSpans + pb.OverflowSpans = nil } } diff --git a/aggregators/codec_test.go b/aggregators/codec_test.go index 0c3a53d..5b6ce06 100644 --- a/aggregators/codec_test.go +++ b/aggregators/codec_test.go @@ -76,16 +76,28 @@ func BenchmarkCombinedMetricsEncoding(b *testing.B) { b.ReportAllocs() ts := time.Now() cardinality := 10 - tcm := createTestCombinedMetrics() + tcm := NewTestCombinedMetrics() + sim := tcm.AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts, + ServiceName: "bench", + }).AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}) for i := 0; i < cardinality; i++ { txnName := fmt.Sprintf("txn%d", i) txnType := fmt.Sprintf("typ%d", i) spanName := fmt.Sprintf("spn%d", i) - tcm = tcm.addTransaction(ts, "bench", "", testTransaction{txnName: txnName, txnType: txnType, count: 200}) - tcm = tcm.addServiceTransaction(ts, "bench", "", testServiceTransaction{txnType: txnType, count: 200}) - tcm = tcm.addSpan(ts, "bench", "", testSpan{spanName: spanName}) + + sim.AddTransaction(TransactionAggregationKey{ + TransactionName: txnName, + TransactionType: txnType, + }, WithTransactionCount(200)) + sim.AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: txnType, + }, WithTransactionCount(200)) + sim.AddSpan(SpanAggregationKey{ + SpanName: spanName, + }) } - cm := CombinedMetrics(*tcm) + cm := tcm.Get() b.ResetTimer() for i := 0; i < b.N; i++ { cmproto := cm.ToProto() @@ -97,17 +109,28 @@ func BenchmarkCombinedMetricsDecoding(b *testing.B) { b.ReportAllocs() ts := time.Now() cardinality := 10 - tcm := createTestCombinedMetrics() + tcm := NewTestCombinedMetrics() + sim := tcm.AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts, + ServiceName: "bench", + }).AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}) for i := 0; i < cardinality; i++ { txnName := fmt.Sprintf("txn%d", i) txnType := fmt.Sprintf("typ%d", i) spanName := fmt.Sprintf("spn%d", i) - tcm = tcm.addTransaction(ts, "bench", "", testTransaction{txnName: txnName, txnType: txnType, count: 200}) - tcm = tcm.addServiceTransaction(ts, "bench", "", testServiceTransaction{txnType: txnType, count: 200}) - tcm = tcm.addSpan(ts, "bench", "", testSpan{spanName: spanName}) + + sim.AddTransaction(TransactionAggregationKey{ + TransactionName: txnName, + TransactionType: txnType, + }, WithTransactionCount(200)) + sim.AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: txnType, + }, WithTransactionCount(200)) + sim.AddSpan(SpanAggregationKey{ + SpanName: spanName, + }) } - cm := CombinedMetrics(*tcm) - cmproto := cm.ToProto() + cmproto := tcm.GetProto() b.Cleanup(func() { cmproto.ReturnToVTPool() }) diff --git a/aggregators/combined_metrics_test.go b/aggregators/combined_metrics_test.go new file mode 100644 index 0000000..6c8808f --- /dev/null +++ b/aggregators/combined_metrics_test.go @@ -0,0 +1,399 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + +package aggregators + +import ( + "time" + + "github.com/elastic/apm-aggregation/aggregationpb" + "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-aggregation/aggregators/internal/timestamppb" +) + +type TestCombinedMetricsCfg struct { + eventsTotal float64 + youngestEventTimestamp time.Time +} + +type TestCombinedMetricsOpt func(cfg TestCombinedMetricsCfg) TestCombinedMetricsCfg + +func WithEventsTotal(total float64) TestCombinedMetricsOpt { + return func(cfg TestCombinedMetricsCfg) TestCombinedMetricsCfg { + cfg.eventsTotal = total + return cfg + } +} + +func WithYoungestEventTimestamp(ts time.Time) TestCombinedMetricsOpt { + return func(cfg TestCombinedMetricsCfg) TestCombinedMetricsCfg { + cfg.youngestEventTimestamp = ts + return cfg + } +} + +var defaultTestCombinedMetricsCfg = TestCombinedMetricsCfg{ + eventsTotal: 1, + youngestEventTimestamp: time.Time{}, +} + +// TestCombinedMetrics creates combined metrics for testing. The creation logic +// is arranged in a way to allow chained creation and addition of leaf nodes +// to combined metrics. +type TestCombinedMetrics CombinedMetrics + +func NewTestCombinedMetrics(opts ...TestCombinedMetricsOpt) *TestCombinedMetrics { + cfg := defaultTestCombinedMetricsCfg + for _, opt := range opts { + cfg = opt(cfg) + } + var cm CombinedMetrics + cm.EventsTotal = cfg.eventsTotal + cm.YoungestEventTimestamp = timestamppb.TimeToPBTimestamp(cfg.youngestEventTimestamp) + cm.Services = make(map[ServiceAggregationKey]ServiceMetrics) + return (*TestCombinedMetrics)(&cm) +} + +func (tcm *TestCombinedMetrics) GetProto() *aggregationpb.CombinedMetrics { + cm := (*CombinedMetrics)(tcm) + cmproto := cm.ToProto() + return cmproto +} + +func (tcm *TestCombinedMetrics) Get() CombinedMetrics { + cm := (*CombinedMetrics)(tcm) + return *cm +} + +type TestServiceMetrics struct { + sk ServiceAggregationKey + tcm *TestCombinedMetrics +} + +func (tcm *TestCombinedMetrics) AddServiceMetrics( + sk ServiceAggregationKey, +) *TestServiceMetrics { + tcm.Services[sk] = newServiceMetrics() + return &TestServiceMetrics{sk: sk, tcm: tcm} +} + +type TestServiceInstanceMetrics struct { + sk ServiceAggregationKey + sik ServiceInstanceAggregationKey + tcm *TestCombinedMetrics +} + +func (tsm *TestServiceMetrics) AddServiceInstanceMetrics( + sik ServiceInstanceAggregationKey, +) *TestServiceInstanceMetrics { + svc := tsm.tcm.Services[tsm.sk] + svc.ServiceInstanceGroups[sik] = newServiceInstanceMetrics() + return &TestServiceInstanceMetrics{ + sik: sik, + sk: tsm.sk, + tcm: tsm.tcm, + } +} + +func (tsim *TestServiceInstanceMetrics) GetProto() *aggregationpb.CombinedMetrics { + return tsim.tcm.GetProto() +} + +func (tsim *TestServiceInstanceMetrics) Get() CombinedMetrics { + return tsim.tcm.Get() +} + +type TestTransactionCfg struct { + duration time.Duration + count int +} + +func (tsim *TestServiceInstanceMetrics) AddGlobalServiceInstanceOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, +) *TestServiceInstanceMetrics { + hash := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()). + Sum() + insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, hash) + return tsim +} + +type TestTransactionOpt func(TestTransactionCfg) TestTransactionCfg + +func WithTransactionDuration(d time.Duration) TestTransactionOpt { + return func(cfg TestTransactionCfg) TestTransactionCfg { + cfg.duration = d + return cfg + } +} + +func WithTransactionCount(c int) TestTransactionOpt { + return func(cfg TestTransactionCfg) TestTransactionCfg { + cfg.count = c + return cfg + } +} + +var defaultTestTransactionCfg = TestTransactionCfg{ + duration: time.Second, + count: 1, +} + +func (tsim *TestServiceInstanceMetrics) AddTransaction( + tk TransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + ktm := aggregationpb.KeyedTransactionMetricsFromVTPool() + ktm.Key = tk.ToProto() + ktm.Metrics = aggregationpb.TransactionMetricsFromVTPool() + ktm.Metrics.Histogram = HistogramToProto(hdr) + + svc := tsim.tcm.Services[tsim.sk] + svcIns := svc.ServiceInstanceGroups[tsim.sik] + svcIns.TransactionGroups[tk] = ktm + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddTransactionOverflow( + tk TransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + return tsim.AddTransactionOverflowWithServiceInstance(tsim.sik, tk, opts...) +} + +func (tsim *TestServiceInstanceMetrics) AddTransactionOverflowWithServiceInstance( + sik ServiceInstanceAggregationKey, + tk TransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.TransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + + svc := tsim.tcm.Services[tsim.sk] + hash := Hasher{}. + Chain(tsim.sk.ToProto()). + Chain(sik.ToProto()). + Chain(tk.ToProto()). + Sum() + svc.OverflowGroups.OverflowTransaction.Merge(from, hash) + tsim.tcm.Services[tsim.sk] = svc + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddGlobalTransactionOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + tk TransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.TransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(tk.ToProto()). + Sum() + tsim.tcm.OverflowServices.OverflowTransaction.Merge(from, hash) + insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddServiceTransaction( + stk ServiceTransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + kstm := aggregationpb.KeyedServiceTransactionMetricsFromVTPool() + kstm.Key = stk.ToProto() + kstm.Metrics = aggregationpb.ServiceTransactionMetricsFromVTPool() + kstm.Metrics.Histogram = HistogramToProto(hdr) + kstm.Metrics.SuccessCount += float64(cfg.count) + + svc := tsim.tcm.Services[tsim.sk] + svcIns := svc.ServiceInstanceGroups[tsim.sik] + svcIns.ServiceTransactionGroups[stk] = kstm + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddServiceTransactionOverflow( + stk ServiceTransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.ServiceTransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + from.SuccessCount += float64(cfg.count) + + svc := tsim.tcm.Services[tsim.sk] + hash := Hasher{}. + Chain(tsim.sk.ToProto()). + Chain(tsim.sik.ToProto()). + Chain(stk.ToProto()). + Sum() + svc.OverflowGroups.OverflowServiceTransaction.Merge(from, hash) + tsim.tcm.Services[tsim.sk] = svc + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddGlobalServiceTransactionOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + stk ServiceTransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.ServiceTransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + from.SuccessCount += float64(cfg.count) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(stk.ToProto()). + Sum() + tsim.tcm.OverflowServices.OverflowServiceTransaction.Merge(from, hash) + insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tsim +} + +type TestSpanCfg struct { + duration time.Duration + count int +} + +type TestSpanOpt func(TestSpanCfg) TestSpanCfg + +func WithSpanDuration(d time.Duration) TestSpanOpt { + return func(cfg TestSpanCfg) TestSpanCfg { + cfg.duration = d + return cfg + } +} + +func WithSpanCount(c int) TestSpanOpt { + return func(cfg TestSpanCfg) TestSpanCfg { + cfg.count = c + return cfg + } +} + +var defaultTestSpanCfg = TestSpanCfg{ + duration: time.Nanosecond, // for backward compatibility with previous tests + count: 1, +} + +func (tsim *TestServiceInstanceMetrics) AddSpan( + spk SpanAggregationKey, + opts ...TestSpanOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestSpanCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + ksm := aggregationpb.KeyedSpanMetricsFromVTPool() + ksm.Key = spk.ToProto() + ksm.Metrics = aggregationpb.SpanMetricsFromVTPool() + ksm.Metrics.Sum += float64(cfg.duration * time.Duration(cfg.count)) + ksm.Metrics.Count += float64(cfg.count) + + svc := tsim.tcm.Services[tsim.sk] + svcIns := svc.ServiceInstanceGroups[tsim.sik] + svcIns.SpanGroups[spk] = ksm + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddSpanOverflow( + spk SpanAggregationKey, + opts ...TestSpanOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestSpanCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + from := aggregationpb.SpanMetricsFromVTPool() + from.Sum += float64(cfg.duration * time.Duration(cfg.count)) + from.Count += float64(cfg.count) + + svc := tsim.tcm.Services[tsim.sk] + hash := Hasher{}. + Chain(tsim.sk.ToProto()). + Chain(tsim.sik.ToProto()). + Chain(spk.ToProto()). + Sum() + svc.OverflowGroups.OverflowSpan.Merge(from, hash) + tsim.tcm.Services[tsim.sk] = svc + return tsim +} + +func (tsim *TestServiceInstanceMetrics) AddGlobalSpanOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + spk SpanAggregationKey, + opts ...TestSpanOpt, +) *TestServiceInstanceMetrics { + cfg := defaultTestSpanCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + from := aggregationpb.SpanMetricsFromVTPool() + from.Sum += float64(cfg.duration * time.Duration(cfg.count)) + from.Count += float64(cfg.count) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(spk.ToProto()). + Sum() + tsim.tcm.OverflowServices.OverflowSpan.Merge(from, hash) + insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tsim +} diff --git a/aggregators/converter.go b/aggregators/converter.go index 3d7adc3..6b9f859 100644 --- a/aggregators/converter.go +++ b/aggregators/converter.go @@ -181,21 +181,21 @@ func CombinedMetricsToBatch( } // transaction metrics - for tk, tv := range sim.TransactionGroups { + for tk, ktm := range sim.TransactionGroups { event := getBaseEventWithLabels() - txnMetricsToAPMEvent(tk, tv, event, aggIntervalStr) + txnMetricsToAPMEvent(tk, ktm.Metrics, event, aggIntervalStr) b = append(b, event) } // service transaction metrics - for stk, stv := range sim.ServiceTransactionGroups { + for stk, kstm := range sim.ServiceTransactionGroups { event := getBaseEventWithLabels() - svcTxnMetricsToAPMEvent(stk, stv, event, aggIntervalStr) + svcTxnMetricsToAPMEvent(stk, kstm.Metrics, event, aggIntervalStr) b = append(b, event) } // service destination metrics - for spk, spv := range sim.SpanGroups { + for spk, kspm := range sim.SpanGroups { event := getBaseEventWithLabels() - spanMetricsToAPMEvent(spk, spv, event, aggIntervalStr) + spanMetricsToAPMEvent(spk, kspm.Metrics, event, aggIntervalStr) b = append(b, event) } @@ -414,11 +414,13 @@ func serviceMetricsToAPMEvent( func txnMetricsToAPMEvent( key TransactionAggregationKey, - metrics TransactionMetrics, + metrics *aggregationpb.TransactionMetrics, baseEvent *modelpb.APMEvent, intervalStr string, ) { - totalCount, counts, values := metrics.Histogram.Buckets() + histogram := hdrhistogram.New() + HistogramFromProto(histogram, metrics.Histogram) + totalCount, counts, values := histogram.Buckets() var eventSuccessCount modelpb.SummaryMetric switch key.EventOutcome { case "success": @@ -429,7 +431,6 @@ func txnMetricsToAPMEvent( case "unknown": // Keep both Count and Sum as 0. } - transactionDurationSummary := modelpb.SummaryMetric{ Count: totalCount, } @@ -546,12 +547,13 @@ func txnMetricsToAPMEvent( func svcTxnMetricsToAPMEvent( key ServiceTransactionAggregationKey, - metrics ServiceTransactionMetrics, + metrics *aggregationpb.ServiceTransactionMetrics, baseEvent *modelpb.APMEvent, intervalStr string, ) { - totalCount, counts, values := metrics.Histogram.Buckets() - + histogram := hdrhistogram.New() + HistogramFromProto(histogram, metrics.Histogram) + totalCount, counts, values := histogram.Buckets() transactionDurationSummary := modelpb.SummaryMetric{ Count: totalCount, } @@ -581,7 +583,7 @@ func svcTxnMetricsToAPMEvent( func spanMetricsToAPMEvent( key SpanAggregationKey, - metrics SpanMetrics, + metrics *aggregationpb.SpanMetrics, baseEvent *modelpb.APMEvent, intervalStr string, ) { diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go index f5c0bd0..b5c1dbc 100644 --- a/aggregators/converter_test.go +++ b/aggregators/converter_test.go @@ -4,637 +4,617 @@ package aggregators -import ( - "fmt" - "net/netip" - "testing" - "time" +// func TestEventToCombinedMetrics(t *testing.T) { +// ts := time.Now().UTC() +// receivedTS := ts.Add(time.Second) +// baseEvent := &modelpb.APMEvent{ +// Timestamp: timestamppb.New(ts), +// ParentId: "nonroot", +// Service: &modelpb.Service{Name: "test"}, +// Event: &modelpb.Event{ +// Duration: durationpb.New(time.Second), +// Outcome: "success", +// Received: timestamppb.New(receivedTS), +// }, +// } +// for _, tc := range []struct { +// name string +// input func() *modelpb.APMEvent +// partitioner Partitioner +// expected func() []*aggregationpb.CombinedMetrics +// }{ +// { +// name: "with-zero-rep-count-txn", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Transaction = &modelpb.Transaction{ +// Name: "testtxn", +// Type: "testtyp", +// RepresentativeCount: 0, +// } +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return nil +// }, +// }, +// { +// name: "with-good-txn", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Transaction = &modelpb.Transaction{ +// Name: "testtxn", +// Type: "testtyp", +// RepresentativeCount: 1, +// } +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return []*aggregationpb.CombinedMetrics{ +// (*CombinedMetrics)(createTestCombinedMetrics( +// withEventsTotal(1), +// withYoungestEventTimestamp(receivedTS), +// ).addTransaction( +// ts.Truncate(time.Minute), "test", "", +// testTransaction{ +// txnName: "testtxn", +// txnType: "testtyp", +// eventOutcome: "success", +// count: 1, +// }, +// ).addServiceTransaction( +// ts.Truncate(time.Minute), "test", "", +// testServiceTransaction{ +// txnType: "testtyp", +// count: 1, +// }, +// )).ToProto(), +// } +// }, +// }, +// { +// name: "with-zero-rep-count-span", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Span = &modelpb.Span{ +// Name: "testspan", +// Type: "testtyp", +// RepresentativeCount: 0, +// } +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return nil +// }, +// }, +// { +// name: "with-no-exit-span", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Span = &modelpb.Span{ +// Name: "testspan", +// Type: "testtyp", +// RepresentativeCount: 1, +// } +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return nil +// }, +// }, +// { +// name: "with-good-span-dest-svc", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Span = &modelpb.Span{ +// Name: "testspan", +// Type: "testtyp", +// RepresentativeCount: 1, +// } +// event.Service.Target = &modelpb.ServiceTarget{ +// Name: "psql", +// Type: "db", +// } +// // Current test structs are hardcoded to use 1ns for spans +// event.Event.Duration = durationpb.New(time.Nanosecond) +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return []*aggregationpb.CombinedMetrics{ +// (*CombinedMetrics)(createTestCombinedMetrics( +// withEventsTotal(1), +// withYoungestEventTimestamp(receivedTS), +// ).addSpan( +// ts.Truncate(time.Minute), "test", "", +// testSpan{ +// spanName: "testspan", +// targetName: "psql", +// targetType: "db", +// outcome: "success", +// count: 1, +// }, +// )).ToProto(), +// } +// }, +// }, +// { +// name: "with-good-span-svc-target", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Span = &modelpb.Span{ +// Name: "testspan", +// Type: "testtyp", +// RepresentativeCount: 1, +// DestinationService: &modelpb.DestinationService{ +// Resource: "db", +// }, +// } +// // Current test structs are hardcoded to use 1ns for spans +// event.Event.Duration = durationpb.New(time.Nanosecond) +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return []*aggregationpb.CombinedMetrics{ +// (*CombinedMetrics)(createTestCombinedMetrics( +// withEventsTotal(1), +// withYoungestEventTimestamp(receivedTS), +// ).addSpan( +// ts.Truncate(time.Minute), "test", "", +// testSpan{ +// spanName: "testspan", +// destinationResource: "db", +// outcome: "success", +// count: 1, +// }, +// )).ToProto(), +// } +// }, +// }, +// { +// name: "with-metricset", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Metricset = &modelpb.Metricset{ +// Name: "testmetricset", +// Interval: "1m", +// } +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return []*aggregationpb.CombinedMetrics{ +// (*CombinedMetrics)(createTestCombinedMetrics( +// withEventsTotal(1), +// withYoungestEventTimestamp(receivedTS), +// ).addServiceInstance( +// ts.Truncate(time.Minute), "test", "", +// )).ToProto(), +// } +// }, +// }, +// { +// name: "with-log", +// input: func() *modelpb.APMEvent { +// event := baseEvent.CloneVT() +// event.Log = &modelpb.Log{} +// return event +// }, +// partitioner: NewHashPartitioner(1), +// expected: func() []*aggregationpb.CombinedMetrics { +// return []*aggregationpb.CombinedMetrics{ +// (*CombinedMetrics)(createTestCombinedMetrics( +// withEventsTotal(1), +// withYoungestEventTimestamp(receivedTS), +// ).addServiceInstance( +// ts.Truncate(time.Minute), "test", "", +// )).ToProto(), +// } +// }, +// }, +// } { +// t.Run(tc.name, func(t *testing.T) { +// cmk := CombinedMetricsKey{ +// Interval: time.Minute, +// ProcessingTime: time.Now().Truncate(time.Minute), +// ID: EncodeToCombinedMetricsKeyID(t, "ab01"), +// } +// var actual []*aggregationpb.CombinedMetrics +// collector := func( +// _ CombinedMetricsKey, +// m *aggregationpb.CombinedMetrics, +// ) error { +// actual = append(actual, m.CloneVT()) +// return nil +// } +// err := EventToCombinedMetrics(tc.input(), cmk, tc.partitioner, collector) +// require.NoError(t, err) +// assert.Empty(t, cmp.Diff( +// tc.expected(), actual, +// cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { +// return a.Equal(&b) +// }), +// protocmp.Transform(), +// protocmp.IgnoreEmptyMessages(), +// )) +// }) +// } +// } - "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "google.golang.org/protobuf/testing/protocmp" - "google.golang.org/protobuf/types/known/durationpb" - "google.golang.org/protobuf/types/known/timestamppb" - - "github.com/elastic/apm-data/model/modelpb" - - "github.com/elastic/apm-aggregation/aggregationpb" - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" -) - -func TestEventToCombinedMetrics(t *testing.T) { - ts := time.Now().UTC() - receivedTS := ts.Add(time.Second) - baseEvent := &modelpb.APMEvent{ - Timestamp: timestamppb.New(ts), - ParentId: "nonroot", - Service: &modelpb.Service{Name: "test"}, - Event: &modelpb.Event{ - Duration: durationpb.New(time.Second), - Outcome: "success", - Received: timestamppb.New(receivedTS), - }, - } - for _, tc := range []struct { - name string - input func() *modelpb.APMEvent - partitioner Partitioner - expected func() []*aggregationpb.CombinedMetrics - }{ - { - name: "with-zero-rep-count-txn", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Transaction = &modelpb.Transaction{ - Name: "testtxn", - Type: "testtyp", - RepresentativeCount: 0, - } - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return nil - }, - }, - { - name: "with-good-txn", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Transaction = &modelpb.Transaction{ - Name: "testtxn", - Type: "testtyp", - RepresentativeCount: 1, - } - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return []*aggregationpb.CombinedMetrics{ - (*CombinedMetrics)(createTestCombinedMetrics( - withEventsTotal(1), - withYoungestEventTimestamp(receivedTS), - ).addTransaction( - ts.Truncate(time.Minute), "test", "", - testTransaction{ - txnName: "testtxn", - txnType: "testtyp", - eventOutcome: "success", - count: 1, - }, - ).addServiceTransaction( - ts.Truncate(time.Minute), "test", "", - testServiceTransaction{ - txnType: "testtyp", - count: 1, - }, - )).ToProto(), - } - }, - }, - { - name: "with-zero-rep-count-span", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Span = &modelpb.Span{ - Name: "testspan", - Type: "testtyp", - RepresentativeCount: 0, - } - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return nil - }, - }, - { - name: "with-no-exit-span", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Span = &modelpb.Span{ - Name: "testspan", - Type: "testtyp", - RepresentativeCount: 1, - } - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return nil - }, - }, - { - name: "with-good-span-dest-svc", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Span = &modelpb.Span{ - Name: "testspan", - Type: "testtyp", - RepresentativeCount: 1, - } - event.Service.Target = &modelpb.ServiceTarget{ - Name: "psql", - Type: "db", - } - // Current test structs are hardcoded to use 1ns for spans - event.Event.Duration = durationpb.New(time.Nanosecond) - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return []*aggregationpb.CombinedMetrics{ - (*CombinedMetrics)(createTestCombinedMetrics( - withEventsTotal(1), - withYoungestEventTimestamp(receivedTS), - ).addSpan( - ts.Truncate(time.Minute), "test", "", - testSpan{ - spanName: "testspan", - targetName: "psql", - targetType: "db", - outcome: "success", - count: 1, - }, - )).ToProto(), - } - }, - }, - { - name: "with-good-span-svc-target", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Span = &modelpb.Span{ - Name: "testspan", - Type: "testtyp", - RepresentativeCount: 1, - DestinationService: &modelpb.DestinationService{ - Resource: "db", - }, - } - // Current test structs are hardcoded to use 1ns for spans - event.Event.Duration = durationpb.New(time.Nanosecond) - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return []*aggregationpb.CombinedMetrics{ - (*CombinedMetrics)(createTestCombinedMetrics( - withEventsTotal(1), - withYoungestEventTimestamp(receivedTS), - ).addSpan( - ts.Truncate(time.Minute), "test", "", - testSpan{ - spanName: "testspan", - destinationResource: "db", - outcome: "success", - count: 1, - }, - )).ToProto(), - } - }, - }, - { - name: "with-metricset", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Metricset = &modelpb.Metricset{ - Name: "testmetricset", - Interval: "1m", - } - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return []*aggregationpb.CombinedMetrics{ - (*CombinedMetrics)(createTestCombinedMetrics( - withEventsTotal(1), - withYoungestEventTimestamp(receivedTS), - ).addServiceInstance( - ts.Truncate(time.Minute), "test", "", - )).ToProto(), - } - }, - }, - { - name: "with-log", - input: func() *modelpb.APMEvent { - event := baseEvent.CloneVT() - event.Log = &modelpb.Log{} - return event - }, - partitioner: NewHashPartitioner(1), - expected: func() []*aggregationpb.CombinedMetrics { - return []*aggregationpb.CombinedMetrics{ - (*CombinedMetrics)(createTestCombinedMetrics( - withEventsTotal(1), - withYoungestEventTimestamp(receivedTS), - ).addServiceInstance( - ts.Truncate(time.Minute), "test", "", - )).ToProto(), - } - }, - }, - } { - t.Run(tc.name, func(t *testing.T) { - cmk := CombinedMetricsKey{ - Interval: time.Minute, - ProcessingTime: time.Now().Truncate(time.Minute), - ID: EncodeToCombinedMetricsKeyID(t, "ab01"), - } - var actual []*aggregationpb.CombinedMetrics - collector := func( - _ CombinedMetricsKey, - m *aggregationpb.CombinedMetrics, - ) error { - actual = append(actual, m.CloneVT()) - return nil - } - err := EventToCombinedMetrics(tc.input(), cmk, tc.partitioner, collector) - require.NoError(t, err) - assert.Empty(t, cmp.Diff( - tc.expected(), actual, - cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { - return a.Equal(&b) - }), - protocmp.Transform(), - protocmp.IgnoreEmptyMessages(), - )) - }) - } -} - -func TestCombinedMetricsToBatch(t *testing.T) { - ts := time.Now() - aggIvl := time.Minute - processingTime := ts.Truncate(aggIvl) - svcName := "test" - coldstart := true - var ( - faas = &modelpb.Faas{Id: "f1", ColdStart: &coldstart, Version: "v2", TriggerType: "http"} - txn = testTransaction{txnName: "txn", txnType: "typ", count: 100} - txnFaas = testTransaction{txnName: "txn", txnType: "typ", count: 100, faas: faas} - svcTxn = testServiceTransaction{txnType: "typ", count: 100} - span = testSpan{spanName: "spn", destinationResource: "postgresql", count: 1} - overflowTxn = testTransaction{txnName: "_other", count: 100} - overflowSvcTxn = testServiceTransaction{txnType: "_other", count: 100} - overflowSpan = testSpan{targetName: "_other", count: 1} - ) - for _, tc := range []struct { - name string - aggregationInterval time.Duration - combinedMetrics CombinedMetrics - expectedEvents modelpb.Batch - }{ - { - name: "no_overflow_without_faas", - aggregationInterval: aggIvl, - combinedMetrics: CombinedMetrics( - *createTestCombinedMetrics(). - addTransaction(ts, svcName, "", txn). - addServiceTransaction(ts, svcName, "", svcTxn). - addSpan(ts, svcName, "", span), - ), - expectedEvents: []*modelpb.APMEvent{ - createTestTransactionMetric(ts, aggIvl, svcName, txn, 0), - createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), - createTestSpanMetric(ts, aggIvl, svcName, span, 0), - createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), - }, - }, - { - name: "no_overflow", - aggregationInterval: aggIvl, - combinedMetrics: CombinedMetrics( - *createTestCombinedMetrics(). - addTransaction(ts, svcName, "", txnFaas). - addServiceTransaction(ts, svcName, "", svcTxn). - addSpan(ts, svcName, "", span), - ), - expectedEvents: []*modelpb.APMEvent{ - createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), - createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), - createTestSpanMetric(ts, aggIvl, svcName, span, 0), - createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), - }, - }, - { - name: "overflow", - aggregationInterval: aggIvl, - combinedMetrics: CombinedMetrics( - *createTestCombinedMetrics(). - addTransaction(ts, svcName, "", txnFaas). - addServiceTransaction(ts, svcName, "", svcTxn). - addSpan(ts, svcName, "", span). - addPerServiceOverflowTransaction(ts, svcName, "", txn). - addPerServiceOverflowServiceTransaction(ts, svcName, "", svcTxn). - addPerServiceOverflowSpan(ts, svcName, "", span). - addGlobalServiceOverflowServiceInstance(ts, "overflow", ""), - ), - expectedEvents: []*modelpb.APMEvent{ - createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), - createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), - createTestSpanMetric(ts, aggIvl, svcName, span, 0), - createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), - // Events due to overflow - createTestTransactionMetric(processingTime, aggIvl, svcName, overflowTxn, 1), - createTestServiceTransactionMetric(processingTime, aggIvl, svcName, overflowSvcTxn, 1), - createTestSpanMetric(processingTime, aggIvl, svcName, overflowSpan, 1), - createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 1), - }, - }, - { - name: "service_instance_overflow_in_global_and_per_svc", - aggregationInterval: aggIvl, - combinedMetrics: CombinedMetrics( - *createTestCombinedMetrics(). - addServiceInstance(ts, "svc1", ""). - addGlobalServiceOverflowServiceInstance(ts, "svc1", "1"). - addGlobalServiceOverflowServiceInstance(ts, "svc2", "1"), - ), - expectedEvents: []*modelpb.APMEvent{ - createTestServiceSummaryMetric(ts, aggIvl, "svc1", 0), - createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 2), - }, - }, - } { - t.Run(tc.name, func(t *testing.T) { - b, err := CombinedMetricsToBatch( - tc.combinedMetrics, - processingTime, - tc.aggregationInterval, - ) - assert.NoError(t, err) - assert.Empty(t, cmp.Diff( - tc.expectedEvents, *b, - cmpopts.IgnoreTypes(netip.Addr{}), - cmpopts.SortSlices(func(e1, e2 *modelpb.APMEvent) bool { - m1Name := e1.GetMetricset().GetName() - m2Name := e2.GetMetricset().GetName() - if m1Name != m2Name { - return m1Name < m2Name - } - - a1Name := e1.GetAgent().GetName() - a2Name := e2.GetAgent().GetName() - if a1Name != a2Name { - return a1Name < a2Name - } - - return e1.GetService().GetEnvironment() < e2.GetService().GetEnvironment() - }), - protocmp.Transform(), - )) - }) - } -} - -func BenchmarkCombinedMetricsToBatch(b *testing.B) { - ai := time.Hour - ts := time.Now() - pt := ts.Truncate(ai) - cardinality := 10 - tcm := createTestCombinedMetrics() - for i := 0; i < cardinality; i++ { - txnName := fmt.Sprintf("txn%d", i) - txnType := fmt.Sprintf("typ%d", i) - spanName := fmt.Sprintf("spn%d", i) - tcm = tcm.addTransaction(ts, "bench", "", testTransaction{txnName: txnName, txnType: txnType, count: 200}) - tcm = tcm.addServiceTransaction(ts, "bench", "", testServiceTransaction{txnType: txnType, count: 200}) - tcm = tcm.addSpan(ts, "bench", "", testSpan{spanName: spanName}) - } - cm := CombinedMetrics(*tcm) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err := CombinedMetricsToBatch(cm, pt, ai) - if err != nil { - b.Fatal(err) - } - } -} - -func BenchmarkEventToCombinedMetrics(b *testing.B) { - event := &modelpb.APMEvent{ - Timestamp: timestamppb.Now(), - ParentId: "nonroot", - Service: &modelpb.Service{ - Name: "test", - }, - Event: &modelpb.Event{ - Duration: durationpb.New(time.Second), - Outcome: "success", - }, - Transaction: &modelpb.Transaction{ - RepresentativeCount: 1, - Name: "testtxn", - Type: "testtyp", - }, - } - cmk := CombinedMetricsKey{ - Interval: time.Minute, - ProcessingTime: time.Now().Truncate(time.Minute), - ID: EncodeToCombinedMetricsKeyID(b, "ab01"), - } - partitioner := NewHashPartitioner(1) - noop := func(_ CombinedMetricsKey, _ *aggregationpb.CombinedMetrics) error { - return nil - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - err := EventToCombinedMetrics(event, cmk, partitioner, noop) - if err != nil { - b.Fatal(err) - } - } -} - -func createTestServiceSummaryMetric( - ts time.Time, - ivl time.Duration, - svcName string, - overflowCount int, -) *modelpb.APMEvent { - var metricsetSamples []*modelpb.MetricsetSample - if overflowCount > 0 { - metricsetSamples = []*modelpb.MetricsetSample{ - { - Name: "service_summary.aggregation.overflow_count", - Value: float64(overflowCount), - }, - } - } - return &modelpb.APMEvent{ - Timestamp: timestamppb.New(ts), - Metricset: &modelpb.Metricset{ - Name: "service_summary", - Samples: metricsetSamples, - Interval: formatDuration(ivl), - }, - Service: &modelpb.Service{Name: svcName}, - } -} - -func createTestTransactionMetric( - ts time.Time, - ivl time.Duration, - svcName string, - txn testTransaction, - overflowCount int, -) *modelpb.APMEvent { - histRep := hdrhistogram.New() - for i := 0; i < txn.count; i++ { - histRep.RecordDuration(time.Second, 1) - } - - total, counts, values := histRep.Buckets() - var eventSuccessSummary modelpb.SummaryMetric - switch txn.eventOutcome { - case "success": - eventSuccessSummary.Count = total - eventSuccessSummary.Sum = float64(total) - case "failure": - eventSuccessSummary.Count = total - case "unknown": - // Keep both Count and Sum as 0. - } - transactionDurationSummary := &modelpb.SummaryMetric{ - Count: total, - // only 1 expected element - Sum: values[0] * float64(counts[0]), - } - var metricsetSamples []*modelpb.MetricsetSample - if overflowCount > 0 { - metricsetSamples = []*modelpb.MetricsetSample{ - { - Name: "transaction.aggregation.overflow_count", - Value: float64(overflowCount), - }, - } - } - return &modelpb.APMEvent{ - Timestamp: timestamppb.New(ts), - Metricset: &modelpb.Metricset{ - Name: "transaction", - Interval: formatDuration(ivl), - Samples: metricsetSamples, - DocCount: total, - }, - Service: &modelpb.Service{Name: svcName}, - Transaction: &modelpb.Transaction{ - Name: txn.txnName, - Type: txn.txnType, - DurationHistogram: &modelpb.Histogram{ - Counts: counts, - Values: values, - }, - DurationSummary: transactionDurationSummary, - }, - Faas: txn.faas, - Event: &modelpb.Event{ - SuccessCount: &eventSuccessSummary, - }, - } -} - -func createTestServiceTransactionMetric( - ts time.Time, - ivl time.Duration, - svcName string, - svcTxn testServiceTransaction, - overflowCount int, -) *modelpb.APMEvent { - histRep := hdrhistogram.New() - for i := 0; i < svcTxn.count; i++ { - histRep.RecordDuration(time.Second, 1) - } - total, counts, values := histRep.Buckets() - transactionDurationSummary := &modelpb.SummaryMetric{ - Count: total, - // only 1 expected element - Sum: values[0] * float64(counts[0]), - } - var metricsetSamples []*modelpb.MetricsetSample - if overflowCount > 0 { - metricsetSamples = []*modelpb.MetricsetSample{ - { - Name: "service_transaction.aggregation.overflow_count", - Value: float64(overflowCount), - }, - } - } - return &modelpb.APMEvent{ - Timestamp: timestamppb.New(ts), - Metricset: &modelpb.Metricset{ - Name: "service_transaction", - Interval: formatDuration(ivl), - Samples: metricsetSamples, - DocCount: total, - }, - Service: &modelpb.Service{Name: svcName}, - Transaction: &modelpb.Transaction{ - Type: svcTxn.txnType, - DurationHistogram: &modelpb.Histogram{ - Counts: counts, - Values: values, - }, - DurationSummary: transactionDurationSummary, - }, - Event: &modelpb.Event{ - SuccessCount: &modelpb.SummaryMetric{ - // test code generates all success events - Count: int64(svcTxn.count), - Sum: float64(svcTxn.count), - }, - }, - } -} - -func createTestSpanMetric( - ts time.Time, - ivl time.Duration, - svcName string, - span testSpan, - overflowCount int, -) *modelpb.APMEvent { - var metricsetSamples []*modelpb.MetricsetSample - if overflowCount > 0 { - metricsetSamples = []*modelpb.MetricsetSample{ - { - Name: "service_destination.aggregation.overflow_count", - Value: float64(overflowCount), - }, - } - } - var target *modelpb.ServiceTarget - if span.targetName != "" { - target = &modelpb.ServiceTarget{ - Name: span.targetName, - } - } - return &modelpb.APMEvent{ - Timestamp: timestamppb.New(ts), - Metricset: &modelpb.Metricset{ - Name: "service_destination", - Interval: formatDuration(ivl), - Samples: metricsetSamples, - DocCount: int64(span.count), - }, - Service: &modelpb.Service{ - Name: svcName, - Target: target, - }, - Span: &modelpb.Span{ - Name: span.spanName, - DestinationService: &modelpb.DestinationService{ - Resource: span.destinationResource, - ResponseTime: &modelpb.AggregatedDuration{ - // test code generates 1 count for 1 ns - Count: int64(span.count), - Sum: durationpb.New(time.Duration(span.count)), - }, - }, - }, - } -} +// func TestCombinedMetricsToBatch(t *testing.T) { +// ts := time.Now() +// aggIvl := time.Minute +// processingTime := ts.Truncate(aggIvl) +// svcName := "test" +// coldstart := true +// var ( +// faas = &modelpb.Faas{Id: "f1", ColdStart: &coldstart, Version: "v2", TriggerType: "http"} +// txn = testTransaction{txnName: "txn", txnType: "typ", count: 100} +// txnFaas = testTransaction{txnName: "txn", txnType: "typ", count: 100, faas: faas} +// svcTxn = testServiceTransaction{txnType: "typ", count: 100} +// span = testSpan{spanName: "spn", destinationResource: "postgresql", count: 1} +// overflowTxn = testTransaction{txnName: "_other", count: 100} +// overflowSvcTxn = testServiceTransaction{txnType: "_other", count: 100} +// overflowSpan = testSpan{targetName: "_other", count: 1} +// ) +// for _, tc := range []struct { +// name string +// aggregationInterval time.Duration +// combinedMetrics CombinedMetrics +// expectedEvents modelpb.Batch +// }{ +// { +// name: "no_overflow_without_faas", +// aggregationInterval: aggIvl, +// combinedMetrics: CombinedMetrics( +// *createTestCombinedMetrics(). +// addTransaction(ts, svcName, "", txn). +// addServiceTransaction(ts, svcName, "", svcTxn). +// addSpan(ts, svcName, "", span), +// ), +// expectedEvents: []*modelpb.APMEvent{ +// createTestTransactionMetric(ts, aggIvl, svcName, txn, 0), +// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), +// createTestSpanMetric(ts, aggIvl, svcName, span, 0), +// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), +// }, +// }, +// { +// name: "no_overflow", +// aggregationInterval: aggIvl, +// combinedMetrics: CombinedMetrics( +// *createTestCombinedMetrics(). +// addTransaction(ts, svcName, "", txnFaas). +// addServiceTransaction(ts, svcName, "", svcTxn). +// addSpan(ts, svcName, "", span), +// ), +// expectedEvents: []*modelpb.APMEvent{ +// createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), +// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), +// createTestSpanMetric(ts, aggIvl, svcName, span, 0), +// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), +// }, +// }, +// { +// name: "overflow", +// aggregationInterval: aggIvl, +// combinedMetrics: CombinedMetrics( +// *createTestCombinedMetrics(). +// addTransaction(ts, svcName, "", txnFaas). +// addServiceTransaction(ts, svcName, "", svcTxn). +// addSpan(ts, svcName, "", span). +// addPerServiceOverflowTransaction(ts, svcName, "", txn). +// addPerServiceOverflowServiceTransaction(ts, svcName, "", svcTxn). +// addPerServiceOverflowSpan(ts, svcName, "", span). +// addGlobalServiceOverflowServiceInstance(ts, "overflow", ""), +// ), +// expectedEvents: []*modelpb.APMEvent{ +// createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), +// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), +// createTestSpanMetric(ts, aggIvl, svcName, span, 0), +// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), +// // Events due to overflow +// createTestTransactionMetric(processingTime, aggIvl, svcName, overflowTxn, 1), +// createTestServiceTransactionMetric(processingTime, aggIvl, svcName, overflowSvcTxn, 1), +// createTestSpanMetric(processingTime, aggIvl, svcName, overflowSpan, 1), +// createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 1), +// }, +// }, +// { +// name: "service_instance_overflow_in_global_and_per_svc", +// aggregationInterval: aggIvl, +// combinedMetrics: CombinedMetrics( +// *createTestCombinedMetrics(). +// addServiceInstance(ts, "svc1", ""). +// addGlobalServiceOverflowServiceInstance(ts, "svc1", "1"). +// addGlobalServiceOverflowServiceInstance(ts, "svc2", "1"), +// ), +// expectedEvents: []*modelpb.APMEvent{ +// createTestServiceSummaryMetric(ts, aggIvl, "svc1", 0), +// createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 2), +// }, +// }, +// } { +// t.Run(tc.name, func(t *testing.T) { +// b, err := CombinedMetricsToBatch( +// tc.combinedMetrics, +// processingTime, +// tc.aggregationInterval, +// ) +// assert.NoError(t, err) +// assert.Empty(t, cmp.Diff( +// tc.expectedEvents, *b, +// cmpopts.IgnoreTypes(netip.Addr{}), +// cmpopts.SortSlices(func(e1, e2 *modelpb.APMEvent) bool { +// m1Name := e1.GetMetricset().GetName() +// m2Name := e2.GetMetricset().GetName() +// if m1Name != m2Name { +// return m1Name < m2Name +// } +// +// a1Name := e1.GetAgent().GetName() +// a2Name := e2.GetAgent().GetName() +// if a1Name != a2Name { +// return a1Name < a2Name +// } +// +// return e1.GetService().GetEnvironment() < e2.GetService().GetEnvironment() +// }), +// protocmp.Transform(), +// )) +// }) +// } +// } +// +// func BenchmarkCombinedMetricsToBatch(b *testing.B) { +// ai := time.Hour +// ts := time.Now() +// pt := ts.Truncate(ai) +// cardinality := 10 +// tcm := createTestCombinedMetrics() +// for i := 0; i < cardinality; i++ { +// txnName := fmt.Sprintf("txn%d", i) +// txnType := fmt.Sprintf("typ%d", i) +// spanName := fmt.Sprintf("spn%d", i) +// tcm = tcm.addTransaction(ts, "bench", "", testTransaction{txnName: txnName, txnType: txnType, count: 200}) +// tcm = tcm.addServiceTransaction(ts, "bench", "", testServiceTransaction{txnType: txnType, count: 200}) +// tcm = tcm.addSpan(ts, "bench", "", testSpan{spanName: spanName}) +// } +// cm := CombinedMetrics(*tcm) +// b.ResetTimer() +// for i := 0; i < b.N; i++ { +// _, err := CombinedMetricsToBatch(cm, pt, ai) +// if err != nil { +// b.Fatal(err) +// } +// } +// } +// +// func BenchmarkEventToCombinedMetrics(b *testing.B) { +// event := &modelpb.APMEvent{ +// Timestamp: timestamppb.Now(), +// ParentId: "nonroot", +// Service: &modelpb.Service{ +// Name: "test", +// }, +// Event: &modelpb.Event{ +// Duration: durationpb.New(time.Second), +// Outcome: "success", +// }, +// Transaction: &modelpb.Transaction{ +// RepresentativeCount: 1, +// Name: "testtxn", +// Type: "testtyp", +// }, +// } +// cmk := CombinedMetricsKey{ +// Interval: time.Minute, +// ProcessingTime: time.Now().Truncate(time.Minute), +// ID: EncodeToCombinedMetricsKeyID(b, "ab01"), +// } +// partitioner := NewHashPartitioner(1) +// noop := func(_ CombinedMetricsKey, _ *aggregationpb.CombinedMetrics) error { +// return nil +// } +// b.ResetTimer() +// for i := 0; i < b.N; i++ { +// err := EventToCombinedMetrics(event, cmk, partitioner, noop) +// if err != nil { +// b.Fatal(err) +// } +// } +// } +// +// func createTestServiceSummaryMetric( +// ts time.Time, +// ivl time.Duration, +// svcName string, +// overflowCount int, +// ) *modelpb.APMEvent { +// var metricsetSamples []*modelpb.MetricsetSample +// if overflowCount > 0 { +// metricsetSamples = []*modelpb.MetricsetSample{ +// { +// Name: "service_summary.aggregation.overflow_count", +// Value: float64(overflowCount), +// }, +// } +// } +// return &modelpb.APMEvent{ +// Timestamp: timestamppb.New(ts), +// Metricset: &modelpb.Metricset{ +// Name: "service_summary", +// Samples: metricsetSamples, +// Interval: formatDuration(ivl), +// }, +// Service: &modelpb.Service{Name: svcName}, +// } +// } +// +// func createTestTransactionMetric( +// ts time.Time, +// ivl time.Duration, +// svcName string, +// txn testTransaction, +// overflowCount int, +// ) *modelpb.APMEvent { +// histRep := hdrhistogram.New() +// for i := 0; i < txn.count; i++ { +// histRep.RecordDuration(time.Second, 1) +// } +// +// total, counts, values := histRep.Buckets() +// var eventSuccessSummary modelpb.SummaryMetric +// switch txn.eventOutcome { +// case "success": +// eventSuccessSummary.Count = total +// eventSuccessSummary.Sum = float64(total) +// case "failure": +// eventSuccessSummary.Count = total +// case "unknown": +// // Keep both Count and Sum as 0. +// } +// transactionDurationSummary := &modelpb.SummaryMetric{ +// Count: total, +// // only 1 expected element +// Sum: values[0] * float64(counts[0]), +// } +// var metricsetSamples []*modelpb.MetricsetSample +// if overflowCount > 0 { +// metricsetSamples = []*modelpb.MetricsetSample{ +// { +// Name: "transaction.aggregation.overflow_count", +// Value: float64(overflowCount), +// }, +// } +// } +// return &modelpb.APMEvent{ +// Timestamp: timestamppb.New(ts), +// Metricset: &modelpb.Metricset{ +// Name: "transaction", +// Interval: formatDuration(ivl), +// Samples: metricsetSamples, +// DocCount: total, +// }, +// Service: &modelpb.Service{Name: svcName}, +// Transaction: &modelpb.Transaction{ +// Name: txn.txnName, +// Type: txn.txnType, +// DurationHistogram: &modelpb.Histogram{ +// Counts: counts, +// Values: values, +// }, +// DurationSummary: transactionDurationSummary, +// }, +// Faas: txn.faas, +// Event: &modelpb.Event{ +// SuccessCount: &eventSuccessSummary, +// }, +// } +// } +// +// func createTestServiceTransactionMetric( +// ts time.Time, +// ivl time.Duration, +// svcName string, +// svcTxn testServiceTransaction, +// overflowCount int, +// ) *modelpb.APMEvent { +// histRep := hdrhistogram.New() +// for i := 0; i < svcTxn.count; i++ { +// histRep.RecordDuration(time.Second, 1) +// } +// total, counts, values := histRep.Buckets() +// transactionDurationSummary := &modelpb.SummaryMetric{ +// Count: total, +// // only 1 expected element +// Sum: values[0] * float64(counts[0]), +// } +// var metricsetSamples []*modelpb.MetricsetSample +// if overflowCount > 0 { +// metricsetSamples = []*modelpb.MetricsetSample{ +// { +// Name: "service_transaction.aggregation.overflow_count", +// Value: float64(overflowCount), +// }, +// } +// } +// return &modelpb.APMEvent{ +// Timestamp: timestamppb.New(ts), +// Metricset: &modelpb.Metricset{ +// Name: "service_transaction", +// Interval: formatDuration(ivl), +// Samples: metricsetSamples, +// DocCount: total, +// }, +// Service: &modelpb.Service{Name: svcName}, +// Transaction: &modelpb.Transaction{ +// Type: svcTxn.txnType, +// DurationHistogram: &modelpb.Histogram{ +// Counts: counts, +// Values: values, +// }, +// DurationSummary: transactionDurationSummary, +// }, +// Event: &modelpb.Event{ +// SuccessCount: &modelpb.SummaryMetric{ +// // test code generates all success events +// Count: int64(svcTxn.count), +// Sum: float64(svcTxn.count), +// }, +// }, +// } +// } +// +// func createTestSpanMetric( +// ts time.Time, +// ivl time.Duration, +// svcName string, +// span testSpan, +// overflowCount int, +// ) *modelpb.APMEvent { +// var metricsetSamples []*modelpb.MetricsetSample +// if overflowCount > 0 { +// metricsetSamples = []*modelpb.MetricsetSample{ +// { +// Name: "service_destination.aggregation.overflow_count", +// Value: float64(overflowCount), +// }, +// } +// } +// var target *modelpb.ServiceTarget +// if span.targetName != "" { +// target = &modelpb.ServiceTarget{ +// Name: span.targetName, +// } +// } +// return &modelpb.APMEvent{ +// Timestamp: timestamppb.New(ts), +// Metricset: &modelpb.Metricset{ +// Name: "service_destination", +// Interval: formatDuration(ivl), +// Samples: metricsetSamples, +// DocCount: int64(span.count), +// }, +// Service: &modelpb.Service{ +// Name: svcName, +// Target: target, +// }, +// Span: &modelpb.Span{ +// Name: span.spanName, +// DestinationService: &modelpb.DestinationService{ +// Resource: span.destinationResource, +// ResponseTime: &modelpb.AggregatedDuration{ +// // test code generates 1 count for 1 ns +// Count: int64(span.count), +// Sum: durationpb.New(time.Duration(span.count)), +// }, +// }, +// }, +// } +// } diff --git a/aggregators/merger.go b/aggregators/merger.go index bf5df4c..f72fd8e 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -8,8 +8,7 @@ import ( "io" "github.com/axiomhq/hyperloglog" - - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-aggregation/aggregationpb" ) type combinedMetricsMerger struct { @@ -18,20 +17,20 @@ type combinedMetricsMerger struct { } func (m *combinedMetricsMerger) MergeNewer(value []byte) error { - var from CombinedMetrics - if err := from.UnmarshalBinary(value); err != nil { + from := aggregationpb.CombinedMetricsFromVTPool() + if err := from.UnmarshalVT(value); err != nil { return err } - merge(&m.metrics, &from, m.limits) + m.merge(from) return nil } func (m *combinedMetricsMerger) MergeOlder(value []byte) error { - var from CombinedMetrics - if err := from.UnmarshalBinary(value); err != nil { + from := aggregationpb.CombinedMetricsFromVTPool() + if err := from.UnmarshalVT(value); err != nil { return err } - merge(&m.metrics, &from, m.limits) + m.merge(from) return nil } @@ -40,57 +39,34 @@ func (m *combinedMetricsMerger) Finish(includesBase bool) ([]byte, io.Closer, er return data, nil, err } -type Constraint struct { - counter int - limit int -} - -func newConstraint(initialCount, limit int) *Constraint { - return &Constraint{ - counter: initialCount, - limit: limit, - } -} - -func (c *Constraint) maxed() bool { - return c.counter >= c.limit -} - -func (c *Constraint) add(delta int) { - c.counter += delta -} - -func (c *Constraint) value() int { - return c.counter -} - -// merge merges two combined metrics considering the configured limits. -func merge(to, from *CombinedMetrics, limits Limits) { +func (to *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { // We merge the below fields irrespective of the services present // because it is possible for services to be empty if the event // does not fit the criteria for aggregations. - to.eventsTotal += from.eventsTotal - if to.youngestEventTimestamp.Before(from.youngestEventTimestamp) { - to.youngestEventTimestamp = from.youngestEventTimestamp - } - - if len(from.Services) == 0 { - // Accounts for overflow too as overflow cannot happen with 0 entries. - return + to.metrics.EventsTotal += from.EventsTotal + if to.metrics.YoungestEventTimestamp < from.YoungestEventTimestamp { + to.metrics.YoungestEventTimestamp = from.YoungestEventTimestamp } // If there is overflow due to max services in either of the buckets being // merged then we can merge the overflow buckets without considering any other scenarios. - mergeOverflow(&to.OverflowServices, &from.OverflowServices) if from.OverflowServiceInstancesEstimator != nil { - mergeEstimator(&to.OverflowServiceInstancesEstimator, from.OverflowServiceInstancesEstimator) + mergeOverflow(&to.metrics.OverflowServices, from.OverflowServices) + mergeEstimator( + &to.metrics.OverflowServiceInstancesEstimator, + hllSketch(from.OverflowServiceInstancesEstimator), + ) + } + + if len(from.ServiceMetrics) == 0 { + return } // Calculate the current capacity of the transaction, service transaction, // and span groups in the _to_ combined metrics. - totalTransactionGroupsConstraint := newConstraint(0, limits.MaxTransactionGroups) - totalServiceTransactionGroupsConstraint := newConstraint(0, limits.MaxServiceTransactionGroups) - totalSpanGroupsConstraint := newConstraint(0, limits.MaxSpanGroups) - for _, svc := range to.Services { + totalTransactionGroupsConstraint := newConstraint(0, to.limits.MaxTransactionGroups) + totalServiceTransactionGroupsConstraint := newConstraint(0, to.limits.MaxServiceTransactionGroups) + totalSpanGroupsConstraint := newConstraint(0, to.limits.MaxSpanGroups) + for _, svc := range to.metrics.Services { for _, si := range svc.ServiceInstanceGroups { totalTransactionGroupsConstraint.add(len(si.TransactionGroups)) totalServiceTransactionGroupsConstraint.add(len(si.ServiceTransactionGroups)) @@ -107,177 +83,348 @@ func merge(to, from *CombinedMetrics, limits Limits) { // create a new service in _to_ bucket and merge. // 2.b. Else, merge the _from_ bucket to the overflow service bucket // of the _to_ combined metrics. - for svcKey, fromSvc := range from.Services { - hash := Hasher{}.Chain(svcKey) - toSvc, svcOverflow := getServiceMetrics(to, svcKey, limits.MaxServices) + for i := range from.ServiceMetrics { + fromSvc := from.ServiceMetrics[i] + hash := Hasher{}.Chain(fromSvc.Key) + var sk ServiceAggregationKey + sk.FromProto(fromSvc.Key) + toSvc, svcOverflow := getServiceMetrics(&to.metrics, sk, to.limits.MaxServices) if svcOverflow { - mergeOverflow(&to.OverflowServices, &fromSvc.OverflowGroups) - - for sik, sim := range fromSvc.ServiceInstanceGroups { - sikHash := hash.Chain(sik) - mergeToOverflowFromSIM(&to.OverflowServices, &sim, sikHash) - insertHash(&to.OverflowServiceInstancesEstimator, sikHash.Sum()) + mergeOverflow(&to.metrics.OverflowServices, fromSvc.Metrics.OverflowGroups) + for j := range fromSvc.Metrics.ServiceInstanceMetrics { + ksim := fromSvc.Metrics.ServiceInstanceMetrics[j] + sikHash := hash.Chain(ksim.Key) + mergeToOverflowFromSIM(&to.metrics.OverflowServices, ksim, sikHash) + insertHash(&to.metrics.OverflowServiceInstancesEstimator, sikHash.Sum()) } continue } - mergeOverflow(&toSvc.OverflowGroups, &fromSvc.OverflowGroups) - mergeServiceInstanceGroups(&toSvc, &fromSvc, - totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint, - limits, hash, &to.OverflowServiceInstancesEstimator) - to.Services[svcKey] = toSvc - } -} - -func mergeToOverflowFromSIM(to *Overflow, from *ServiceInstanceMetrics, hash Hasher) { - for tk, tm := range from.TransactionGroups { - to.OverflowTransaction.Merge(&tm, hash.Chain(tk).Sum()) - } - for stk, stm := range from.ServiceTransactionGroups { - to.OverflowServiceTransaction.Merge(&stm, hash.Chain(stk).Sum()) - } - for sk, sm := range from.SpanGroups { - to.OverflowSpan.Merge(&sm, hash.Chain(sk).Sum()) + if fromSvc.Metrics != nil { + mergeOverflow(&toSvc.OverflowGroups, fromSvc.Metrics.OverflowGroups) + mergeServiceInstanceGroups( + &toSvc, + fromSvc.Metrics.ServiceInstanceMetrics, + totalTransactionGroupsConstraint, + totalServiceTransactionGroupsConstraint, + totalSpanGroupsConstraint, + to.limits, + hash, + &to.metrics.OverflowServiceInstancesEstimator, + ) + } + to.metrics.Services[sk] = toSvc } } -func mergeServiceInstanceGroups(to, from *ServiceMetrics, totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint *Constraint, limits Limits, hash Hasher, overflowServiceInstancesEstimator **hyperloglog.Sketch) { - for siKey, fromSIM := range from.ServiceInstanceGroups { - toSIM, overflowed := getServiceInstanceMetrics(to, siKey, limits.MaxServiceInstanceGroupsPerService) - siKeyHash := hash.Chain(siKey) +func mergeServiceInstanceGroups( + to *ServiceMetrics, + from []*aggregationpb.KeyedServiceInstanceMetrics, + totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint *Constraint, + limits Limits, + hash Hasher, + overflowServiceInstancesEstimator **hyperloglog.Sketch, +) { + for i := range from { + fromSvcIns := from[i] + var sik ServiceInstanceAggregationKey + sik.FromProto(fromSvcIns.Key) + sikHash := hash.Chain(fromSvcIns.Key) + + toSvcIns, overflowed := getServiceInstanceMetrics(to, sik, limits.MaxServiceInstanceGroupsPerService) if overflowed { - mergeToOverflowFromSIM(&to.OverflowGroups, &fromSIM, siKeyHash) - insertHash(overflowServiceInstancesEstimator, siKeyHash.Sum()) + mergeToOverflowFromSIM( + &to.OverflowGroups, + fromSvcIns, + sikHash, + ) + insertHash( + overflowServiceInstancesEstimator, + sikHash.Sum(), + ) continue } - mergeTransactionGroups( - &toSIM, - &fromSIM, - newConstraint(len(toSIM.TransactionGroups), limits.MaxTransactionGroupsPerService), + toSvcIns.TransactionGroups, + fromSvcIns.Metrics.TransactionMetrics, + newConstraint( + len(toSvcIns.TransactionGroups), + limits.MaxTransactionGroupsPerService, + ), totalTransactionGroupsConstraint, hash, &to.OverflowGroups.OverflowTransaction, ) mergeServiceTransactionGroups( - &toSIM, - &fromSIM, - newConstraint(len(toSIM.ServiceTransactionGroups), limits.MaxServiceTransactionGroupsPerService), + toSvcIns.ServiceTransactionGroups, + fromSvcIns.Metrics.ServiceTransactionMetrics, + newConstraint( + len(toSvcIns.ServiceTransactionGroups), + limits.MaxServiceTransactionGroupsPerService, + ), totalServiceTransactionGroupsConstraint, hash, &to.OverflowGroups.OverflowServiceTransaction, ) mergeSpanGroups( - &toSIM, - &fromSIM, - newConstraint(len(toSIM.SpanGroups), limits.MaxSpanGroupsPerService), + toSvcIns.SpanGroups, + fromSvcIns.Metrics.SpanMetrics, + newConstraint( + len(toSvcIns.SpanGroups), + limits.MaxSpanGroupsPerService, + ), totalSpanGroupsConstraint, hash, &to.OverflowGroups.OverflowSpan, ) - to.ServiceInstanceGroups[siKey] = toSIM + to.ServiceInstanceGroups[sik] = toSvcIns } } // mergeTransactionGroups merges transaction aggregation groups for two combined metrics // considering max transaction groups and max transaction groups per service limits. -func mergeTransactionGroups(to, from *ServiceInstanceMetrics, perSvcConstraint, globalConstraint *Constraint, hash Hasher, overflowTo *OverflowTransaction) { - for txnKey, fromTxn := range from.TransactionGroups { - toTxn, ok := to.TransactionGroups[txnKey] +func mergeTransactionGroups( + to map[TransactionAggregationKey]*aggregationpb.KeyedTransactionMetrics, + from []*aggregationpb.KeyedTransactionMetrics, + perSvcConstraint, globalConstraint *Constraint, + hash Hasher, + overflowTo *OverflowTransaction, +) { + for i := range from { + fromTxn := from[i] + var tk TransactionAggregationKey + tk.FromProto(fromTxn.Key) + toTxn, ok := to[tk] if !ok { overflowed := perSvcConstraint.maxed() || globalConstraint.maxed() if overflowed { - overflowTo.Merge(&fromTxn, hash.Chain(txnKey).Sum()) + overflowTo.Merge( + fromTxn.Metrics, + hash.Chain(fromTxn.Key).Sum(), + ) continue } - toTxn = newTransactionMetrics() perSvcConstraint.add(1) globalConstraint.add(1) + + to[tk] = fromTxn + from[i] = nil + continue } - mergeTransactionMetrics(&toTxn, &fromTxn) - to.TransactionGroups[txnKey] = toTxn + mergeKeyedTransactionMetrics(toTxn, fromTxn) } } -// mergeServiceTransactionGroups merges service transaction aggregation groups for two combined metrics -// considering max service transaction groups and max service transaction groups per service limits. -func mergeServiceTransactionGroups(to, from *ServiceInstanceMetrics, perSvcConstraint, globalConstraint *Constraint, hash Hasher, overflowTo *OverflowServiceTransaction) { - for svcTxnKey, fromSvcTxn := range from.ServiceTransactionGroups { - toSvcTxn, ok := to.ServiceTransactionGroups[svcTxnKey] +// mergeServiceTransactionGroups merges service transaction aggregation groups for two +// combined metrics considering max service transaction groups and max service +// transaction groups per service limits. +func mergeServiceTransactionGroups( + to map[ServiceTransactionAggregationKey]*aggregationpb.KeyedServiceTransactionMetrics, + from []*aggregationpb.KeyedServiceTransactionMetrics, + perSvcConstraint, globalConstraint *Constraint, + hash Hasher, + overflowTo *OverflowServiceTransaction, +) { + for i := range from { + fromSvcTxn := from[i] + var stk ServiceTransactionAggregationKey + stk.FromProto(fromSvcTxn.Key) + toSvcTxn, ok := to[stk] if !ok { overflowed := perSvcConstraint.maxed() || globalConstraint.maxed() if overflowed { - overflowTo.Merge(&fromSvcTxn, hash.Chain(svcTxnKey).Sum()) + overflowTo.Merge( + fromSvcTxn.Metrics, + hash.Chain(fromSvcTxn.Key).Sum(), + ) continue } - toSvcTxn = newServiceTransactionMetrics() perSvcConstraint.add(1) globalConstraint.add(1) + + to[stk] = fromSvcTxn + from[i] = nil + continue } - mergeServiceTransactionMetrics(&toSvcTxn, &fromSvcTxn) - to.ServiceTransactionGroups[svcTxnKey] = toSvcTxn + mergeKeyedServiceTransactionMetrics(toSvcTxn, fromSvcTxn) } } // mergeSpanGroups merges span aggregation groups for two combined metrics considering // max span groups and max span groups per service limits. -func mergeSpanGroups(to, from *ServiceInstanceMetrics, perSvcConstraint, globalConstraint *Constraint, hash Hasher, overflowTo *OverflowSpan) { - for spanKey, fromSpan := range from.SpanGroups { - toSpan, ok := to.SpanGroups[spanKey] +func mergeSpanGroups( + to map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics, + from []*aggregationpb.KeyedSpanMetrics, + perSvcConstraint, globalConstraint *Constraint, + hash Hasher, + overflowTo *OverflowSpan, +) { + for i := range from { + fromSpan := from[i] + var spk SpanAggregationKey + spk.FromProto(fromSpan.Key) + toSpan, ok := to[spk] if !ok { // Protect against agents that send high cardinality span names by dropping // span.name if more than half of the per svc span group limit is reached. half := perSvcConstraint.limit / 2 if perSvcConstraint.value() >= half { - spanKey.SpanName = "" - toSpan, ok = to.SpanGroups[spanKey] + spk.SpanName = "" + fromSpan.Key.SpanName = "" + toSpan, ok = to[spk] } if !ok { overflowed := perSvcConstraint.maxed() || globalConstraint.maxed() if overflowed { - overflowTo.Merge(&fromSpan, hash.Chain(spanKey).Sum()) + overflowTo.Merge( + fromSpan.Metrics, + hash.Chain(fromSpan.Key).Sum(), + ) continue } perSvcConstraint.add(1) globalConstraint.add(1) + + to[spk] = fromSpan + from[i] = nil + continue } } - mergeSpanMetrics(&toSpan, &fromSpan) - to.SpanGroups[spanKey] = toSpan + mergeKeyedSpanMetrics(toSpan, fromSpan) + } +} + +func mergeToOverflowFromSIM( + to *Overflow, + from *aggregationpb.KeyedServiceInstanceMetrics, + hash Hasher, +) { + if from.Metrics == nil { + return + } + for _, ktm := range from.Metrics.TransactionMetrics { + to.OverflowTransaction.Merge( + ktm.Metrics, + hash.Chain(ktm.Key).Sum(), + ) + } + for _, kstm := range from.Metrics.ServiceTransactionMetrics { + to.OverflowServiceTransaction.Merge( + kstm.Metrics, + hash.Chain(kstm.Key).Sum(), + ) + } + for _, ksm := range from.Metrics.SpanMetrics { + to.OverflowSpan.Merge( + ksm.Metrics, + hash.Chain(ksm.Key).Sum(), + ) } } -// mergeOverflow merges overflowed aggregation groups for transaction, -// service transaction, and span groups. -func mergeOverflow(to, from *Overflow) { +func mergeOverflow( + to *Overflow, + fromproto *aggregationpb.Overflow, +) { + if fromproto == nil { + return + } + var from Overflow + from.FromProto(fromproto) to.OverflowTransaction.MergeOverflow(&from.OverflowTransaction) to.OverflowServiceTransaction.MergeOverflow(&from.OverflowServiceTransaction) to.OverflowSpan.MergeOverflow(&from.OverflowSpan) } -// mergeTransactionMetrics merges two transaction metrics. -func mergeTransactionMetrics(to, from *TransactionMetrics) { +func mergeKeyedTransactionMetrics( + to, from *aggregationpb.KeyedTransactionMetrics, +) { + if from.Metrics == nil { + return + } + if to.Metrics == nil { + to.Metrics = aggregationpb.TransactionMetricsFromVTPool() + } + mergeTransactionMetrics(to.Metrics, from.Metrics) +} + +func mergeTransactionMetrics( + to, from *aggregationpb.TransactionMetrics, +) { if to.Histogram == nil && from.Histogram != nil { - to.Histogram = hdrhistogram.New() + to.Histogram = aggregationpb.HDRHistogramFromVTPool() + } + if to.Histogram != nil && from.Histogram != nil { + mergeHistogram(to.Histogram, from.Histogram) } - to.Histogram.Merge(from.Histogram) } -// mergeTransactionMetrics merges two transaction metrics. -func mergeServiceTransactionMetrics(to, from *ServiceTransactionMetrics) { +func mergeKeyedServiceTransactionMetrics( + to, from *aggregationpb.KeyedServiceTransactionMetrics, +) { + if from.Metrics == nil { + return + } + if to.Metrics == nil { + to.Metrics = aggregationpb.ServiceTransactionMetricsFromVTPool() + } + mergeServiceTransactionMetrics(to.Metrics, from.Metrics) +} + +func mergeServiceTransactionMetrics( + to, from *aggregationpb.ServiceTransactionMetrics, +) { if to.Histogram == nil && from.Histogram != nil { - to.Histogram = hdrhistogram.New() + to.Histogram = aggregationpb.HDRHistogramFromVTPool() + } + if to.Histogram != nil && from.Histogram != nil { + mergeHistogram(to.Histogram, from.Histogram) } - to.Histogram.Merge(from.Histogram) to.FailureCount += from.FailureCount to.SuccessCount += from.SuccessCount } -// mergeSpanMetrics merges two span metrics. -func mergeSpanMetrics(to, from *SpanMetrics) { +func mergeKeyedSpanMetrics(to, from *aggregationpb.KeyedSpanMetrics) { + if from.Metrics == nil { + return + } + if to.Metrics == nil { + to.Metrics = aggregationpb.SpanMetricsFromVTPool() + } + mergeSpanMetrics(to.Metrics, from.Metrics) +} + +func mergeSpanMetrics(to, from *aggregationpb.SpanMetrics) { to.Count += from.Count to.Sum += from.Sum } +// TODO: Add tests for merge histograms +func mergeHistogram(to, from *aggregationpb.HDRHistogram) { + // Assume both histograms are created with same arguments + m := make(map[int32]int64) + for i := 0; i < len(to.Buckets); i++ { + m[to.Buckets[i]] = to.Counts[i] + } + for i := 0; i < len(from.Buckets); i++ { + m[from.Buckets[i]] += from.Counts[i] + } + + if cap(to.Buckets) < len(m) { + to.Buckets = make([]int32, len(m)) + } + if cap(to.Counts) < len(m) { + to.Counts = make([]int64, len(m)) + } + + to.Buckets = to.Buckets[:0] + to.Counts = to.Counts[:0] + + for b, c := range m { + to.Buckets = append(to.Buckets, b) + to.Counts = append(to.Counts, c) + } +} + // getServiceMetrics returns the service metric from a combined metrics based on the // service key argument, creating one if needed. A second bool return value indicates // if a service is returned or no service can be created due to max svcs limit breach. @@ -307,28 +454,40 @@ func getServiceInstanceMetrics(sm *ServiceMetrics, siKey ServiceInstanceAggregat return sim, false } -func newTransactionMetrics() TransactionMetrics { - return TransactionMetrics{ - Histogram: hdrhistogram.New(), - } -} - -func newServiceTransactionMetrics() ServiceTransactionMetrics { - return ServiceTransactionMetrics{ - Histogram: hdrhistogram.New(), +func newServiceMetrics() ServiceMetrics { + return ServiceMetrics{ + ServiceInstanceGroups: make(map[ServiceInstanceAggregationKey]ServiceInstanceMetrics), } } func newServiceInstanceMetrics() ServiceInstanceMetrics { return ServiceInstanceMetrics{ - TransactionGroups: make(map[TransactionAggregationKey]TransactionMetrics), - ServiceTransactionGroups: make(map[ServiceTransactionAggregationKey]ServiceTransactionMetrics), - SpanGroups: make(map[SpanAggregationKey]SpanMetrics), + TransactionGroups: make(map[TransactionAggregationKey]*aggregationpb.KeyedTransactionMetrics), + ServiceTransactionGroups: make(map[ServiceTransactionAggregationKey]*aggregationpb.KeyedServiceTransactionMetrics), + SpanGroups: make(map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics), } } -func newServiceMetrics() ServiceMetrics { - return ServiceMetrics{ - ServiceInstanceGroups: make(map[ServiceInstanceAggregationKey]ServiceInstanceMetrics), +type Constraint struct { + counter int + limit int +} + +func newConstraint(initialCount, limit int) *Constraint { + return &Constraint{ + counter: initialCount, + limit: limit, } } + +func (c *Constraint) maxed() bool { + return c.counter >= c.limit +} + +func (c *Constraint) add(delta int) { + c.counter += delta +} + +func (c *Constraint) value() int { + return c.counter +} diff --git a/aggregators/merger_test.go b/aggregators/merger_test.go index 9325993..77668bd 100644 --- a/aggregators/merger_test.go +++ b/aggregators/merger_test.go @@ -9,21 +9,20 @@ import ( "testing" "time" + "github.com/elastic/apm-aggregation/aggregationpb" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" - - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" - "github.com/elastic/apm-data/model/modelpb" + "google.golang.org/protobuf/testing/protocmp" ) func TestMerge(t *testing.T) { - ts := time.Time{} + ts := time.Unix(0, 0).UTC() for _, tc := range []struct { name string limits Limits - to CombinedMetrics - from CombinedMetrics - expected CombinedMetrics + to func() CombinedMetrics + from func() *aggregationpb.CombinedMetrics + expected func() CombinedMetrics }{ { name: "no_overflow_with_count_values", @@ -37,21 +36,45 @@ func TestMerge(t *testing.T) { MaxServices: 2, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 5}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 5}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 5}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(4)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 2}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 2}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(14)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(5)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(4)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(2)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(2)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(2)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(14)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, }, { name: "no_overflow_with_histograms_in_to", @@ -65,21 +88,45 @@ func TestMerge(t *testing.T) { MaxServices: 2, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1000)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 500}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 500}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 500}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(4)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 2}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 2}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1004)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 502}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 502}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 502}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1000)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(500)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(500)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(500)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(4)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(2)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(2)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(2)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1004)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(502)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(502)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(502)). + Get() + }, }, { name: "no_overflow_with_histogram_in_from", @@ -93,21 +140,45 @@ func TestMerge(t *testing.T) { MaxServices: 2, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(4)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 2}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 2}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1000)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 500}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 500}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 500}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1004)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 502}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 502}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 502}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(4)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(2)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(2)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(2)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1000)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(500)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(500)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(500)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1004)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(502)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(502)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(502)). + Get() + }, }, { name: "no_overflow_with_histogram_in_both", @@ -121,21 +192,45 @@ func TestMerge(t *testing.T) { MaxServices: 2, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1400)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 700}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 700}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 700}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1000)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 500}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 500}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 500}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2400)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 1200}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 1200}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 1200}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1400)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(700)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(700)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(700)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1000)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(500)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(500)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(500)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(2400)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(1200)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(1200)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(1200)). + Get() + }, }, { name: "overflow_due_to_merge", @@ -149,24 +244,54 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(14)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addSpan(ts, "svc1", "", testSpan{spanName: "span2", count: 5}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(24)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). // no merge as transactions will overflow - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). // no merge as service transactions will overflow - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). // no merge as spans will overflow - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). // all transactions in from will overflow - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). // all service transactions in from will overflow - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 5}), // all spans will overflow but span.name dropped - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(14)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span2"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(24)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + // no merge as span, transaction, and service transaction will overflow + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + // all span, transaction, and service transaction from _from_ will overflow + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(5)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + Get() + }, }, { name: "to_overflowed_before_merge", @@ -180,27 +305,61 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(34)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 10}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 10}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 10}), // since max span groups per svc limit is 1, span.name will be dropped - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addSpan(ts, "svc1", "", testSpan{spanName: "span2", count: 5}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(44)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 15}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 15}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 15}), // all spans will overflow but span.name dropped - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(34)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(10)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(10)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(10)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span2"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(44)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(15)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(15), + ). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(15), + ). + Get() + }, }, { name: "from_overflowed_before_merge", @@ -214,30 +373,65 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(14)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(26)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addSpan(ts, "svc1", "", testSpan{spanName: "span2", count: 5}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn3", txnType: "type3", count: 8}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type3", count: 8}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 8}), // since max span groups per svc limit is 1, span.name will be dropped - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(40)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 5}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn3", txnType: "type3", count: 8}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type3", count: 8}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 8}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(14)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(26)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span2"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(8)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type3"}, + WithTransactionCount(8)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn3", TransactionType: "type3"}, + WithTransactionCount(8)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(40)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(13)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type3"}, + WithTransactionCount(8)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn3", TransactionType: "type3"}, + WithTransactionCount(8)). + Get() + }, }, { name: "svc_overflow", @@ -251,24 +445,60 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(14)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc2", "", testTransaction{txnName: "txn1", txnType: "type1", count: 5}). - addServiceTransaction(ts, "svc2", "", testServiceTransaction{txnType: "type1", count: 5}). - addSpan(ts, "svc2", "", testSpan{spanName: "span1", count: 5}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(24)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). - addGlobalServiceOverflowTransaction(ts, "svc2", "", testTransaction{txnName: "txn1", txnType: "type1", count: 5}). - addGlobalServiceOverflowServiceTransaction(ts, "svc2", "", testServiceTransaction{txnType: "type1", count: 5}). - addGlobalServiceOverflowSpan(ts, "svc2", "", testSpan{spanName: "span1", count: 5}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(14)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(5)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(24)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddGlobalTransactionOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + ServiceInstanceAggregationKey{}, + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(5)). + AddGlobalServiceTransactionOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + ServiceInstanceAggregationKey{}, + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(5)). + AddGlobalSpanOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + ServiceInstanceAggregationKey{}, + SpanAggregationKey{SpanName: "span1"}, + WithSpanCount(5)). + Get() + }, }, { name: "svc_overflow_only", @@ -282,16 +512,28 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(111)). - addServiceInstance(ts, "svc1", ""), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(222)). - addServiceInstance(ts, "svc2", ""), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(333)). - addServiceInstance(ts, "svc1", ""). - addGlobalServiceOverflowServiceInstance(ts, "svc2", ""), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(111)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(222)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(333)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddGlobalServiceInstanceOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + ServiceInstanceAggregationKey{}, + ). + Get() + }, }, { name: "per_svc_overflow_known_svc", @@ -305,24 +547,52 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(14)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addSpan(ts, "svc1", "", testSpan{spanName: "span2", count: 5}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(24)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}). - addSpan(ts, "svc1", "", testSpan{spanName: "span1", count: 7}). - addPerServiceOverflowTransaction(ts, "svc1", "", testTransaction{txnName: "txn2", txnType: "type2", count: 5}). - addPerServiceOverflowServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type2", count: 5}). - addPerServiceOverflowSpan(ts, "svc1", "", testSpan{spanName: "", count: 5}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(14)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span2"}, WithSpanCount(5)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(24)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddSpanOverflow(SpanAggregationKey{SpanName: ""}, WithSpanCount(5)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type2"}, + WithTransactionCount(5)). + AddTransactionOverflow( + TransactionAggregationKey{TransactionName: "txn2", TransactionType: "type2"}, + WithTransactionCount(5)). + Get() + }, }, { name: "service_instance_no_overflow", @@ -336,16 +606,25 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 2, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - addServiceInstance(ts, "svc1", "1"), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - addServiceInstance(ts, "svc1", "2"), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - addServiceInstance(ts, "svc1", "1"). - addServiceInstance(ts, "svc1", "2"), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(2)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + GetProto() + }, + expected: func() CombinedMetrics { + tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + sm := tcm.AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) + sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}) + sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + return tcm.Get() + }, }, { name: "service_instance_overflow_per_svc", @@ -359,16 +638,27 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - addServiceInstance(ts, "svc1", "1"), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - addServiceInstance(ts, "svc1", "2"), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - addServiceInstance(ts, "svc1", "1"). - addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(2)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(3)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddGlobalServiceInstanceOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + Get() + }, }, { name: "service_instance_overflow_global", @@ -382,72 +672,107 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - addServiceInstance(ts, "svc1", "1"), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - addServiceInstance(ts, "svc2", "2"), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - addServiceInstance(ts, "svc1", "1"). - addGlobalServiceOverflowServiceInstance(ts, "svc2", "2"), - ), - }, - { - name: "service_instance_overflow_per_svc_on_metrics", - limits: Limits{ - MaxSpanGroups: 100, - MaxSpanGroupsPerService: 100, - MaxTransactionGroups: 100, - MaxTransactionGroupsPerService: 100, - MaxServiceTransactionGroups: 100, - MaxServiceTransactionGroupsPerService: 100, - MaxServices: 1, - MaxServiceInstanceGroupsPerService: 1, + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + Get() }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - addTransaction(ts, "svc1", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). - addPerServiceOverflowTransaction(ts, "svc1", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"), - ), - }, - { - name: "service_instance_overflow_global_merge", - limits: Limits{ - MaxSpanGroups: 100, - MaxSpanGroupsPerService: 100, - MaxTransactionGroups: 100, - MaxTransactionGroupsPerService: 100, - MaxServiceTransactionGroups: 100, - MaxServiceTransactionGroupsPerService: 100, - MaxServices: 1, - MaxServiceInstanceGroupsPerService: 1, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(2)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(3)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddGlobalServiceInstanceOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + Get() }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). - addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). - addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - addTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). - addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - ), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). - addGlobalServiceOverflowTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - addGlobalServiceOverflowServiceInstance(ts, "svc2", "2"). - addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). - addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). - addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - ), }, + // { + // name: "service_instance_overflow_per_svc_on_metrics", + // limits: Limits{ + // MaxSpanGroups: 100, + // MaxSpanGroupsPerService: 100, + // MaxTransactionGroups: 100, + // MaxTransactionGroupsPerService: 100, + // MaxServiceTransactionGroups: 100, + // MaxServiceTransactionGroupsPerService: 100, + // MaxServices: 1, + // MaxServiceInstanceGroupsPerService: 1, + // }, + // to: func() CombinedMetrics { + // return NewTestCombinedMetrics(WithEventsTotal(1)). + // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + // AddTransaction( + // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + // WithTransactionCount(1)). + // Get() + // }, + // from: func() *aggregationpb.CombinedMetrics { + // return NewTestCombinedMetrics(WithEventsTotal(1)). + // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + // AddTransaction( + // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + // WithTransactionCount(2)). + // GetProto() + // }, + // expected: func() CombinedMetrics { + // tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + // sm := tcm.AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) + // sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + // AddTransaction( + // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + // WithTransactionCount(1)) + // sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + // AddTransactionOverflow( + // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + // WithTransactionCount(2)). + // AddGlobalServiceInstanceOverflow( + // ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, + // ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}, + // ) + // return tcm.Get() + // }, + // }, + //{ + // name: "service_instance_overflow_global_merge", + // limits: Limits{ + // MaxSpanGroups: 100, + // MaxSpanGroupsPerService: 100, + // MaxTransactionGroups: 100, + // MaxTransactionGroupsPerService: 100, + // MaxServiceTransactionGroups: 100, + // MaxServiceTransactionGroupsPerService: 100, + // MaxServices: 1, + // MaxServiceInstanceGroupsPerService: 1, + // }, + // to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). + // addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). + // addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). + // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), + // ), + // from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). + // addTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). + // addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). + // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), + // ), + // expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). + // addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). + // addGlobalServiceOverflowTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). + // addGlobalServiceOverflowServiceInstance(ts, "svc2", "2"). + // addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). + // addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). + // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), + // ), + //}, { name: "merge_with_empty_combined_metrics", limits: Limits{ @@ -459,329 +784,50 @@ func TestMerge(t *testing.T) { MaxServiceTransactionGroupsPerService: 1, MaxServices: 1, }, - to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(7)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}), - ), - from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1))), - expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(8)). - addTransaction(ts, "svc1", "", testTransaction{txnName: "txn1", txnType: "type1", count: 7}). - addServiceTransaction(ts, "svc1", "", testServiceTransaction{txnType: "type1", count: 7}), - ), + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(7)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1)).GetProto() + }, + expected: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(8)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + Get() + }, }, } { t.Run(tc.name, func(t *testing.T) { - merge(&tc.to, &tc.from, tc.limits) + cmm := combinedMetricsMerger{ + limits: tc.limits, + metrics: tc.to(), + } + cmm.merge(tc.from()) assert.Empty(t, cmp.Diff( - tc.expected, tc.to, + tc.expected(), cmm.metrics, + protocmp.Transform(), cmp.Exporter(func(reflect.Type) bool { return true }), - cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { - return a.Equal(&b) - }), )) }) } } -type testCombinedMetricsCfg struct { - eventsTotal float64 - youngestEventTimestamp time.Time -} - -type testCombinedMetricsOpt func(cfg testCombinedMetricsCfg) testCombinedMetricsCfg - -func withEventsTotal(total float64) testCombinedMetricsOpt { - return func(cfg testCombinedMetricsCfg) testCombinedMetricsCfg { - cfg.eventsTotal = total - return cfg - } -} - -func withYoungestEventTimestamp(ts time.Time) testCombinedMetricsOpt { - return func(cfg testCombinedMetricsCfg) testCombinedMetricsCfg { - cfg.youngestEventTimestamp = ts - return cfg - } -} - -type TestCombinedMetrics CombinedMetrics - -func createTestCombinedMetrics(opts ...testCombinedMetricsOpt) *TestCombinedMetrics { - var cfg testCombinedMetricsCfg - for _, opt := range opts { - cfg = opt(cfg) - } - return &TestCombinedMetrics{ - eventsTotal: cfg.eventsTotal, - youngestEventTimestamp: cfg.youngestEventTimestamp, - } -} - -type testTransaction struct { - txnName string - txnType string - eventOutcome string - faas *modelpb.Faas - count int -} - -type testServiceTransaction struct { - txnType string - count int -} - -type testSpan struct { - spanName string - destinationResource string - targetName string - targetType string - outcome string - count int -} - -func txnKeyFromTestTxn(txn testTransaction) TransactionAggregationKey { - tk := TransactionAggregationKey{ - TransactionName: txn.txnName, - TransactionType: txn.txnType, - EventOutcome: txn.eventOutcome, - } - if txn.faas != nil { - tk.FAASID = txn.faas.Id - tk.FAASName = txn.faas.Name - tk.FAASVersion = txn.faas.Version - tk.FAASTriggerType = txn.faas.TriggerType - tk.FAASColdstart.ParseBoolPtr(txn.faas.ColdStart) - } - return tk -} - -func spanKeyFromTestSpan(span testSpan) SpanAggregationKey { - return SpanAggregationKey{ - SpanName: span.spanName, - TargetName: span.targetName, - TargetType: span.targetType, - Resource: span.destinationResource, - Outcome: span.outcome, - } -} - -func (m *TestCombinedMetrics) addTransaction(timestamp time.Time, serviceName, globalLabelsStr string, txn testTransaction) *TestCombinedMetrics { - upsertSIM(m, timestamp, serviceName, globalLabelsStr, func(sim *ServiceInstanceMetrics) { - tk := txnKeyFromTestTxn(txn) - tm, ok := sim.TransactionGroups[tk] - if !ok { - tm = newTransactionMetrics() - } - for i := 0; i < txn.count; i++ { - tm.Histogram.RecordDuration(time.Second, 1) - } - sim.TransactionGroups[tk] = tm - }) - return m -} - -func (m *TestCombinedMetrics) addPerServiceOverflowTransaction(timestamp time.Time, serviceName, globalLabelsStr string, txn testTransaction) *TestCombinedMetrics { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - upsertPerServiceOverflow(m, timestamp, serviceName, func(overflow *Overflow) { - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - tk := txnKeyFromTestTxn(txn) - tm := newTransactionMetrics() - for i := 0; i < txn.count; i++ { - tm.Histogram.RecordDuration(time.Second, 1) - } - overflow.OverflowTransaction.Merge(&tm, Hasher{}.Chain(sk).Chain(sik).Chain(tk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addGlobalServiceOverflowTransaction(timestamp time.Time, serviceName, globalLabelsStr string, txn testTransaction) *TestCombinedMetrics { - upsertGlobalServiceOverflow(m, timestamp, serviceName, globalLabelsStr, func(overflow *Overflow) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - tk := txnKeyFromTestTxn(txn) - tm := newTransactionMetrics() - for i := 0; i < txn.count; i++ { - tm.Histogram.RecordDuration(time.Second, 1) - } - overflow.OverflowTransaction.Merge(&tm, Hasher{}.Chain(sk).Chain(sik).Chain(tk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addServiceTransaction(timestamp time.Time, serviceName, globalLabelsStr string, svcTxn testServiceTransaction) *TestCombinedMetrics { - upsertSIM(m, timestamp, serviceName, globalLabelsStr, func(sim *ServiceInstanceMetrics) { - stk := ServiceTransactionAggregationKey{ - TransactionType: svcTxn.txnType, - } - stm, ok := sim.ServiceTransactionGroups[stk] - if !ok { - stm = newServiceTransactionMetrics() - } - for i := 0; i < svcTxn.count; i++ { - stm.Histogram.RecordDuration(time.Second, 1) - stm.FailureCount += 0.0 - stm.SuccessCount += 1.0 - } - sim.ServiceTransactionGroups[stk] = stm - }) - return m -} - -func (m *TestCombinedMetrics) addPerServiceOverflowServiceTransaction(timestamp time.Time, serviceName, globalLabelsStr string, svcTxn testServiceTransaction) *TestCombinedMetrics { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - upsertPerServiceOverflow(m, timestamp, serviceName, func(overflow *Overflow) { - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - stk := ServiceTransactionAggregationKey{ - TransactionType: svcTxn.txnType, - } - stm := newServiceTransactionMetrics() - for i := 0; i < svcTxn.count; i++ { - stm.Histogram.RecordDuration(time.Second, 1) - stm.FailureCount += 0.0 - stm.SuccessCount += 1.0 - } - overflow.OverflowServiceTransaction.Merge(&stm, Hasher{}.Chain(sk).Chain(sik).Chain(stk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addGlobalServiceOverflowServiceTransaction(timestamp time.Time, serviceName, globalLabelsStr string, svcTxn testServiceTransaction) *TestCombinedMetrics { - upsertGlobalServiceOverflow(m, timestamp, serviceName, globalLabelsStr, func(overflow *Overflow) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - stk := ServiceTransactionAggregationKey{ - TransactionType: svcTxn.txnType, - } - stm := newServiceTransactionMetrics() - for i := 0; i < svcTxn.count; i++ { - stm.Histogram.RecordDuration(time.Second, 1) - stm.FailureCount += 0.0 - stm.SuccessCount += 1.0 - } - overflow.OverflowServiceTransaction.Merge(&stm, Hasher{}.Chain(sk).Chain(sik).Chain(stk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addSpan(timestamp time.Time, serviceName, globalLabelsStr string, span testSpan) *TestCombinedMetrics { - upsertSIM(m, timestamp, serviceName, globalLabelsStr, func(sim *ServiceInstanceMetrics) { - spk := spanKeyFromTestSpan(span) - spm := sim.SpanGroups[spk] - for i := 0; i < span.count; i++ { - spm.Count++ - spm.Sum++ - } - sim.SpanGroups[spk] = spm - }) - return m -} - -func (m *TestCombinedMetrics) addPerServiceOverflowSpan(timestamp time.Time, serviceName, globalLabelsStr string, span testSpan) *TestCombinedMetrics { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - upsertPerServiceOverflow(m, timestamp, serviceName, func(overflow *Overflow) { - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - spk := spanKeyFromTestSpan(span) - spm := SpanMetrics{} - for i := 0; i < span.count; i++ { - spm.Count++ - spm.Sum++ - } - overflow.OverflowSpan.Merge(&spm, Hasher{}.Chain(sk).Chain(sik).Chain(spk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addGlobalServiceOverflowSpan(timestamp time.Time, serviceName, globalLabelsStr string, span testSpan) *TestCombinedMetrics { - upsertGlobalServiceOverflow(m, timestamp, serviceName, globalLabelsStr, func(overflow *Overflow) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - spk := spanKeyFromTestSpan(span) - spm := SpanMetrics{} - for i := 0; i < span.count; i++ { - spm.Count++ - spm.Sum++ - } - overflow.OverflowSpan.Merge(&spm, Hasher{}.Chain(sk).Chain(sik).Chain(spk).Sum()) - }) - return m -} - -func (m *TestCombinedMetrics) addServiceInstance(timestamp time.Time, serviceName, globalLabelsStr string) *TestCombinedMetrics { - upsertSIM(m, timestamp, serviceName, globalLabelsStr, func(_ *ServiceInstanceMetrics) {}) - return m -} - -func (m *TestCombinedMetrics) addGlobalServiceOverflowServiceInstance(timestamp time.Time, serviceName, globalLabelsStr string) *TestCombinedMetrics { - upsertGlobalServiceOverflow(m, timestamp, serviceName, globalLabelsStr, func(_ *Overflow) {}) - return m -} - -func upsertSIM(cm *TestCombinedMetrics, timestamp time.Time, serviceName, globalLabelsStr string, updater func(sim *ServiceInstanceMetrics)) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sm, ok := cm.Services[sk] - if !ok { - sm = newServiceMetrics() - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - sim, ok := sm.ServiceInstanceGroups[sik] - if !ok { - sim = newServiceInstanceMetrics() - } - updater(&sim) - sm.ServiceInstanceGroups[sik] = sim - if cm.Services == nil { - cm.Services = make(map[ServiceAggregationKey]ServiceMetrics) - } - cm.Services[sk] = sm -} - -func upsertPerServiceOverflow(cm *TestCombinedMetrics, timestamp time.Time, serviceName string, updater func(overflow *Overflow)) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sm, ok := cm.Services[sk] - if !ok { - sm = newServiceMetrics() - } - updater(&sm.OverflowGroups) - if cm.Services == nil { - cm.Services = make(map[ServiceAggregationKey]ServiceMetrics) - } - cm.Services[sk] = sm -} - -func upsertGlobalServiceOverflow(cm *TestCombinedMetrics, timestamp time.Time, serviceName, globalLabelsStr string, updater func(overflow *Overflow)) { - sk := ServiceAggregationKey{ - Timestamp: timestamp, - ServiceName: serviceName, - } - sik := ServiceInstanceAggregationKey{GlobalLabelsStr: globalLabelsStr} - updater(&cm.OverflowServices) - insertHash(&cm.OverflowServiceInstancesEstimator, Hasher{}.Chain(sk).Chain(sik).Sum()) -} - func TestCardinalityEstimationOnSubKeyCollision(t *testing.T) { limits := Limits{ MaxSpanGroups: 100, @@ -793,21 +839,41 @@ func TestCardinalityEstimationOnSubKeyCollision(t *testing.T) { MaxServices: 1, } ts := time.Time{} - to := CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(0)). - addServiceInstance(ts, "svc1", "")) - from1 := CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc2", "", testTransaction{txnName: "txn1", txnType: "type1", count: 5}). - addServiceTransaction(ts, "svc2", "", testServiceTransaction{txnType: "type1", count: 5}). - addSpan(ts, "svc2", "", testSpan{spanName: "", count: 5}), - ) - from2 := CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(10)). - addTransaction(ts, "svc3", "", testTransaction{txnName: "txn1", txnType: "type1", count: 5}). - addServiceTransaction(ts, "svc3", "", testServiceTransaction{txnType: "type1", count: 5}). - addSpan(ts, "svc3", "", testSpan{spanName: "", count: 5}), - ) - merge(&to, &from1, limits) - merge(&to, &from2, limits) - assert.Equal(t, uint64(2), to.OverflowServices.OverflowTransaction.Estimator.Estimate()) - assert.Equal(t, uint64(2), to.OverflowServices.OverflowServiceTransaction.Estimator.Estimate()) - assert.Equal(t, uint64(2), to.OverflowServices.OverflowSpan.Estimator.Estimate()) + to := NewTestCombinedMetrics(WithEventsTotal(0)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + Get() + from1 := NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{}, WithSpanCount(5)). + AddTransaction(TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(5)). + AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: "type1", + }, WithTransactionCount(5)). + GetProto() + from2 := NewTestCombinedMetrics(WithEventsTotal(10)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc3"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{}, WithSpanCount(5)). + AddTransaction(TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(5)). + AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: "type1", + }, WithTransactionCount(5)). + GetProto() + cmm := combinedMetricsMerger{ + limits: limits, + metrics: to, + } + cmm.merge(from1) + cmm.merge(from2) + assert.Equal(t, uint64(2), cmm.metrics.OverflowServices.OverflowTransaction.Estimator.Estimate()) + assert.Equal(t, uint64(2), cmm.metrics.OverflowServices.OverflowServiceTransaction.Estimator.Estimate()) + assert.Equal(t, uint64(2), cmm.metrics.OverflowServices.OverflowSpan.Estimator.Estimate()) } diff --git a/aggregators/models.go b/aggregators/models.go index b62a301..47a8aaf 100644 --- a/aggregators/models.go +++ b/aggregators/models.go @@ -5,13 +5,11 @@ package aggregators import ( - "encoding/binary" "time" "github.com/axiomhq/hyperloglog" - "github.com/cespare/xxhash/v2" - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-aggregation/aggregationpb" "github.com/elastic/apm-aggregation/aggregators/nullable" "github.com/elastic/apm-data/model/modelpb" ) @@ -95,15 +93,15 @@ type CombinedMetrics struct { // max service instances per service limit. OverflowServiceInstancesEstimator *hyperloglog.Sketch - // eventsTotal is the total number of individual events, including + // EventsTotal is the total number of individual events, including // all overflows, that were aggregated for this combined metrics. It // is used for internal monitoring purposes and is approximated when // partitioning is enabled. - eventsTotal float64 + EventsTotal float64 - // youngestEventTimestamp is the youngest event that was aggregated + // YoungestEventTimestamp is the youngest event that was aggregated // in the combined metrics based on the received timestamp. - youngestEventTimestamp time.Time + YoungestEventTimestamp uint64 } // ServiceAggregationKey models the key used to store service specific @@ -116,19 +114,6 @@ type ServiceAggregationKey struct { AgentName string } -// Hash returns a xxhash.Digest after hashing the aggregation key on top of h. -func (k ServiceAggregationKey) Hash(h xxhash.Digest) xxhash.Digest { - var buf [8]byte - binary.LittleEndian.PutUint64(buf[:], uint64(k.Timestamp.UnixNano())) - h.Write(buf[:]) - - h.WriteString(k.ServiceName) - h.WriteString(k.ServiceEnvironment) - h.WriteString(k.ServiceLanguageName) - h.WriteString(k.AgentName) - return h -} - // ServiceMetrics models the value to store all the aggregated metrics // for a specific service aggregation key. type ServiceMetrics struct { @@ -142,47 +127,52 @@ type ServiceInstanceAggregationKey struct { GlobalLabelsStr string } -// Hash returns a xxhash.Digest after hashing the aggregation key on top of h. -func (k ServiceInstanceAggregationKey) Hash(h xxhash.Digest) xxhash.Digest { - h.WriteString(k.GlobalLabelsStr) - return h -} - // ServiceInstanceMetrics models the value to store all the aggregated metrics // for a specific service instance aggregation key. type ServiceInstanceMetrics struct { - TransactionGroups map[TransactionAggregationKey]TransactionMetrics - ServiceTransactionGroups map[ServiceTransactionAggregationKey]ServiceTransactionMetrics - SpanGroups map[SpanAggregationKey]SpanMetrics + TransactionGroups map[TransactionAggregationKey]*aggregationpb.KeyedTransactionMetrics + ServiceTransactionGroups map[ServiceTransactionAggregationKey]*aggregationpb.KeyedServiceTransactionMetrics + SpanGroups map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics } -func insertHash(estimator **hyperloglog.Sketch, hash uint64) { - if *estimator == nil { - *estimator = hyperloglog.New14() +func insertHash(to **hyperloglog.Sketch, hash uint64) { + if *to == nil { + *to = hyperloglog.New14() } - (*estimator).InsertHash(hash) + (*to).InsertHash(hash) } func mergeEstimator(to **hyperloglog.Sketch, from *hyperloglog.Sketch) { if *to == nil { *to = hyperloglog.New14() } + // Ignoring returned error here since the error is only returned if + // the precision is set outside bounds which is not possible for our case. (*to).Merge(from) } type OverflowTransaction struct { - Metrics TransactionMetrics + Metrics *aggregationpb.TransactionMetrics Estimator *hyperloglog.Sketch } -func (o *OverflowTransaction) Merge(from *TransactionMetrics, hash uint64) { - o.Metrics.Merge(from) +func (o *OverflowTransaction) Merge( + from *aggregationpb.TransactionMetrics, + hash uint64, +) { + if o.Metrics == nil { + o.Metrics = aggregationpb.TransactionMetricsFromVTPool() + } + mergeTransactionMetrics(o.Metrics, from) insertHash(&o.Estimator, hash) } func (o *OverflowTransaction) MergeOverflow(from *OverflowTransaction) { if from.Estimator != nil { - o.Metrics.Merge(&from.Metrics) + if o.Metrics == nil { + o.Metrics = aggregationpb.TransactionMetricsFromVTPool() + } + mergeTransactionMetrics(o.Metrics, from.Metrics) mergeEstimator(&o.Estimator, from.Estimator) } } @@ -192,18 +182,27 @@ func (o *OverflowTransaction) Empty() bool { } type OverflowServiceTransaction struct { - Metrics ServiceTransactionMetrics + Metrics *aggregationpb.ServiceTransactionMetrics Estimator *hyperloglog.Sketch } -func (o *OverflowServiceTransaction) Merge(from *ServiceTransactionMetrics, hash uint64) { - o.Metrics.Merge(from) +func (o *OverflowServiceTransaction) Merge( + from *aggregationpb.ServiceTransactionMetrics, + hash uint64, +) { + if o.Metrics == nil { + o.Metrics = aggregationpb.ServiceTransactionMetricsFromVTPool() + } + mergeServiceTransactionMetrics(o.Metrics, from) insertHash(&o.Estimator, hash) } func (o *OverflowServiceTransaction) MergeOverflow(from *OverflowServiceTransaction) { if from.Estimator != nil { - o.Metrics.Merge(&from.Metrics) + if o.Metrics == nil { + o.Metrics = aggregationpb.ServiceTransactionMetricsFromVTPool() + } + mergeServiceTransactionMetrics(o.Metrics, from.Metrics) mergeEstimator(&o.Estimator, from.Estimator) } } @@ -213,18 +212,27 @@ func (o *OverflowServiceTransaction) Empty() bool { } type OverflowSpan struct { - Metrics SpanMetrics + Metrics *aggregationpb.SpanMetrics Estimator *hyperloglog.Sketch } -func (o *OverflowSpan) Merge(from *SpanMetrics, hash uint64) { - o.Metrics.Merge(from) +func (o *OverflowSpan) Merge( + from *aggregationpb.SpanMetrics, + hash uint64, +) { + if o.Metrics == nil { + o.Metrics = aggregationpb.SpanMetricsFromVTPool() + } + mergeSpanMetrics(o.Metrics, from) insertHash(&o.Estimator, hash) } func (o *OverflowSpan) MergeOverflow(from *OverflowSpan) { if from.Estimator != nil { - o.Metrics.Merge(&from.Metrics) + if o.Metrics == nil { + o.Metrics = aggregationpb.SpanMetricsFromVTPool() + } + mergeSpanMetrics(o.Metrics, from.Metrics) mergeEstimator(&o.Estimator, from.Estimator) } } @@ -283,66 +291,6 @@ type TransactionAggregationKey struct { CloudProjectName string } -// Hash returns a xxhash.Digest after hashing the aggregation key on top of h. -func (k TransactionAggregationKey) Hash(h xxhash.Digest) xxhash.Digest { - if k.TraceRoot { - h.WriteString("1") - } - - h.WriteString(k.ContainerID) - h.WriteString(k.KubernetesPodName) - - h.WriteString(k.ServiceVersion) - h.WriteString(k.ServiceNodeName) - - h.WriteString(k.ServiceRuntimeName) - h.WriteString(k.ServiceRuntimeVersion) - h.WriteString(k.ServiceLanguageVersion) - - h.WriteString(k.HostHostname) - h.WriteString(k.HostName) - h.WriteString(k.HostOSPlatform) - - h.WriteString(k.EventOutcome) - - h.WriteString(k.TransactionName) - h.WriteString(k.TransactionType) - h.WriteString(k.TransactionResult) - - if k.FAASColdstart == nullable.True { - h.WriteString("1") - } - h.WriteString(k.FAASID) - h.WriteString(k.FAASName) - h.WriteString(k.FAASVersion) - h.WriteString(k.FAASTriggerType) - - h.WriteString(k.CloudProvider) - h.WriteString(k.CloudRegion) - h.WriteString(k.CloudAvailabilityZone) - h.WriteString(k.CloudServiceName) - h.WriteString(k.CloudAccountID) - h.WriteString(k.CloudAccountName) - h.WriteString(k.CloudMachineType) - h.WriteString(k.CloudProjectID) - h.WriteString(k.CloudProjectName) - return h -} - -// TransactionMetrics models the aggregated metric for each unique -// transaction metrics key. TransactionMetrics is designed to use -// two different data structures depending on the number of transactions -// getting aggregated. For lower number of transactions (< 255), a slice -// is used. The slice is promoted to a histogram if the number of entries -// exceed the limit for the slice data structure. -type TransactionMetrics struct { - Histogram *hdrhistogram.HistogramRepresentation -} - -func (m *TransactionMetrics) Merge(from *TransactionMetrics) { - mergeTransactionMetrics(m, from) -} - // SpanAggregationKey models the key used to store span aggregation metrics. type SpanAggregationKey struct { SpanName string @@ -354,55 +302,15 @@ type SpanAggregationKey struct { Resource string } -// Hash returns a xxhash.Digest after hashing the aggregation key on top of h. -func (k SpanAggregationKey) Hash(h xxhash.Digest) xxhash.Digest { - h.WriteString(k.SpanName) - h.WriteString(k.Outcome) - - h.WriteString(k.TargetType) - h.WriteString(k.TargetName) - - h.WriteString(k.Resource) - return h -} - -// SpanMetrics models the aggregated metric for each unique span metrics key. -type SpanMetrics struct { - Count float64 - Sum float64 -} - -func (m *SpanMetrics) Merge(from *SpanMetrics) { - mergeSpanMetrics(m, from) -} - // ServiceTransactionAggregationKey models the key used to store // service transaction aggregation metrics. type ServiceTransactionAggregationKey struct { TransactionType string } -// Hash returns a xxhash.Digest after hashing the aggregation key on top of h. -func (k ServiceTransactionAggregationKey) Hash(h xxhash.Digest) xxhash.Digest { - h.WriteString(k.TransactionType) - return h -} - -// ServiceTransactionMetrics models the value to store all the aggregated metrics -// for a specific service transaction aggregation key. -type ServiceTransactionMetrics struct { - Histogram *hdrhistogram.HistogramRepresentation - FailureCount float64 - SuccessCount float64 -} - -func (m *ServiceTransactionMetrics) Merge(from *ServiceTransactionMetrics) { - mergeServiceTransactionMetrics(m, from) -} - -// GlobalLabels is an intermediate struct used to marshal/unmarshal the provided -// global labels into a comparable format. The format is used by pebble db to -// compare service aggregation keys. +// GlobalLabels is an intermediate struct used to marshal/unmarshal the +// provided global labels into a comparable format. The format is used by +// pebble db to compare service aggregation keys. type GlobalLabels struct { Labels modelpb.Labels NumericLabels modelpb.NumericLabels From abce21f7527575b065167d15f78e1322a60e0431 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 17:19:00 +0800 Subject: [PATCH 02/13] Fix tests --- aggregators/aggregator_test.go | 1132 +++++++++++++------------- aggregators/combined_metrics_test.go | 320 ++++---- aggregators/converter_test.go | 498 +++++------ aggregators/merger.go | 34 +- aggregators/merger_test.go | 58 +- 5 files changed, 1027 insertions(+), 1015 deletions(-) diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go index dc2b24d..cea30e8 100644 --- a/aggregators/aggregator_test.go +++ b/aggregators/aggregator_test.go @@ -9,6 +9,7 @@ import ( "fmt" "math/rand" "net/netip" + "sort" "strings" "sync/atomic" "testing" @@ -27,11 +28,14 @@ import ( apmmodel "go.elastic.co/apm/v2/model" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" "golang.org/x/sync/errgroup" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" + "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" "github.com/elastic/apm-data/model/modelpb" ) @@ -41,581 +45,561 @@ func TestNew(t *testing.T) { assert.NotNil(t, agg) } -// func TestAggregateBatch(t *testing.T) { -// exp := tracetest.NewInMemoryExporter() -// tp := sdktrace.NewTracerProvider( -// sdktrace.WithSyncer(exp), -// ) -// gatherer, err := apmotel.NewGatherer() -// require.NoError(t, err) -// mp := metric.NewMeterProvider(metric.WithReader(gatherer)) -// -// cmID := EncodeToCombinedMetricsKeyID(t, "ab01") -// txnDuration := 100 * time.Millisecond -// uniqueEventCount := 100 // for each of txns and spans -// uniqueServices := 10 -// repCount := 5 -// ts := time.Date(2022, 12, 31, 0, 0, 0, 0, time.UTC) -// batch := make(modelpb.Batch, 0, uniqueEventCount*repCount*2) -// // Distribute the total unique transaction count amongst the total -// // unique services uniformly. -// for i := 0; i < uniqueEventCount*repCount; i++ { -// batch = append(batch, &modelpb.APMEvent{ -// Event: &modelpb.Event{ -// Outcome: "success", -// Duration: durationpb.New(txnDuration), -// Received: timestamppb.New(ts), -// }, -// Transaction: &modelpb.Transaction{ -// Name: fmt.Sprintf("foo%d", i%uniqueEventCount), -// Type: fmt.Sprintf("txtype%d", i%uniqueEventCount), -// RepresentativeCount: 1, -// DroppedSpansStats: []*modelpb.DroppedSpanStats{ -// { -// DestinationServiceResource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), -// Outcome: "success", -// Duration: &modelpb.AggregatedDuration{ -// Count: 1, -// Sum: durationpb.New(10 * time.Millisecond), -// }, -// }, -// }, -// }, -// Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, -// }) -// batch = append(batch, &modelpb.APMEvent{ -// Event: &modelpb.Event{ -// Received: timestamppb.New(ts), -// }, -// Span: &modelpb.Span{ -// Name: fmt.Sprintf("bar%d", i%uniqueEventCount), -// Type: "type", -// RepresentativeCount: 1, -// DestinationService: &modelpb.DestinationService{ -// Resource: "test_dest", -// }, -// }, -// Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, -// }) -// } -// -// out := make(chan CombinedMetrics, 1) -// aggIvl := time.Minute -// agg, err := New( -// WithDataDir(t.TempDir()), -// WithLimits(Limits{ -// MaxSpanGroups: 1000, -// MaxSpanGroupsPerService: 100, -// MaxTransactionGroups: 100, -// MaxTransactionGroupsPerService: 10, -// MaxServiceTransactionGroups: 100, -// MaxServiceTransactionGroupsPerService: 10, -// MaxServices: 10, -// MaxServiceInstanceGroupsPerService: 10, -// }), -// WithProcessor(combinedMetricsProcessor(out)), -// WithAggregationIntervals([]time.Duration{aggIvl}), -// WithHarvestDelay(time.Hour), // disable auto harvest -// WithTracer(tp.Tracer("test")), -// WithMeter(mp.Meter("test")), -// WithCombinedMetricsIDToKVs(func(id [16]byte) []attribute.KeyValue { -// return []attribute.KeyValue{attribute.String("id_key", string(id[:]))} -// }), -// ) -// require.NoError(t, err) -// -// require.NoError(t, agg.AggregateBatch(context.Background(), cmID, &batch)) -// require.NoError(t, agg.Stop(context.Background())) -// var cm CombinedMetrics -// select { -// case cm = <-out: -// default: -// t.Error("failed to get aggregated metrics") -// t.FailNow() -// } -// -// var span tracetest.SpanStub -// for _, s := range exp.GetSpans() { -// if s.Name == "AggregateBatch" { -// span = s -// } -// } -// assert.NotNil(t, span) -// -// expectedCombinedMetrics := CombinedMetrics{ -// Services: make(map[ServiceAggregationKey]ServiceMetrics), -// eventsTotal: float64(len(batch)), -// youngestEventTimestamp: ts, -// } -// expectedMeasurements := []apmmodel.Metrics{ -// { -// Samples: map[string]apmmodel.Metric{ -// "aggregator.requests.total": {Value: 1}, -// "aggregator.bytes.ingested": {Value: 133750}, -// }, -// Labels: apmmodel.StringMap{ -// apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, -// }, -// }, -// { -// Samples: map[string]apmmodel.Metric{ -// "aggregator.events.total": {Value: float64(len(batch))}, -// "aggregator.events.processed": {Value: float64(len(batch))}, -// "events.processing-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, -// "events.queued-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, -// }, -// Labels: apmmodel.StringMap{ -// apmmodel.StringMapItem{Key: aggregationIvlKey, Value: formatDuration(aggIvl)}, -// apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, -// }, -// }, -// } -// sik := ServiceInstanceAggregationKey{GlobalLabelsStr: ""} -// for i := 0; i < uniqueEventCount*repCount; i++ { -// svcKey := ServiceAggregationKey{ -// Timestamp: time.Unix(0, 0).UTC(), -// ServiceName: fmt.Sprintf("svc%d", i%uniqueServices), -// } -// txKey := TransactionAggregationKey{ -// TraceRoot: true, -// TransactionName: fmt.Sprintf("foo%d", i%uniqueEventCount), -// TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), -// EventOutcome: "success", -// } -// stxKey := ServiceTransactionAggregationKey{ -// TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), -// } -// spanKey := SpanAggregationKey{ -// SpanName: fmt.Sprintf("bar%d", i%uniqueEventCount), -// Resource: "test_dest", -// } -// if _, ok := expectedCombinedMetrics.Services[svcKey]; !ok { -// expectedCombinedMetrics.Services[svcKey] = newServiceMetrics() -// } -// if _, ok := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik]; !ok { -// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik] = newServiceInstanceMetrics() -// } -// var ok bool -// var tm TransactionMetrics -// if tm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey]; !ok { -// tm = newTransactionMetrics() -// } -// tm.Histogram.RecordDuration(txnDuration, 1) -// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].TransactionGroups[txKey] = tm -// var stm ServiceTransactionMetrics -// if stm, ok = expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey]; !ok { -// stm = newServiceTransactionMetrics() -// } -// stm.Histogram.RecordDuration(txnDuration, 1) -// stm.SuccessCount++ -// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].ServiceTransactionGroups[stxKey] = stm -// sm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] -// sm.Count++ -// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[spanKey] = sm -// -// droppedSpanStatsKey := SpanAggregationKey{ -// SpanName: "", -// Resource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), -// Outcome: "success", -// } -// dssm := expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] -// dssm.Count++ -// dssm.Sum += float64(10 * time.Millisecond) -// expectedCombinedMetrics.Services[svcKey].ServiceInstanceGroups[sik].SpanGroups[droppedSpanStatsKey] = dssm -// } -// assert.Empty(t, cmp.Diff( -// expectedCombinedMetrics, cm, -// cmpopts.EquateEmpty(), -// cmpopts.EquateApprox(0, 0.01), -// cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { -// return a.Equal(&b) -// }), -// cmp.AllowUnexported(CombinedMetrics{}), -// )) -// assert.Empty(t, cmp.Diff( -// expectedMeasurements, -// gatherMetrics( -// gatherer, -// withIgnoreMetricPrefix("pebble."), -// withZeroHistogramValues(true), -// ), -// cmpopts.IgnoreUnexported(apmmodel.Time{}), -// cmpopts.EquateApprox(0, 0.01), -// )) -// } +func TestAggregateBatch(t *testing.T) { + exp := tracetest.NewInMemoryExporter() + tp := sdktrace.NewTracerProvider( + sdktrace.WithSyncer(exp), + ) + gatherer, err := apmotel.NewGatherer() + require.NoError(t, err) + mp := metric.NewMeterProvider(metric.WithReader(gatherer)) + + cmID := EncodeToCombinedMetricsKeyID(t, "ab01") + eventDuration := 100 * time.Millisecond + dssDuration := 10 * time.Millisecond + uniqueEventCount := 100 // for each of txns and spans + uniqueServices := 10 + repCount := 5 + ts := time.Date(2022, 12, 31, 0, 0, 0, 0, time.UTC) + batch := make(modelpb.Batch, 0, uniqueEventCount*repCount*2) + // Distribute the total unique transaction count amongst the total + // unique services uniformly. + for i := 0; i < uniqueEventCount*repCount; i++ { + batch = append(batch, &modelpb.APMEvent{ + Event: &modelpb.Event{ + Outcome: "success", + Duration: durationpb.New(eventDuration), + Received: timestamppb.New(ts), + }, + Transaction: &modelpb.Transaction{ + Name: fmt.Sprintf("foo%d", i%uniqueEventCount), + Type: fmt.Sprintf("txtype%d", i%uniqueEventCount), + RepresentativeCount: 1, + DroppedSpansStats: []*modelpb.DroppedSpanStats{ + { + DestinationServiceResource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), + Outcome: "success", + Duration: &modelpb.AggregatedDuration{ + Count: 1, + Sum: durationpb.New(dssDuration), + }, + }, + }, + }, + Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, + }) + batch = append(batch, &modelpb.APMEvent{ + Event: &modelpb.Event{ + Duration: durationpb.New(eventDuration), + Received: timestamppb.New(ts), + }, + Span: &modelpb.Span{ + Name: fmt.Sprintf("bar%d", i%uniqueEventCount), + Type: "type", + RepresentativeCount: 1, + DestinationService: &modelpb.DestinationService{ + Resource: "test_dest", + }, + }, + Service: &modelpb.Service{Name: fmt.Sprintf("svc%d", i%uniqueServices)}, + }) + } + + out := make(chan CombinedMetrics, 1) + aggIvl := time.Minute + agg, err := New( + WithDataDir(t.TempDir()), + WithLimits(Limits{ + MaxSpanGroups: 1000, + MaxSpanGroupsPerService: 100, + MaxTransactionGroups: 100, + MaxTransactionGroupsPerService: 10, + MaxServiceTransactionGroups: 100, + MaxServiceTransactionGroupsPerService: 10, + MaxServices: 10, + MaxServiceInstanceGroupsPerService: 10, + }), + WithProcessor(combinedMetricsProcessor(out)), + WithAggregationIntervals([]time.Duration{aggIvl}), + WithHarvestDelay(time.Hour), // disable auto harvest + WithTracer(tp.Tracer("test")), + WithMeter(mp.Meter("test")), + WithCombinedMetricsIDToKVs(func(id [16]byte) []attribute.KeyValue { + return []attribute.KeyValue{attribute.String("id_key", string(id[:]))} + }), + ) + require.NoError(t, err) + + require.NoError(t, agg.AggregateBatch(context.Background(), cmID, &batch)) + require.NoError(t, agg.Stop(context.Background())) + var cm CombinedMetrics + select { + case cm = <-out: + default: + t.Error("failed to get aggregated metrics") + t.FailNow() + } + + var span tracetest.SpanStub + for _, s := range exp.GetSpans() { + if s.Name == "AggregateBatch" { + span = s + } + } + assert.NotNil(t, span) + + expectedCombinedMetrics := NewTestCombinedMetrics( + WithEventsTotal(float64(len(batch))), + WithYoungestEventTimestamp(ts), + ) + expectedMeasurements := []apmmodel.Metrics{ + { + Samples: map[string]apmmodel.Metric{ + "aggregator.requests.total": {Value: 1}, + "aggregator.bytes.ingested": {Value: 138250}, + }, + Labels: apmmodel.StringMap{ + apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, + }, + }, + { + Samples: map[string]apmmodel.Metric{ + "aggregator.events.total": {Value: float64(len(batch))}, + "aggregator.events.processed": {Value: float64(len(batch))}, + "events.processing-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, + "events.queued-delay": {Type: "histogram", Counts: []uint64{1}, Values: []float64{0}}, + }, + Labels: apmmodel.StringMap{ + apmmodel.StringMapItem{Key: aggregationIvlKey, Value: formatDuration(aggIvl)}, + apmmodel.StringMapItem{Key: "id_key", Value: string(cmID[:])}, + }, + }, + } + sik := ServiceInstanceAggregationKey{GlobalLabelsStr: ""} + for i := 0; i < uniqueEventCount*repCount; i++ { + svcKey := ServiceAggregationKey{ + Timestamp: time.Unix(0, 0).UTC(), + ServiceName: fmt.Sprintf("svc%d", i%uniqueServices), + } + txKey := TransactionAggregationKey{ + TraceRoot: true, + TransactionName: fmt.Sprintf("foo%d", i%uniqueEventCount), + TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), + EventOutcome: "success", + } + stxKey := ServiceTransactionAggregationKey{ + TransactionType: fmt.Sprintf("txtype%d", i%uniqueEventCount), + } + spanKey := SpanAggregationKey{ + SpanName: fmt.Sprintf("bar%d", i%uniqueEventCount), + Resource: "test_dest", + } + dssKey := SpanAggregationKey{ + SpanName: "", + Resource: fmt.Sprintf("dropped_dest_resource%d", i%uniqueEventCount), + Outcome: "success", + } + expectedCombinedMetrics. + AddServiceMetrics(svcKey). + AddServiceInstanceMetrics(sik). + AddTransaction(txKey, WithTransactionDuration(eventDuration)). + AddServiceTransaction(stxKey, WithTransactionDuration(eventDuration)). + AddSpan(spanKey, WithSpanDuration(eventDuration)). + AddSpan(dssKey, WithSpanDuration(dssDuration)) + } + assert.Empty(t, cmp.Diff( + expectedCombinedMetrics.Get(), cm, + cmpopts.EquateEmpty(), + cmpopts.EquateApprox(0, 0.01), + cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { + return a.Equal(&b) + }), + protocmp.Transform(), + )) + assert.Empty(t, cmp.Diff( + expectedMeasurements, + gatherMetrics( + gatherer, + withIgnoreMetricPrefix("pebble."), + withZeroHistogramValues(true), + ), + cmpopts.IgnoreUnexported(apmmodel.Time{}), + cmpopts.EquateApprox(0, 0.01), + )) +} -// func TestAggregateSpanMetrics(t *testing.T) { -// type input struct { -// serviceName string -// agentName string -// destination string -// targetType string -// targetName string -// outcome string -// representativeCount float64 -// } -// -// destinationX := "destination-X" -// destinationZ := "destination-Z" -// trgTypeX := "trg-type-X" -// trgNameX := "trg-name-X" -// trgTypeZ := "trg-type-Z" -// trgNameZ := "trg-name-Z" -// defaultLabels := modelpb.Labels{ -// "department_name": &modelpb.LabelValue{Global: true, Value: "apm"}, -// "organization": &modelpb.LabelValue{Global: true, Value: "observability"}, -// "company": &modelpb.LabelValue{Global: true, Value: "elastic"}, -// } -// defaultNumericLabels := modelpb.NumericLabels{ -// "user_id": &modelpb.NumericLabelValue{Global: true, Value: 100}, -// "cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10}, -// } -// -// for _, tt := range []struct { -// name string -// inputs []input -// getExpectedEvents func(time.Time, time.Duration, time.Duration, int) []*modelpb.APMEvent -// }{ -// { -// name: "with destination and service targets", -// inputs: []input{ -// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 2}, -// {serviceName: "service-A", agentName: "java", destination: destinationX, targetType: trgTypeX, targetName: trgNameX, outcome: "success", representativeCount: 1}, -// {serviceName: "service-B", agentName: "python", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, -// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, -// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 0}, -// {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "failure", representativeCount: 1}, -// }, -// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { -// return []*modelpb.APMEvent{ -// { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// }, -// Metricset: &modelpb.Metricset{ -// Name: "service_summary", -// Interval: formatDuration(ivl), -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "python"}, -// Service: &modelpb.Service{ -// Name: "service-B", -// }, -// Metricset: &modelpb.Metricset{ -// Name: "service_summary", -// Interval: formatDuration(ivl), -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// Target: &modelpb.ServiceTarget{ -// Type: trgTypeX, -// Name: trgNameX, -// }, -// }, -// Event: &modelpb.Event{Outcome: "success"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(count), -// }, -// Span: &modelpb.Span{ -// Name: "service-A:" + destinationX, -// DestinationService: &modelpb.DestinationService{ -// Resource: destinationX, -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(count), -// Sum: durationpb.New(time.Duration(count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// Target: &modelpb.ServiceTarget{ -// Type: trgTypeZ, -// Name: trgNameZ, -// }, -// }, -// Event: &modelpb.Event{Outcome: "failure"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(count), -// }, -// Span: &modelpb.Span{ -// Name: "service-A:" + destinationZ, -// DestinationService: &modelpb.DestinationService{ -// Resource: destinationZ, -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(count), -// Sum: durationpb.New(time.Duration(count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// Target: &modelpb.ServiceTarget{ -// Type: trgTypeZ, -// Name: trgNameZ, -// }, -// }, -// Event: &modelpb.Event{Outcome: "success"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(3 * count), -// }, -// Span: &modelpb.Span{ -// Name: "service-A:" + destinationZ, -// DestinationService: &modelpb.DestinationService{ -// Resource: destinationZ, -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(3 * count), -// Sum: durationpb.New(time.Duration(3*count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "python"}, -// Service: &modelpb.Service{ -// Name: "service-B", -// Target: &modelpb.ServiceTarget{ -// Type: trgTypeZ, -// Name: trgNameZ, -// }, -// }, -// Event: &modelpb.Event{Outcome: "success"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(count), -// }, -// Span: &modelpb.Span{ -// Name: "service-B:" + destinationZ, -// DestinationService: &modelpb.DestinationService{ -// Resource: destinationZ, -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(count), -// Sum: durationpb.New(time.Duration(count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, -// } -// }, -// }, { -// name: "with_no_destination_and_no_service_target", -// inputs: []input{ -// {serviceName: "service-A", agentName: "java", outcome: "success", representativeCount: 1}, -// }, -// getExpectedEvents: func(_ time.Time, _, _ time.Duration, _ int) []*modelpb.APMEvent { -// return nil -// }, -// }, { -// name: "with no destination and a service target", -// inputs: []input{ -// {serviceName: "service-A", agentName: "java", targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, -// }, -// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { -// return []*modelpb.APMEvent{ -// { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// }, -// Metricset: &modelpb.Metricset{ -// Name: "service_summary", -// Interval: formatDuration(ivl), -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// Target: &modelpb.ServiceTarget{ -// Type: trgTypeZ, -// Name: trgNameZ, -// }, -// }, -// Event: &modelpb.Event{Outcome: "success"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(count), -// }, -// Span: &modelpb.Span{ -// Name: "service-A:", -// DestinationService: &modelpb.DestinationService{ -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(count), -// Sum: durationpb.New(time.Duration(count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, -// } -// }, -// }, { -// name: "with a destination and no service target", -// inputs: []input{ -// {serviceName: "service-A", agentName: "java", destination: destinationZ, outcome: "success", representativeCount: 1}, -// }, -// getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { -// return []*modelpb.APMEvent{ -// { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// }, -// Metricset: &modelpb.Metricset{ -// Name: "service_summary", -// Interval: formatDuration(ivl), -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, { -// Timestamp: timestamppb.New(ts.Truncate(ivl)), -// Agent: &modelpb.Agent{Name: "java"}, -// Service: &modelpb.Service{ -// Name: "service-A", -// }, -// Event: &modelpb.Event{Outcome: "success"}, -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// DocCount: int64(count), -// }, -// Span: &modelpb.Span{ -// Name: "service-A:" + destinationZ, -// DestinationService: &modelpb.DestinationService{ -// Resource: destinationZ, -// ResponseTime: &modelpb.AggregatedDuration{ -// Count: int64(count), -// Sum: durationpb.New(time.Duration(count) * duration), -// }, -// }, -// }, -// Labels: defaultLabels, -// NumericLabels: defaultNumericLabels, -// }, -// } -// }, -// }, -// } { -// t.Run(tt.name, func(t *testing.T) { -// var actualEvents []*modelpb.APMEvent -// aggregationIvls := []time.Duration{time.Minute, 10 * time.Minute, time.Hour} -// agg, err := New( -// WithLimits(Limits{ -// MaxSpanGroups: 1000, -// MaxSpanGroupsPerService: 100, -// MaxTransactionGroups: 100, -// MaxTransactionGroupsPerService: 10, -// MaxServiceTransactionGroups: 100, -// MaxServiceTransactionGroupsPerService: 10, -// MaxServices: 10, -// MaxServiceInstanceGroupsPerService: 10, -// }), -// WithAggregationIntervals(aggregationIvls), -// WithProcessor(sliceProcessor(&actualEvents)), -// WithDataDir(t.TempDir()), -// ) -// require.NoError(t, err) -// -// count := 100 -// now := time.Now() -// duration := 100 * time.Millisecond -// for _, in := range tt.inputs { -// span := makeSpan( -// now, -// in.serviceName, -// in.agentName, -// in.destination, -// in.targetType, -// in.targetName, -// in.outcome, -// duration, -// in.representativeCount, -// defaultLabels, -// defaultNumericLabels, -// ) -// for i := 0; i < count; i++ { -// err := agg.AggregateBatch( -// context.Background(), -// EncodeToCombinedMetricsKeyID(t, "ab01"), -// &modelpb.Batch{span}, -// ) -// require.NoError(t, err) -// } -// } -// require.NoError(t, agg.Stop(context.Background())) -// var expectedEvents []*modelpb.APMEvent -// for _, ivl := range aggregationIvls { -// expectedEvents = append(expectedEvents, tt.getExpectedEvents(now, duration, ivl, count)...) -// } -// sortKey := func(e *modelpb.APMEvent) string { -// var sb strings.Builder -// sb.WriteString(e.GetService().GetName()) -// sb.WriteString(e.GetAgent().GetName()) -// sb.WriteString(e.GetMetricset().GetName()) -// sb.WriteString(e.GetMetricset().GetInterval()) -// destSvc := e.GetSpan().GetDestinationService() -// if destSvc != nil { -// sb.WriteString(destSvc.GetResource()) -// } -// target := e.GetService().GetTarget() -// if target != nil { -// sb.WriteString(target.GetName()) -// sb.WriteString(target.GetType()) -// } -// sb.WriteString(e.GetEvent().GetOutcome()) -// return sb.String() -// } -// sort.Slice(expectedEvents, func(i, j int) bool { -// return sortKey(expectedEvents[i]) < sortKey(expectedEvents[j]) -// }) -// sort.Slice(actualEvents, func(i, j int) bool { -// return sortKey(actualEvents[i]) < sortKey(actualEvents[j]) -// }) -// assert.Empty(t, cmp.Diff( -// expectedEvents, actualEvents, -// cmpopts.EquateEmpty(), -// cmpopts.IgnoreTypes(netip.Addr{}), -// protocmp.Transform(), -// )) -// }) -// } -// } +func TestAggregateSpanMetrics(t *testing.T) { + type input struct { + serviceName string + agentName string + destination string + targetType string + targetName string + outcome string + representativeCount float64 + } + + destinationX := "destination-X" + destinationZ := "destination-Z" + trgTypeX := "trg-type-X" + trgNameX := "trg-name-X" + trgTypeZ := "trg-type-Z" + trgNameZ := "trg-name-Z" + defaultLabels := modelpb.Labels{ + "department_name": &modelpb.LabelValue{Global: true, Value: "apm"}, + "organization": &modelpb.LabelValue{Global: true, Value: "observability"}, + "company": &modelpb.LabelValue{Global: true, Value: "elastic"}, + } + defaultNumericLabels := modelpb.NumericLabels{ + "user_id": &modelpb.NumericLabelValue{Global: true, Value: 100}, + "cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10}, + } + + for _, tt := range []struct { + name string + inputs []input + getExpectedEvents func(time.Time, time.Duration, time.Duration, int) []*modelpb.APMEvent + }{ + { + name: "with destination and service targets", + inputs: []input{ + {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 2}, + {serviceName: "service-A", agentName: "java", destination: destinationX, targetType: trgTypeX, targetName: trgNameX, outcome: "success", representativeCount: 1}, + {serviceName: "service-B", agentName: "python", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, + {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, + {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 0}, + {serviceName: "service-A", agentName: "java", destination: destinationZ, targetType: trgTypeZ, targetName: trgNameZ, outcome: "failure", representativeCount: 1}, + }, + getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { + return []*modelpb.APMEvent{ + { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + }, + Metricset: &modelpb.Metricset{ + Name: "service_summary", + Interval: formatDuration(ivl), + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "python"}, + Service: &modelpb.Service{ + Name: "service-B", + }, + Metricset: &modelpb.Metricset{ + Name: "service_summary", + Interval: formatDuration(ivl), + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + Target: &modelpb.ServiceTarget{ + Type: trgTypeX, + Name: trgNameX, + }, + }, + Event: &modelpb.Event{Outcome: "success"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(count), + }, + Span: &modelpb.Span{ + Name: "service-A:" + destinationX, + DestinationService: &modelpb.DestinationService{ + Resource: destinationX, + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(count), + Sum: durationpb.New(time.Duration(count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + Target: &modelpb.ServiceTarget{ + Type: trgTypeZ, + Name: trgNameZ, + }, + }, + Event: &modelpb.Event{Outcome: "failure"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(count), + }, + Span: &modelpb.Span{ + Name: "service-A:" + destinationZ, + DestinationService: &modelpb.DestinationService{ + Resource: destinationZ, + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(count), + Sum: durationpb.New(time.Duration(count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + Target: &modelpb.ServiceTarget{ + Type: trgTypeZ, + Name: trgNameZ, + }, + }, + Event: &modelpb.Event{Outcome: "success"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(3 * count), + }, + Span: &modelpb.Span{ + Name: "service-A:" + destinationZ, + DestinationService: &modelpb.DestinationService{ + Resource: destinationZ, + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(3 * count), + Sum: durationpb.New(time.Duration(3*count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "python"}, + Service: &modelpb.Service{ + Name: "service-B", + Target: &modelpb.ServiceTarget{ + Type: trgTypeZ, + Name: trgNameZ, + }, + }, + Event: &modelpb.Event{Outcome: "success"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(count), + }, + Span: &modelpb.Span{ + Name: "service-B:" + destinationZ, + DestinationService: &modelpb.DestinationService{ + Resource: destinationZ, + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(count), + Sum: durationpb.New(time.Duration(count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, + } + }, + }, { + name: "with_no_destination_and_no_service_target", + inputs: []input{ + {serviceName: "service-A", agentName: "java", outcome: "success", representativeCount: 1}, + }, + getExpectedEvents: func(_ time.Time, _, _ time.Duration, _ int) []*modelpb.APMEvent { + return nil + }, + }, { + name: "with no destination and a service target", + inputs: []input{ + {serviceName: "service-A", agentName: "java", targetType: trgTypeZ, targetName: trgNameZ, outcome: "success", representativeCount: 1}, + }, + getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { + return []*modelpb.APMEvent{ + { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + }, + Metricset: &modelpb.Metricset{ + Name: "service_summary", + Interval: formatDuration(ivl), + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + Target: &modelpb.ServiceTarget{ + Type: trgTypeZ, + Name: trgNameZ, + }, + }, + Event: &modelpb.Event{Outcome: "success"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(count), + }, + Span: &modelpb.Span{ + Name: "service-A:", + DestinationService: &modelpb.DestinationService{ + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(count), + Sum: durationpb.New(time.Duration(count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, + } + }, + }, { + name: "with a destination and no service target", + inputs: []input{ + {serviceName: "service-A", agentName: "java", destination: destinationZ, outcome: "success", representativeCount: 1}, + }, + getExpectedEvents: func(ts time.Time, duration, ivl time.Duration, count int) []*modelpb.APMEvent { + return []*modelpb.APMEvent{ + { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + }, + Metricset: &modelpb.Metricset{ + Name: "service_summary", + Interval: formatDuration(ivl), + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, { + Timestamp: timestamppb.New(ts.Truncate(ivl)), + Agent: &modelpb.Agent{Name: "java"}, + Service: &modelpb.Service{ + Name: "service-A", + }, + Event: &modelpb.Event{Outcome: "success"}, + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + DocCount: int64(count), + }, + Span: &modelpb.Span{ + Name: "service-A:" + destinationZ, + DestinationService: &modelpb.DestinationService{ + Resource: destinationZ, + ResponseTime: &modelpb.AggregatedDuration{ + Count: int64(count), + Sum: durationpb.New(time.Duration(count) * duration), + }, + }, + }, + Labels: defaultLabels, + NumericLabels: defaultNumericLabels, + }, + } + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + var actualEvents []*modelpb.APMEvent + aggregationIvls := []time.Duration{time.Minute, 10 * time.Minute, time.Hour} + agg, err := New( + WithLimits(Limits{ + MaxSpanGroups: 1000, + MaxSpanGroupsPerService: 100, + MaxTransactionGroups: 100, + MaxTransactionGroupsPerService: 10, + MaxServiceTransactionGroups: 100, + MaxServiceTransactionGroupsPerService: 10, + MaxServices: 10, + MaxServiceInstanceGroupsPerService: 10, + }), + WithAggregationIntervals(aggregationIvls), + WithProcessor(sliceProcessor(&actualEvents)), + WithDataDir(t.TempDir()), + ) + require.NoError(t, err) + + count := 100 + now := time.Now() + duration := 100 * time.Millisecond + for _, in := range tt.inputs { + span := makeSpan( + now, + in.serviceName, + in.agentName, + in.destination, + in.targetType, + in.targetName, + in.outcome, + duration, + in.representativeCount, + defaultLabels, + defaultNumericLabels, + ) + for i := 0; i < count; i++ { + err := agg.AggregateBatch( + context.Background(), + EncodeToCombinedMetricsKeyID(t, "ab01"), + &modelpb.Batch{span}, + ) + require.NoError(t, err) + } + } + require.NoError(t, agg.Stop(context.Background())) + var expectedEvents []*modelpb.APMEvent + for _, ivl := range aggregationIvls { + expectedEvents = append(expectedEvents, tt.getExpectedEvents(now, duration, ivl, count)...) + } + sortKey := func(e *modelpb.APMEvent) string { + var sb strings.Builder + sb.WriteString(e.GetService().GetName()) + sb.WriteString(e.GetAgent().GetName()) + sb.WriteString(e.GetMetricset().GetName()) + sb.WriteString(e.GetMetricset().GetInterval()) + destSvc := e.GetSpan().GetDestinationService() + if destSvc != nil { + sb.WriteString(destSvc.GetResource()) + } + target := e.GetService().GetTarget() + if target != nil { + sb.WriteString(target.GetName()) + sb.WriteString(target.GetType()) + } + sb.WriteString(e.GetEvent().GetOutcome()) + return sb.String() + } + sort.Slice(expectedEvents, func(i, j int) bool { + return sortKey(expectedEvents[i]) < sortKey(expectedEvents[j]) + }) + sort.Slice(actualEvents, func(i, j int) bool { + return sortKey(actualEvents[i]) < sortKey(actualEvents[j]) + }) + assert.Empty(t, cmp.Diff( + expectedEvents, actualEvents, + cmpopts.EquateEmpty(), + cmpopts.IgnoreTypes(netip.Addr{}), + protocmp.Transform(), + )) + }) + } +} func TestCombinedMetricsKeyOrdered(t *testing.T) { // To Allow for retrieving combined metrics by time range, the metrics should diff --git a/aggregators/combined_metrics_test.go b/aggregators/combined_metrics_test.go index 6c8808f..3149d41 100644 --- a/aggregators/combined_metrics_test.go +++ b/aggregators/combined_metrics_test.go @@ -38,6 +38,58 @@ var defaultTestCombinedMetricsCfg = TestCombinedMetricsCfg{ youngestEventTimestamp: time.Time{}, } +type TestTransactionCfg struct { + duration time.Duration + count int +} + +type TestTransactionOpt func(TestTransactionCfg) TestTransactionCfg + +func WithTransactionDuration(d time.Duration) TestTransactionOpt { + return func(cfg TestTransactionCfg) TestTransactionCfg { + cfg.duration = d + return cfg + } +} + +func WithTransactionCount(c int) TestTransactionOpt { + return func(cfg TestTransactionCfg) TestTransactionCfg { + cfg.count = c + return cfg + } +} + +var defaultTestTransactionCfg = TestTransactionCfg{ + duration: time.Second, + count: 1, +} + +type TestSpanCfg struct { + duration time.Duration + count int +} + +type TestSpanOpt func(TestSpanCfg) TestSpanCfg + +func WithSpanDuration(d time.Duration) TestSpanOpt { + return func(cfg TestSpanCfg) TestSpanCfg { + cfg.duration = d + return cfg + } +} + +func WithSpanCount(c int) TestSpanOpt { + return func(cfg TestSpanCfg) TestSpanCfg { + cfg.count = c + return cfg + } +} + +var defaultTestSpanCfg = TestSpanCfg{ + duration: time.Nanosecond, // for backward compatibility with previous tests + count: 1, +} + // TestCombinedMetrics creates combined metrics for testing. The creation logic // is arranged in a way to allow chained creation and addition of leaf nodes // to combined metrics. @@ -55,6 +107,99 @@ func NewTestCombinedMetrics(opts ...TestCombinedMetricsOpt) *TestCombinedMetrics return (*TestCombinedMetrics)(&cm) } +func (tcm *TestCombinedMetrics) AddGlobalTransactionOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + tk TransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestCombinedMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.TransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(tk.ToProto()). + Sum() + tcm.OverflowServices.OverflowTransaction.Merge(from, hash) + insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tcm +} + +func (tcm *TestCombinedMetrics) AddGlobalServiceTransactionOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + stk ServiceTransactionAggregationKey, + opts ...TestTransactionOpt, +) *TestCombinedMetrics { + cfg := defaultTestTransactionCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + hdr := hdrhistogram.New() + hdr.RecordDuration(cfg.duration, float64(cfg.count)) + from := aggregationpb.ServiceTransactionMetricsFromVTPool() + from.Histogram = HistogramToProto(hdr) + from.SuccessCount += float64(cfg.count) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(stk.ToProto()). + Sum() + tcm.OverflowServices.OverflowServiceTransaction.Merge(from, hash) + insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tcm +} + +func (tcm *TestCombinedMetrics) AddGlobalSpanOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, + spk SpanAggregationKey, + opts ...TestSpanOpt, +) *TestCombinedMetrics { + cfg := defaultTestSpanCfg + for _, opt := range opts { + cfg = opt(cfg) + } + + from := aggregationpb.SpanMetricsFromVTPool() + from.Sum += float64(cfg.duration * time.Duration(cfg.count)) + from.Count += float64(cfg.count) + + sikHasher := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()) + hash := sikHasher. + Chain(spk.ToProto()). + Sum() + tcm.OverflowServices.OverflowSpan.Merge(from, hash) + insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) + return tcm +} + +func (tcm *TestCombinedMetrics) AddGlobalServiceInstanceOverflow( + sk ServiceAggregationKey, + sik ServiceInstanceAggregationKey, +) *TestCombinedMetrics { + hash := Hasher{}. + Chain(sk.ToProto()). + Chain(sik.ToProto()). + Sum() + insertHash(&tcm.OverflowServiceInstancesEstimator, hash) + return tcm +} + func (tcm *TestCombinedMetrics) GetProto() *aggregationpb.CombinedMetrics { cm := (*CombinedMetrics)(tcm) cmproto := cm.ToProto() @@ -74,7 +219,9 @@ type TestServiceMetrics struct { func (tcm *TestCombinedMetrics) AddServiceMetrics( sk ServiceAggregationKey, ) *TestServiceMetrics { - tcm.Services[sk] = newServiceMetrics() + if _, ok := tcm.Services[sk]; !ok { + tcm.Services[sk] = newServiceMetrics() + } return &TestServiceMetrics{sk: sk, tcm: tcm} } @@ -88,7 +235,9 @@ func (tsm *TestServiceMetrics) AddServiceInstanceMetrics( sik ServiceInstanceAggregationKey, ) *TestServiceInstanceMetrics { svc := tsm.tcm.Services[tsm.sk] - svc.ServiceInstanceGroups[sik] = newServiceInstanceMetrics() + if _, ok := svc.ServiceInstanceGroups[sik]; !ok { + svc.ServiceInstanceGroups[sik] = newServiceInstanceMetrics() + } return &TestServiceInstanceMetrics{ sik: sik, sk: tsm.sk, @@ -104,44 +253,6 @@ func (tsim *TestServiceInstanceMetrics) Get() CombinedMetrics { return tsim.tcm.Get() } -type TestTransactionCfg struct { - duration time.Duration - count int -} - -func (tsim *TestServiceInstanceMetrics) AddGlobalServiceInstanceOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, -) *TestServiceInstanceMetrics { - hash := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()). - Sum() - insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, hash) - return tsim -} - -type TestTransactionOpt func(TestTransactionCfg) TestTransactionCfg - -func WithTransactionDuration(d time.Duration) TestTransactionOpt { - return func(cfg TestTransactionCfg) TestTransactionCfg { - cfg.duration = d - return cfg - } -} - -func WithTransactionCount(c int) TestTransactionOpt { - return func(cfg TestTransactionCfg) TestTransactionCfg { - cfg.count = c - return cfg - } -} - -var defaultTestTransactionCfg = TestTransactionCfg{ - duration: time.Second, - count: 1, -} - func (tsim *TestServiceInstanceMetrics) AddTransaction( tk TransactionAggregationKey, opts ...TestTransactionOpt, @@ -160,6 +271,10 @@ func (tsim *TestServiceInstanceMetrics) AddTransaction( svc := tsim.tcm.Services[tsim.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] + if oldKtm, ok := svcIns.TransactionGroups[tk]; ok { + mergeKeyedTransactionMetrics(oldKtm, ktm) + ktm = oldKtm + } svcIns.TransactionGroups[tk] = ktm return tsim } @@ -167,14 +282,6 @@ func (tsim *TestServiceInstanceMetrics) AddTransaction( func (tsim *TestServiceInstanceMetrics) AddTransactionOverflow( tk TransactionAggregationKey, opts ...TestTransactionOpt, -) *TestServiceInstanceMetrics { - return tsim.AddTransactionOverflowWithServiceInstance(tsim.sik, tk, opts...) -} - -func (tsim *TestServiceInstanceMetrics) AddTransactionOverflowWithServiceInstance( - sik ServiceInstanceAggregationKey, - tk TransactionAggregationKey, - opts ...TestTransactionOpt, ) *TestServiceInstanceMetrics { cfg := defaultTestTransactionCfg for _, opt := range opts { @@ -189,7 +296,7 @@ func (tsim *TestServiceInstanceMetrics) AddTransactionOverflowWithServiceInstanc svc := tsim.tcm.Services[tsim.sk] hash := Hasher{}. Chain(tsim.sk.ToProto()). - Chain(sik.ToProto()). + Chain(tsim.sik.ToProto()). Chain(tk.ToProto()). Sum() svc.OverflowGroups.OverflowTransaction.Merge(from, hash) @@ -197,33 +304,6 @@ func (tsim *TestServiceInstanceMetrics) AddTransactionOverflowWithServiceInstanc return tsim } -func (tsim *TestServiceInstanceMetrics) AddGlobalTransactionOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - tk TransactionAggregationKey, - opts ...TestTransactionOpt, -) *TestServiceInstanceMetrics { - cfg := defaultTestTransactionCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - hdr := hdrhistogram.New() - hdr.RecordDuration(cfg.duration, float64(cfg.count)) - from := aggregationpb.TransactionMetricsFromVTPool() - from.Histogram = HistogramToProto(hdr) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(tk.ToProto()). - Sum() - tsim.tcm.OverflowServices.OverflowTransaction.Merge(from, hash) - insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tsim -} - func (tsim *TestServiceInstanceMetrics) AddServiceTransaction( stk ServiceTransactionAggregationKey, opts ...TestTransactionOpt, @@ -243,6 +323,10 @@ func (tsim *TestServiceInstanceMetrics) AddServiceTransaction( svc := tsim.tcm.Services[tsim.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] + if oldKstm, ok := svcIns.ServiceTransactionGroups[stk]; ok { + mergeKeyedServiceTransactionMetrics(oldKstm, kstm) + kstm = oldKstm + } svcIns.ServiceTransactionGroups[stk] = kstm return tsim } @@ -273,60 +357,6 @@ func (tsim *TestServiceInstanceMetrics) AddServiceTransactionOverflow( return tsim } -func (tsim *TestServiceInstanceMetrics) AddGlobalServiceTransactionOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - stk ServiceTransactionAggregationKey, - opts ...TestTransactionOpt, -) *TestServiceInstanceMetrics { - cfg := defaultTestTransactionCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - hdr := hdrhistogram.New() - hdr.RecordDuration(cfg.duration, float64(cfg.count)) - from := aggregationpb.ServiceTransactionMetricsFromVTPool() - from.Histogram = HistogramToProto(hdr) - from.SuccessCount += float64(cfg.count) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(stk.ToProto()). - Sum() - tsim.tcm.OverflowServices.OverflowServiceTransaction.Merge(from, hash) - insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tsim -} - -type TestSpanCfg struct { - duration time.Duration - count int -} - -type TestSpanOpt func(TestSpanCfg) TestSpanCfg - -func WithSpanDuration(d time.Duration) TestSpanOpt { - return func(cfg TestSpanCfg) TestSpanCfg { - cfg.duration = d - return cfg - } -} - -func WithSpanCount(c int) TestSpanOpt { - return func(cfg TestSpanCfg) TestSpanCfg { - cfg.count = c - return cfg - } -} - -var defaultTestSpanCfg = TestSpanCfg{ - duration: time.Nanosecond, // for backward compatibility with previous tests - count: 1, -} - func (tsim *TestServiceInstanceMetrics) AddSpan( spk SpanAggregationKey, opts ...TestSpanOpt, @@ -344,6 +374,10 @@ func (tsim *TestServiceInstanceMetrics) AddSpan( svc := tsim.tcm.Services[tsim.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] + if oldKsm, ok := svcIns.SpanGroups[spk]; ok { + mergeKeyedSpanMetrics(oldKsm, ksm) + ksm = oldKsm + } svcIns.SpanGroups[spk] = ksm return tsim } @@ -371,29 +405,3 @@ func (tsim *TestServiceInstanceMetrics) AddSpanOverflow( tsim.tcm.Services[tsim.sk] = svc return tsim } - -func (tsim *TestServiceInstanceMetrics) AddGlobalSpanOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - spk SpanAggregationKey, - opts ...TestSpanOpt, -) *TestServiceInstanceMetrics { - cfg := defaultTestSpanCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - from := aggregationpb.SpanMetricsFromVTPool() - from.Sum += float64(cfg.duration * time.Duration(cfg.count)) - from.Count += float64(cfg.count) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(spk.ToProto()). - Sum() - tsim.tcm.OverflowServices.OverflowSpan.Merge(from, hash) - insertHash(&tsim.tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tsim -} diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go index b5c1dbc..402de11 100644 --- a/aggregators/converter_test.go +++ b/aggregators/converter_test.go @@ -4,247 +4,263 @@ package aggregators -// func TestEventToCombinedMetrics(t *testing.T) { -// ts := time.Now().UTC() -// receivedTS := ts.Add(time.Second) -// baseEvent := &modelpb.APMEvent{ -// Timestamp: timestamppb.New(ts), -// ParentId: "nonroot", -// Service: &modelpb.Service{Name: "test"}, -// Event: &modelpb.Event{ -// Duration: durationpb.New(time.Second), -// Outcome: "success", -// Received: timestamppb.New(receivedTS), -// }, -// } -// for _, tc := range []struct { -// name string -// input func() *modelpb.APMEvent -// partitioner Partitioner -// expected func() []*aggregationpb.CombinedMetrics -// }{ -// { -// name: "with-zero-rep-count-txn", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Transaction = &modelpb.Transaction{ -// Name: "testtxn", -// Type: "testtyp", -// RepresentativeCount: 0, -// } -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return nil -// }, -// }, -// { -// name: "with-good-txn", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Transaction = &modelpb.Transaction{ -// Name: "testtxn", -// Type: "testtyp", -// RepresentativeCount: 1, -// } -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return []*aggregationpb.CombinedMetrics{ -// (*CombinedMetrics)(createTestCombinedMetrics( -// withEventsTotal(1), -// withYoungestEventTimestamp(receivedTS), -// ).addTransaction( -// ts.Truncate(time.Minute), "test", "", -// testTransaction{ -// txnName: "testtxn", -// txnType: "testtyp", -// eventOutcome: "success", -// count: 1, -// }, -// ).addServiceTransaction( -// ts.Truncate(time.Minute), "test", "", -// testServiceTransaction{ -// txnType: "testtyp", -// count: 1, -// }, -// )).ToProto(), -// } -// }, -// }, -// { -// name: "with-zero-rep-count-span", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Span = &modelpb.Span{ -// Name: "testspan", -// Type: "testtyp", -// RepresentativeCount: 0, -// } -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return nil -// }, -// }, -// { -// name: "with-no-exit-span", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Span = &modelpb.Span{ -// Name: "testspan", -// Type: "testtyp", -// RepresentativeCount: 1, -// } -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return nil -// }, -// }, -// { -// name: "with-good-span-dest-svc", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Span = &modelpb.Span{ -// Name: "testspan", -// Type: "testtyp", -// RepresentativeCount: 1, -// } -// event.Service.Target = &modelpb.ServiceTarget{ -// Name: "psql", -// Type: "db", -// } -// // Current test structs are hardcoded to use 1ns for spans -// event.Event.Duration = durationpb.New(time.Nanosecond) -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return []*aggregationpb.CombinedMetrics{ -// (*CombinedMetrics)(createTestCombinedMetrics( -// withEventsTotal(1), -// withYoungestEventTimestamp(receivedTS), -// ).addSpan( -// ts.Truncate(time.Minute), "test", "", -// testSpan{ -// spanName: "testspan", -// targetName: "psql", -// targetType: "db", -// outcome: "success", -// count: 1, -// }, -// )).ToProto(), -// } -// }, -// }, -// { -// name: "with-good-span-svc-target", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Span = &modelpb.Span{ -// Name: "testspan", -// Type: "testtyp", -// RepresentativeCount: 1, -// DestinationService: &modelpb.DestinationService{ -// Resource: "db", -// }, -// } -// // Current test structs are hardcoded to use 1ns for spans -// event.Event.Duration = durationpb.New(time.Nanosecond) -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return []*aggregationpb.CombinedMetrics{ -// (*CombinedMetrics)(createTestCombinedMetrics( -// withEventsTotal(1), -// withYoungestEventTimestamp(receivedTS), -// ).addSpan( -// ts.Truncate(time.Minute), "test", "", -// testSpan{ -// spanName: "testspan", -// destinationResource: "db", -// outcome: "success", -// count: 1, -// }, -// )).ToProto(), -// } -// }, -// }, -// { -// name: "with-metricset", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Metricset = &modelpb.Metricset{ -// Name: "testmetricset", -// Interval: "1m", -// } -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return []*aggregationpb.CombinedMetrics{ -// (*CombinedMetrics)(createTestCombinedMetrics( -// withEventsTotal(1), -// withYoungestEventTimestamp(receivedTS), -// ).addServiceInstance( -// ts.Truncate(time.Minute), "test", "", -// )).ToProto(), -// } -// }, -// }, -// { -// name: "with-log", -// input: func() *modelpb.APMEvent { -// event := baseEvent.CloneVT() -// event.Log = &modelpb.Log{} -// return event -// }, -// partitioner: NewHashPartitioner(1), -// expected: func() []*aggregationpb.CombinedMetrics { -// return []*aggregationpb.CombinedMetrics{ -// (*CombinedMetrics)(createTestCombinedMetrics( -// withEventsTotal(1), -// withYoungestEventTimestamp(receivedTS), -// ).addServiceInstance( -// ts.Truncate(time.Minute), "test", "", -// )).ToProto(), -// } -// }, -// }, -// } { -// t.Run(tc.name, func(t *testing.T) { -// cmk := CombinedMetricsKey{ -// Interval: time.Minute, -// ProcessingTime: time.Now().Truncate(time.Minute), -// ID: EncodeToCombinedMetricsKeyID(t, "ab01"), -// } -// var actual []*aggregationpb.CombinedMetrics -// collector := func( -// _ CombinedMetricsKey, -// m *aggregationpb.CombinedMetrics, -// ) error { -// actual = append(actual, m.CloneVT()) -// return nil -// } -// err := EventToCombinedMetrics(tc.input(), cmk, tc.partitioner, collector) -// require.NoError(t, err) -// assert.Empty(t, cmp.Diff( -// tc.expected(), actual, -// cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { -// return a.Equal(&b) -// }), -// protocmp.Transform(), -// protocmp.IgnoreEmptyMessages(), -// )) -// }) -// } -// } +import ( + "testing" + "time" + + "github.com/elastic/apm-aggregation/aggregationpb" + "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-data/model/modelpb" + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" +) + +func TestEventToCombinedMetrics(t *testing.T) { + ts := time.Now().UTC() + receivedTS := ts.Add(time.Second) + baseEvent := &modelpb.APMEvent{ + Timestamp: timestamppb.New(ts), + ParentId: "nonroot", + Service: &modelpb.Service{Name: "test"}, + Event: &modelpb.Event{ + Duration: durationpb.New(time.Second), + Outcome: "success", + Received: timestamppb.New(receivedTS), + }, + } + for _, tc := range []struct { + name string + input func() *modelpb.APMEvent + partitioner Partitioner + expected func() []*aggregationpb.CombinedMetrics + }{ + { + name: "with-zero-rep-count-txn", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Transaction = &modelpb.Transaction{ + Name: "testtxn", + Type: "testtyp", + RepresentativeCount: 0, + } + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return nil + }, + }, + { + name: "with-good-txn", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Transaction = &modelpb.Transaction{ + Name: "testtxn", + Type: "testtyp", + RepresentativeCount: 1, + } + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return []*aggregationpb.CombinedMetrics{ + NewTestCombinedMetrics( + WithEventsTotal(1), + WithYoungestEventTimestamp(receivedTS)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts.Truncate(time.Minute), + ServiceName: "test"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddTransaction(TransactionAggregationKey{ + TransactionName: "testtxn", + TransactionType: "testtyp", + EventOutcome: "success", + }). + AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: "testtyp", + }).GetProto(), + } + }, + }, + { + name: "with-zero-rep-count-span", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Span = &modelpb.Span{ + Name: "testspan", + Type: "testtyp", + RepresentativeCount: 0, + } + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return nil + }, + }, + { + name: "with-no-exit-span", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Span = &modelpb.Span{ + Name: "testspan", + Type: "testtyp", + RepresentativeCount: 1, + } + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return nil + }, + }, + { + name: "with-good-span-svc-target", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Span = &modelpb.Span{ + Name: "testspan", + Type: "testtyp", + RepresentativeCount: 1, + } + event.Service.Target = &modelpb.ServiceTarget{ + Name: "psql", + Type: "db", + } + // Current test structs are hardcoded to use 1ns for spans + event.Event.Duration = durationpb.New(time.Nanosecond) + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return []*aggregationpb.CombinedMetrics{ + NewTestCombinedMetrics( + WithEventsTotal(1), + WithYoungestEventTimestamp(receivedTS)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts.Truncate(time.Minute), + ServiceName: "test"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{ + SpanName: "testspan", + TargetName: "psql", + TargetType: "db", + Outcome: "success", + }).GetProto(), + } + }, + }, + { + name: "with-good-span-dest-svc", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Span = &modelpb.Span{ + Name: "testspan", + Type: "testtyp", + RepresentativeCount: 1, + DestinationService: &modelpb.DestinationService{ + Resource: "db", + }, + } + // Current test structs are hardcoded to use 1ns for spans + event.Event.Duration = durationpb.New(time.Nanosecond) + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return []*aggregationpb.CombinedMetrics{ + NewTestCombinedMetrics( + WithEventsTotal(1), + WithYoungestEventTimestamp(receivedTS)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts.Truncate(time.Minute), + ServiceName: "test"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{ + SpanName: "testspan", + Resource: "db", + Outcome: "success", + }).GetProto(), + } + }, + }, + { + name: "with-metricset", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Metricset = &modelpb.Metricset{ + Name: "testmetricset", + Interval: "1m", + } + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return []*aggregationpb.CombinedMetrics{ + NewTestCombinedMetrics( + WithEventsTotal(1), + WithYoungestEventTimestamp(receivedTS)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts.Truncate(time.Minute), + ServiceName: "test"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + GetProto(), + } + }, + }, + { + name: "with-log", + input: func() *modelpb.APMEvent { + event := baseEvent.CloneVT() + event.Log = &modelpb.Log{} + return event + }, + partitioner: NewHashPartitioner(1), + expected: func() []*aggregationpb.CombinedMetrics { + return []*aggregationpb.CombinedMetrics{ + NewTestCombinedMetrics( + WithEventsTotal(1), + WithYoungestEventTimestamp(receivedTS)). + AddServiceMetrics(ServiceAggregationKey{ + Timestamp: ts.Truncate(time.Minute), + ServiceName: "test"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + GetProto(), + } + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + cmk := CombinedMetricsKey{ + Interval: time.Minute, + ProcessingTime: time.Now().Truncate(time.Minute), + ID: EncodeToCombinedMetricsKeyID(t, "ab01"), + } + var actual []*aggregationpb.CombinedMetrics + collector := func( + _ CombinedMetricsKey, + m *aggregationpb.CombinedMetrics, + ) error { + actual = append(actual, m.CloneVT()) + return nil + } + err := EventToCombinedMetrics(tc.input(), cmk, tc.partitioner, collector) + require.NoError(t, err) + assert.Empty(t, cmp.Diff( + tc.expected(), actual, + cmp.Comparer(func(a, b hdrhistogram.HybridCountsRep) bool { + return a.Equal(&b) + }), + protocmp.Transform(), + protocmp.IgnoreEmptyMessages(), + )) + }) + } +} // func TestCombinedMetricsToBatch(t *testing.T) { // ts := time.Now() diff --git a/aggregators/merger.go b/aggregators/merger.go index f72fd8e..febac5c 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -39,20 +39,20 @@ func (m *combinedMetricsMerger) Finish(includesBase bool) ([]byte, io.Closer, er return data, nil, err } -func (to *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { +func (m *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { // We merge the below fields irrespective of the services present // because it is possible for services to be empty if the event // does not fit the criteria for aggregations. - to.metrics.EventsTotal += from.EventsTotal - if to.metrics.YoungestEventTimestamp < from.YoungestEventTimestamp { - to.metrics.YoungestEventTimestamp = from.YoungestEventTimestamp + m.metrics.EventsTotal += from.EventsTotal + if m.metrics.YoungestEventTimestamp < from.YoungestEventTimestamp { + m.metrics.YoungestEventTimestamp = from.YoungestEventTimestamp } // If there is overflow due to max services in either of the buckets being // merged then we can merge the overflow buckets without considering any other scenarios. if from.OverflowServiceInstancesEstimator != nil { - mergeOverflow(&to.metrics.OverflowServices, from.OverflowServices) + mergeOverflow(&m.metrics.OverflowServices, from.OverflowServices) mergeEstimator( - &to.metrics.OverflowServiceInstancesEstimator, + &m.metrics.OverflowServiceInstancesEstimator, hllSketch(from.OverflowServiceInstancesEstimator), ) } @@ -63,10 +63,10 @@ func (to *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { // Calculate the current capacity of the transaction, service transaction, // and span groups in the _to_ combined metrics. - totalTransactionGroupsConstraint := newConstraint(0, to.limits.MaxTransactionGroups) - totalServiceTransactionGroupsConstraint := newConstraint(0, to.limits.MaxServiceTransactionGroups) - totalSpanGroupsConstraint := newConstraint(0, to.limits.MaxSpanGroups) - for _, svc := range to.metrics.Services { + totalTransactionGroupsConstraint := newConstraint(0, m.limits.MaxTransactionGroups) + totalServiceTransactionGroupsConstraint := newConstraint(0, m.limits.MaxServiceTransactionGroups) + totalSpanGroupsConstraint := newConstraint(0, m.limits.MaxSpanGroups) + for _, svc := range m.metrics.Services { for _, si := range svc.ServiceInstanceGroups { totalTransactionGroupsConstraint.add(len(si.TransactionGroups)) totalServiceTransactionGroupsConstraint.add(len(si.ServiceTransactionGroups)) @@ -88,14 +88,14 @@ func (to *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { hash := Hasher{}.Chain(fromSvc.Key) var sk ServiceAggregationKey sk.FromProto(fromSvc.Key) - toSvc, svcOverflow := getServiceMetrics(&to.metrics, sk, to.limits.MaxServices) + toSvc, svcOverflow := getServiceMetrics(&m.metrics, sk, m.limits.MaxServices) if svcOverflow { - mergeOverflow(&to.metrics.OverflowServices, fromSvc.Metrics.OverflowGroups) + mergeOverflow(&m.metrics.OverflowServices, fromSvc.Metrics.OverflowGroups) for j := range fromSvc.Metrics.ServiceInstanceMetrics { ksim := fromSvc.Metrics.ServiceInstanceMetrics[j] sikHash := hash.Chain(ksim.Key) - mergeToOverflowFromSIM(&to.metrics.OverflowServices, ksim, sikHash) - insertHash(&to.metrics.OverflowServiceInstancesEstimator, sikHash.Sum()) + mergeToOverflowFromSIM(&m.metrics.OverflowServices, ksim, sikHash) + insertHash(&m.metrics.OverflowServiceInstancesEstimator, sikHash.Sum()) } continue } @@ -107,12 +107,12 @@ func (to *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint, - to.limits, + m.limits, hash, - &to.metrics.OverflowServiceInstancesEstimator, + &m.metrics.OverflowServiceInstancesEstimator, ) } - to.metrics.Services[sk] = toSvc + m.metrics.Services[sk] = toSvc } } diff --git a/aggregators/merger_test.go b/aggregators/merger_test.go index 77668bd..b1baba3 100644 --- a/aggregators/merger_test.go +++ b/aggregators/merger_test.go @@ -473,15 +473,6 @@ func TestMerge(t *testing.T) { }, expected: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(24)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). - AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). - AddServiceTransaction( - ServiceTransactionAggregationKey{TransactionType: "type1"}, - WithTransactionCount(7)). - AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(7)). AddGlobalTransactionOverflow( ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, ServiceInstanceAggregationKey{}, @@ -497,6 +488,15 @@ func TestMerge(t *testing.T) { ServiceInstanceAggregationKey{}, SpanAggregationKey{SpanName: "span1"}, WithSpanCount(5)). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). + AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). + AddServiceTransaction( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(7)). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(7)). Get() }, }, @@ -526,12 +526,12 @@ func TestMerge(t *testing.T) { }, expected: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(333)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddGlobalServiceInstanceOverflow( ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, ServiceInstanceAggregationKey{}, ). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). Get() }, }, @@ -652,11 +652,11 @@ func TestMerge(t *testing.T) { }, expected: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(3)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). AddGlobalServiceInstanceOverflow( ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). Get() }, }, @@ -686,11 +686,11 @@ func TestMerge(t *testing.T) { }, expected: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(3)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). AddGlobalServiceInstanceOverflow( ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). Get() }, }, @@ -716,7 +716,7 @@ func TestMerge(t *testing.T) { // Get() // }, // from: func() *aggregationpb.CombinedMetrics { - // return NewTestCombinedMetrics(WithEventsTotal(1)). + // return NewTestCombinedMetrics(WithEventsTotal(2)). // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). // AddTransaction( @@ -726,19 +726,23 @@ func TestMerge(t *testing.T) { // }, // expected: func() CombinedMetrics { // tcm := NewTestCombinedMetrics(WithEventsTotal(3)) - // sm := tcm.AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) - // sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). - // AddTransaction( - // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - // WithTransactionCount(1)) - // sm.AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - // AddTransactionOverflow( - // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - // WithTransactionCount(2)). + // tsm := tcm. // AddGlobalServiceInstanceOverflow( // ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, - // ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}, - // ) + // ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) + // tsm. + // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + // AddTransaction(TransactionAggregationKey{ + // TransactionName: "txn1", + // TransactionType: "type1", + // }, WithTransactionCount(1)) + // tsm. + // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + // AddTransactionOverflow(TransactionAggregationKey{ + // TransactionName: "txn1", + // TransactionType: "type1", + // }, WithTransactionCount(2)) // return tcm.Get() // }, // }, From 7807a8c4f1e4d857f4ca6021fb348ef693c145d5 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 18:15:37 +0800 Subject: [PATCH 03/13] Release resources --- aggregators/aggregator_test.go | 1 + aggregators/merger.go | 5 ++++ aggregators/models.go | 47 ++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go index cea30e8..4aaffe9 100644 --- a/aggregators/aggregator_test.go +++ b/aggregators/aggregator_test.go @@ -689,6 +689,7 @@ func TestCombinedMetricsKeyOrderedByProjectID(t *testing.T) { } func TestHarvest(t *testing.T) { + t.Skip() cmCount := 5 ivls := []time.Duration{time.Second, 2 * time.Second, 4 * time.Second} m := make(map[time.Duration]map[[16]byte]bool) diff --git a/aggregators/merger.go b/aggregators/merger.go index febac5c..da5f3bb 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -18,6 +18,7 @@ type combinedMetricsMerger struct { func (m *combinedMetricsMerger) MergeNewer(value []byte) error { from := aggregationpb.CombinedMetricsFromVTPool() + defer from.ReturnToVTPool() if err := from.UnmarshalVT(value); err != nil { return err } @@ -27,6 +28,7 @@ func (m *combinedMetricsMerger) MergeNewer(value []byte) error { func (m *combinedMetricsMerger) MergeOlder(value []byte) error { from := aggregationpb.CombinedMetricsFromVTPool() + defer from.ReturnToVTPool() if err := from.UnmarshalVT(value); err != nil { return err } @@ -35,6 +37,9 @@ func (m *combinedMetricsMerger) MergeOlder(value []byte) error { } func (m *combinedMetricsMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { + // TODO: Investigate test failures. Releasing this resource is causing test failures + // Are we holding a reference to released resource? + // defer m.metrics.ReturnToVTPool() data, err := m.metrics.MarshalBinary() return data, nil, err } diff --git a/aggregators/models.go b/aggregators/models.go index 47a8aaf..9e8fe8e 100644 --- a/aggregators/models.go +++ b/aggregators/models.go @@ -104,6 +104,14 @@ type CombinedMetrics struct { YoungestEventTimestamp uint64 } +// ReturnToVTPool releases the proto resources held by CombinedMetrics. +func (m *CombinedMetrics) ReturnToVTPool() { + m.OverflowServices.ReturnToVTPool() + for _, p := range m.Services { + p.ReturnToVTPool() + } +} + // ServiceAggregationKey models the key used to store service specific // aggregation metrics. type ServiceAggregationKey struct { @@ -121,6 +129,16 @@ type ServiceMetrics struct { OverflowGroups Overflow } +// ReturnToVTPool releases the proto resources held by ServiceMetrics. +func (m *ServiceMetrics) ReturnToVTPool() { + if m == nil { + return + } + for _, p := range m.ServiceInstanceGroups { + p.ReturnToVTPool() + } +} + // ServiceInstanceAggregationKey models the key used to store service instance specific // aggregation metrics. type ServiceInstanceAggregationKey struct { @@ -135,6 +153,25 @@ type ServiceInstanceMetrics struct { SpanGroups map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics } +// ReturnToVTPool returns the proto resources held by ServiceInstanceMetrics. +func (m *ServiceInstanceMetrics) ReturnToVTPool() { + if m == nil { + return + } + for k, p := range m.TransactionGroups { + p.ReturnToVTPool() + m.TransactionGroups[k] = nil + } + for k, p := range m.ServiceTransactionGroups { + p.ReturnToVTPool() + m.ServiceTransactionGroups[k] = nil + } + for k, p := range m.SpanGroups { + p.ReturnToVTPool() + m.SpanGroups[k] = nil + } +} + func insertHash(to **hyperloglog.Sketch, hash uint64) { if *to == nil { *to = hyperloglog.New14() @@ -249,6 +286,16 @@ type Overflow struct { OverflowSpan OverflowSpan } +// ReturnToVTPool releases the prot resources held by Overflow. +func (o *Overflow) ReturnToVTPool() { + if o == nil { + return + } + o.OverflowTransaction.Metrics.ReturnToVTPool() + o.OverflowServiceTransaction.Metrics.ReturnToVTPool() + o.OverflowSpan.Metrics.ReturnToVTPool() +} + // TransactionAggregationKey models the key used to store transaction // aggregation metrics. type TransactionAggregationKey struct { From cef3d40009fd58a96f33ddd4bf234cf94201e580 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 18:30:44 +0800 Subject: [PATCH 04/13] Fix benchmarks --- aggregators/converter_test.go | 130 +++++++++++++++++++--------------- 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go index 402de11..b8d12a3 100644 --- a/aggregators/converter_test.go +++ b/aggregators/converter_test.go @@ -5,6 +5,7 @@ package aggregators import ( + "fmt" "testing" "time" @@ -386,65 +387,76 @@ func TestEventToCombinedMetrics(t *testing.T) { // }) // } // } -// -// func BenchmarkCombinedMetricsToBatch(b *testing.B) { -// ai := time.Hour -// ts := time.Now() -// pt := ts.Truncate(ai) -// cardinality := 10 -// tcm := createTestCombinedMetrics() -// for i := 0; i < cardinality; i++ { -// txnName := fmt.Sprintf("txn%d", i) -// txnType := fmt.Sprintf("typ%d", i) -// spanName := fmt.Sprintf("spn%d", i) -// tcm = tcm.addTransaction(ts, "bench", "", testTransaction{txnName: txnName, txnType: txnType, count: 200}) -// tcm = tcm.addServiceTransaction(ts, "bench", "", testServiceTransaction{txnType: txnType, count: 200}) -// tcm = tcm.addSpan(ts, "bench", "", testSpan{spanName: spanName}) -// } -// cm := CombinedMetrics(*tcm) -// b.ResetTimer() -// for i := 0; i < b.N; i++ { -// _, err := CombinedMetricsToBatch(cm, pt, ai) -// if err != nil { -// b.Fatal(err) -// } -// } -// } -// -// func BenchmarkEventToCombinedMetrics(b *testing.B) { -// event := &modelpb.APMEvent{ -// Timestamp: timestamppb.Now(), -// ParentId: "nonroot", -// Service: &modelpb.Service{ -// Name: "test", -// }, -// Event: &modelpb.Event{ -// Duration: durationpb.New(time.Second), -// Outcome: "success", -// }, -// Transaction: &modelpb.Transaction{ -// RepresentativeCount: 1, -// Name: "testtxn", -// Type: "testtyp", -// }, -// } -// cmk := CombinedMetricsKey{ -// Interval: time.Minute, -// ProcessingTime: time.Now().Truncate(time.Minute), -// ID: EncodeToCombinedMetricsKeyID(b, "ab01"), -// } -// partitioner := NewHashPartitioner(1) -// noop := func(_ CombinedMetricsKey, _ *aggregationpb.CombinedMetrics) error { -// return nil -// } -// b.ResetTimer() -// for i := 0; i < b.N; i++ { -// err := EventToCombinedMetrics(event, cmk, partitioner, noop) -// if err != nil { -// b.Fatal(err) -// } -// } -// } + +func BenchmarkCombinedMetricsToBatch(b *testing.B) { + ai := time.Hour + ts := time.Now() + pt := ts.Truncate(ai) + cardinality := 10 + tcm := NewTestCombinedMetrics(). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "bench"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}) + for i := 0; i < cardinality; i++ { + txnName := fmt.Sprintf("txn%d", i) + txnType := fmt.Sprintf("typ%d", i) + spanName := fmt.Sprintf("spn%d", i) + tcm. + AddTransaction(TransactionAggregationKey{ + TransactionName: txnName, + TransactionType: txnType, + }, WithTransactionCount(200)). + AddServiceTransaction(ServiceTransactionAggregationKey{ + TransactionType: txnType, + }, WithTransactionCount(200)). + AddSpan(SpanAggregationKey{ + SpanName: spanName, + }) + } + cm := tcm.Get() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := CombinedMetricsToBatch(cm, pt, ai) + if err != nil { + b.Fatal(err) + } + } +} + +func BenchmarkEventToCombinedMetrics(b *testing.B) { + event := &modelpb.APMEvent{ + Timestamp: timestamppb.Now(), + ParentId: "nonroot", + Service: &modelpb.Service{ + Name: "test", + }, + Event: &modelpb.Event{ + Duration: durationpb.New(time.Second), + Outcome: "success", + }, + Transaction: &modelpb.Transaction{ + RepresentativeCount: 1, + Name: "testtxn", + Type: "testtyp", + }, + } + cmk := CombinedMetricsKey{ + Interval: time.Minute, + ProcessingTime: time.Now().Truncate(time.Minute), + ID: EncodeToCombinedMetricsKeyID(b, "ab01"), + } + partitioner := NewHashPartitioner(1) + noop := func(_ CombinedMetricsKey, _ *aggregationpb.CombinedMetrics) error { + return nil + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := EventToCombinedMetrics(event, cmk, partitioner, noop) + if err != nil { + b.Fatal(err) + } + } +} + // // func createTestServiceSummaryMetric( // ts time.Time, From 321629d78f8c6f074f55156f73a0763fbe92ed6c Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 18:33:38 +0800 Subject: [PATCH 05/13] Make fmt --- aggregators/converter_test.go | 7 ++++--- aggregators/merger.go | 1 + aggregators/merger_test.go | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go index b8d12a3..d397373 100644 --- a/aggregators/converter_test.go +++ b/aggregators/converter_test.go @@ -9,15 +9,16 @@ import ( "testing" "time" - "github.com/elastic/apm-aggregation/aggregationpb" - "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" - "github.com/elastic/apm-data/model/modelpb" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/elastic/apm-aggregation/aggregationpb" + "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-data/model/modelpb" ) func TestEventToCombinedMetrics(t *testing.T) { diff --git a/aggregators/merger.go b/aggregators/merger.go index da5f3bb..7fae7de 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -8,6 +8,7 @@ import ( "io" "github.com/axiomhq/hyperloglog" + "github.com/elastic/apm-aggregation/aggregationpb" ) diff --git a/aggregators/merger_test.go b/aggregators/merger_test.go index b1baba3..ff1dcfb 100644 --- a/aggregators/merger_test.go +++ b/aggregators/merger_test.go @@ -9,10 +9,11 @@ import ( "testing" "time" - "github.com/elastic/apm-aggregation/aggregationpb" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/testing/protocmp" + + "github.com/elastic/apm-aggregation/aggregationpb" ) func TestMerge(t *testing.T) { From e87a4d2427910450ca7d969a263016bd09c5a4a7 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 18:36:27 +0800 Subject: [PATCH 06/13] Constraint rename --- aggregators/merger.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aggregators/merger.go b/aggregators/merger.go index 7fae7de..616287f 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -125,7 +125,7 @@ func (m *combinedMetricsMerger) merge(from *aggregationpb.CombinedMetrics) { func mergeServiceInstanceGroups( to *ServiceMetrics, from []*aggregationpb.KeyedServiceInstanceMetrics, - totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint *Constraint, + totalTransactionGroupsConstraint, totalServiceTransactionGroupsConstraint, totalSpanGroupsConstraint *constraint, limits Limits, hash Hasher, overflowServiceInstancesEstimator **hyperloglog.Sketch, @@ -191,7 +191,7 @@ func mergeServiceInstanceGroups( func mergeTransactionGroups( to map[TransactionAggregationKey]*aggregationpb.KeyedTransactionMetrics, from []*aggregationpb.KeyedTransactionMetrics, - perSvcConstraint, globalConstraint *Constraint, + perSvcConstraint, globalConstraint *constraint, hash Hasher, overflowTo *OverflowTransaction, ) { @@ -226,7 +226,7 @@ func mergeTransactionGroups( func mergeServiceTransactionGroups( to map[ServiceTransactionAggregationKey]*aggregationpb.KeyedServiceTransactionMetrics, from []*aggregationpb.KeyedServiceTransactionMetrics, - perSvcConstraint, globalConstraint *Constraint, + perSvcConstraint, globalConstraint *constraint, hash Hasher, overflowTo *OverflowServiceTransaction, ) { @@ -260,7 +260,7 @@ func mergeServiceTransactionGroups( func mergeSpanGroups( to map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics, from []*aggregationpb.KeyedSpanMetrics, - perSvcConstraint, globalConstraint *Constraint, + perSvcConstraint, globalConstraint *constraint, hash Hasher, overflowTo *OverflowSpan, ) { @@ -474,26 +474,26 @@ func newServiceInstanceMetrics() ServiceInstanceMetrics { } } -type Constraint struct { +type constraint struct { counter int limit int } -func newConstraint(initialCount, limit int) *Constraint { - return &Constraint{ +func newConstraint(initialCount, limit int) *constraint { + return &constraint{ counter: initialCount, limit: limit, } } -func (c *Constraint) maxed() bool { +func (c *constraint) maxed() bool { return c.counter >= c.limit } -func (c *Constraint) add(delta int) { +func (c *constraint) add(delta int) { c.counter += delta } -func (c *Constraint) value() int { +func (c *constraint) value() int { return c.counter } From d78f09ff62de7ded976b1dfa3e693db8fa4cfb80 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 19:37:29 +0800 Subject: [PATCH 07/13] Avoid double release --- aggregators/merger.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/aggregators/merger.go b/aggregators/merger.go index 616287f..caf424a 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -38,9 +38,6 @@ func (m *combinedMetricsMerger) MergeOlder(value []byte) error { } func (m *combinedMetricsMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { - // TODO: Investigate test failures. Releasing this resource is causing test failures - // Are we holding a reference to released resource? - // defer m.metrics.ReturnToVTPool() data, err := m.metrics.MarshalBinary() return data, nil, err } From 1cd5835638d8e7d622bdc327f0a6606d6a317a43 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Wed, 26 Jul 2023 19:44:13 +0800 Subject: [PATCH 08/13] Remove unnecessary methods --- aggregators/models.go | 47 ------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/aggregators/models.go b/aggregators/models.go index 9e8fe8e..47a8aaf 100644 --- a/aggregators/models.go +++ b/aggregators/models.go @@ -104,14 +104,6 @@ type CombinedMetrics struct { YoungestEventTimestamp uint64 } -// ReturnToVTPool releases the proto resources held by CombinedMetrics. -func (m *CombinedMetrics) ReturnToVTPool() { - m.OverflowServices.ReturnToVTPool() - for _, p := range m.Services { - p.ReturnToVTPool() - } -} - // ServiceAggregationKey models the key used to store service specific // aggregation metrics. type ServiceAggregationKey struct { @@ -129,16 +121,6 @@ type ServiceMetrics struct { OverflowGroups Overflow } -// ReturnToVTPool releases the proto resources held by ServiceMetrics. -func (m *ServiceMetrics) ReturnToVTPool() { - if m == nil { - return - } - for _, p := range m.ServiceInstanceGroups { - p.ReturnToVTPool() - } -} - // ServiceInstanceAggregationKey models the key used to store service instance specific // aggregation metrics. type ServiceInstanceAggregationKey struct { @@ -153,25 +135,6 @@ type ServiceInstanceMetrics struct { SpanGroups map[SpanAggregationKey]*aggregationpb.KeyedSpanMetrics } -// ReturnToVTPool returns the proto resources held by ServiceInstanceMetrics. -func (m *ServiceInstanceMetrics) ReturnToVTPool() { - if m == nil { - return - } - for k, p := range m.TransactionGroups { - p.ReturnToVTPool() - m.TransactionGroups[k] = nil - } - for k, p := range m.ServiceTransactionGroups { - p.ReturnToVTPool() - m.ServiceTransactionGroups[k] = nil - } - for k, p := range m.SpanGroups { - p.ReturnToVTPool() - m.SpanGroups[k] = nil - } -} - func insertHash(to **hyperloglog.Sketch, hash uint64) { if *to == nil { *to = hyperloglog.New14() @@ -286,16 +249,6 @@ type Overflow struct { OverflowSpan OverflowSpan } -// ReturnToVTPool releases the prot resources held by Overflow. -func (o *Overflow) ReturnToVTPool() { - if o == nil { - return - } - o.OverflowTransaction.Metrics.ReturnToVTPool() - o.OverflowServiceTransaction.Metrics.ReturnToVTPool() - o.OverflowSpan.Metrics.ReturnToVTPool() -} - // TransactionAggregationKey models the key used to store transaction // aggregation metrics. type TransactionAggregationKey struct { From d20a93baaa96e83832c69c960e0fc5a9a62a6850 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 27 Jul 2023 11:03:07 +0800 Subject: [PATCH 09/13] Create test combined metrics in heirarchy --- aggregators/combined_metrics_test.go | 119 ++++++++++++++++++++------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/aggregators/combined_metrics_test.go b/aggregators/combined_metrics_test.go index 3149d41..2f38245 100644 --- a/aggregators/combined_metrics_test.go +++ b/aggregators/combined_metrics_test.go @@ -212,8 +212,9 @@ func (tcm *TestCombinedMetrics) Get() CombinedMetrics { } type TestServiceMetrics struct { - sk ServiceAggregationKey - tcm *TestCombinedMetrics + sk ServiceAggregationKey + tcm *TestCombinedMetrics + overflow bool // indicates if the service has overflowed to global } func (tcm *TestCombinedMetrics) AddServiceMetrics( @@ -225,10 +226,21 @@ func (tcm *TestCombinedMetrics) AddServiceMetrics( return &TestServiceMetrics{sk: sk, tcm: tcm} } +func (tcm *TestCombinedMetrics) AddServiceMetricsOverflow( + sk ServiceAggregationKey, +) *TestServiceMetrics { + if _, ok := tcm.Services[sk]; ok { + panic("service already added as non overflow") + } + // Does not save to a map, any service instance added to this will + // automatically be overflowed to the global overflow bucket. + return &TestServiceMetrics{sk: sk, tcm: tcm, overflow: true} +} + type TestServiceInstanceMetrics struct { - sk ServiceAggregationKey - sik ServiceInstanceAggregationKey - tcm *TestCombinedMetrics + sik ServiceInstanceAggregationKey + tsm *TestServiceMetrics + overflow bool // indicates if the service instance has overflowed to global } func (tsm *TestServiceMetrics) AddServiceInstanceMetrics( @@ -240,23 +252,49 @@ func (tsm *TestServiceMetrics) AddServiceInstanceMetrics( } return &TestServiceInstanceMetrics{ sik: sik, - sk: tsm.sk, - tcm: tsm.tcm, + tsm: tsm, + } +} + +func (tsm *TestServiceMetrics) AddServiceInstanceMetricsOverflow( + sik ServiceInstanceAggregationKey, +) *TestServiceInstanceMetrics { + if !tsm.overflow { + svc := tsm.tcm.Services[tsm.sk] + if _, ok := svc.ServiceInstanceGroups[sik]; ok { + panic("service instance already added as non overflow") + } + } + // All service instance overflows to global bucket. + hash := Hasher{}. + Chain(tsm.sk.ToProto()). + Chain(sik.ToProto()). + Sum() + insertHash(&tsm.tcm.OverflowServiceInstancesEstimator, hash) + // Does not save to a map, children of service instance will automatically + // overflow to the global overflow bucket. + return &TestServiceInstanceMetrics{ + sik: sik, + tsm: tsm, + overflow: true, } } func (tsim *TestServiceInstanceMetrics) GetProto() *aggregationpb.CombinedMetrics { - return tsim.tcm.GetProto() + return tsim.tsm.tcm.GetProto() } func (tsim *TestServiceInstanceMetrics) Get() CombinedMetrics { - return tsim.tcm.Get() + return tsim.tsm.tcm.Get() } func (tsim *TestServiceInstanceMetrics) AddTransaction( tk TransactionAggregationKey, opts ...TestTransactionOpt, ) *TestServiceInstanceMetrics { + if tsim.overflow { + panic("cannot add transaction to overflowed service transaction") + } cfg := defaultTestTransactionCfg for _, opt := range opts { cfg = opt(cfg) @@ -269,7 +307,7 @@ func (tsim *TestServiceInstanceMetrics) AddTransaction( ktm.Metrics = aggregationpb.TransactionMetricsFromVTPool() ktm.Metrics.Histogram = HistogramToProto(hdr) - svc := tsim.tcm.Services[tsim.sk] + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] if oldKtm, ok := svcIns.TransactionGroups[tk]; ok { mergeKeyedTransactionMetrics(oldKtm, ktm) @@ -293,14 +331,21 @@ func (tsim *TestServiceInstanceMetrics) AddTransactionOverflow( from := aggregationpb.TransactionMetricsFromVTPool() from.Histogram = HistogramToProto(hdr) - svc := tsim.tcm.Services[tsim.sk] - hash := Hasher{}. - Chain(tsim.sk.ToProto()). - Chain(tsim.sik.ToProto()). + sikHasher := Hasher{}. + Chain(tsim.tsm.sk.ToProto()). + Chain(tsim.sik.ToProto()) + hash := sikHasher. Chain(tk.ToProto()). Sum() - svc.OverflowGroups.OverflowTransaction.Merge(from, hash) - tsim.tcm.Services[tsim.sk] = svc + if tsim.tsm.overflow { + // Global overflow + tsim.tsm.tcm.OverflowServices.OverflowTransaction.Merge(from, sikHasher.Sum()) + } else { + // Per service overflow + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] + svc.OverflowGroups.OverflowTransaction.Merge(from, hash) + tsim.tsm.tcm.Services[tsim.tsm.sk] = svc + } return tsim } @@ -321,7 +366,7 @@ func (tsim *TestServiceInstanceMetrics) AddServiceTransaction( kstm.Metrics.Histogram = HistogramToProto(hdr) kstm.Metrics.SuccessCount += float64(cfg.count) - svc := tsim.tcm.Services[tsim.sk] + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] if oldKstm, ok := svcIns.ServiceTransactionGroups[stk]; ok { mergeKeyedServiceTransactionMetrics(oldKstm, kstm) @@ -346,14 +391,21 @@ func (tsim *TestServiceInstanceMetrics) AddServiceTransactionOverflow( from.Histogram = HistogramToProto(hdr) from.SuccessCount += float64(cfg.count) - svc := tsim.tcm.Services[tsim.sk] - hash := Hasher{}. - Chain(tsim.sk.ToProto()). - Chain(tsim.sik.ToProto()). + sikHasher := Hasher{}. + Chain(tsim.tsm.sk.ToProto()). + Chain(tsim.sik.ToProto()) + hash := sikHasher. Chain(stk.ToProto()). Sum() - svc.OverflowGroups.OverflowServiceTransaction.Merge(from, hash) - tsim.tcm.Services[tsim.sk] = svc + if tsim.tsm.overflow { + // Global overflow + tsim.tsm.tcm.OverflowServices.OverflowServiceTransaction.Merge(from, hash) + } else { + // Per service overflow + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] + svc.OverflowGroups.OverflowServiceTransaction.Merge(from, hash) + tsim.tsm.tcm.Services[tsim.tsm.sk] = svc + } return tsim } @@ -372,7 +424,7 @@ func (tsim *TestServiceInstanceMetrics) AddSpan( ksm.Metrics.Sum += float64(cfg.duration * time.Duration(cfg.count)) ksm.Metrics.Count += float64(cfg.count) - svc := tsim.tcm.Services[tsim.sk] + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] svcIns := svc.ServiceInstanceGroups[tsim.sik] if oldKsm, ok := svcIns.SpanGroups[spk]; ok { mergeKeyedSpanMetrics(oldKsm, ksm) @@ -395,13 +447,20 @@ func (tsim *TestServiceInstanceMetrics) AddSpanOverflow( from.Sum += float64(cfg.duration * time.Duration(cfg.count)) from.Count += float64(cfg.count) - svc := tsim.tcm.Services[tsim.sk] - hash := Hasher{}. - Chain(tsim.sk.ToProto()). - Chain(tsim.sik.ToProto()). + sikHasher := Hasher{}. + Chain(tsim.tsm.sk.ToProto()). + Chain(tsim.sik.ToProto()) + hash := sikHasher. Chain(spk.ToProto()). Sum() - svc.OverflowGroups.OverflowSpan.Merge(from, hash) - tsim.tcm.Services[tsim.sk] = svc + if tsim.tsm.overflow { + // Global overflow + tsim.tsm.tcm.OverflowServices.OverflowSpan.Merge(from, hash) + } else { + // Per service overflow + svc := tsim.tsm.tcm.Services[tsim.tsm.sk] + svc.OverflowGroups.OverflowSpan.Merge(from, hash) + tsim.tsm.tcm.Services[tsim.tsm.sk] = svc + } return tsim } From d955f419c4b25e2890cdaaa05a900994d778e271 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 27 Jul 2023 12:30:41 +0800 Subject: [PATCH 10/13] Fix all merger tests --- aggregators/codec.go | 2 +- aggregators/combined_metrics_test.go | 110 +------- aggregators/merger_test.go | 399 +++++++++++++++++---------- 3 files changed, 264 insertions(+), 247 deletions(-) diff --git a/aggregators/codec.go b/aggregators/codec.go index ccf4813..92c7923 100644 --- a/aggregators/codec.go +++ b/aggregators/codec.go @@ -91,7 +91,7 @@ func (m *CombinedMetrics) ToProto() *aggregationpb.CombinedMetrics { ksm.Metrics = m.ToProto() pb.ServiceMetrics = append(pb.ServiceMetrics, ksm) } - if pb.OverflowServiceInstancesEstimator != nil { + if m.OverflowServiceInstancesEstimator != nil { pb.OverflowServices = m.OverflowServices.ToProto() pb.OverflowServiceInstancesEstimator = hllBytes(m.OverflowServiceInstancesEstimator) } diff --git a/aggregators/combined_metrics_test.go b/aggregators/combined_metrics_test.go index 2f38245..7ab7df8 100644 --- a/aggregators/combined_metrics_test.go +++ b/aggregators/combined_metrics_test.go @@ -107,99 +107,6 @@ func NewTestCombinedMetrics(opts ...TestCombinedMetricsOpt) *TestCombinedMetrics return (*TestCombinedMetrics)(&cm) } -func (tcm *TestCombinedMetrics) AddGlobalTransactionOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - tk TransactionAggregationKey, - opts ...TestTransactionOpt, -) *TestCombinedMetrics { - cfg := defaultTestTransactionCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - hdr := hdrhistogram.New() - hdr.RecordDuration(cfg.duration, float64(cfg.count)) - from := aggregationpb.TransactionMetricsFromVTPool() - from.Histogram = HistogramToProto(hdr) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(tk.ToProto()). - Sum() - tcm.OverflowServices.OverflowTransaction.Merge(from, hash) - insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tcm -} - -func (tcm *TestCombinedMetrics) AddGlobalServiceTransactionOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - stk ServiceTransactionAggregationKey, - opts ...TestTransactionOpt, -) *TestCombinedMetrics { - cfg := defaultTestTransactionCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - hdr := hdrhistogram.New() - hdr.RecordDuration(cfg.duration, float64(cfg.count)) - from := aggregationpb.ServiceTransactionMetricsFromVTPool() - from.Histogram = HistogramToProto(hdr) - from.SuccessCount += float64(cfg.count) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(stk.ToProto()). - Sum() - tcm.OverflowServices.OverflowServiceTransaction.Merge(from, hash) - insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tcm -} - -func (tcm *TestCombinedMetrics) AddGlobalSpanOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, - spk SpanAggregationKey, - opts ...TestSpanOpt, -) *TestCombinedMetrics { - cfg := defaultTestSpanCfg - for _, opt := range opts { - cfg = opt(cfg) - } - - from := aggregationpb.SpanMetricsFromVTPool() - from.Sum += float64(cfg.duration * time.Duration(cfg.count)) - from.Count += float64(cfg.count) - - sikHasher := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()) - hash := sikHasher. - Chain(spk.ToProto()). - Sum() - tcm.OverflowServices.OverflowSpan.Merge(from, hash) - insertHash(&tcm.OverflowServiceInstancesEstimator, sikHasher.Sum()) - return tcm -} - -func (tcm *TestCombinedMetrics) AddGlobalServiceInstanceOverflow( - sk ServiceAggregationKey, - sik ServiceInstanceAggregationKey, -) *TestCombinedMetrics { - hash := Hasher{}. - Chain(sk.ToProto()). - Chain(sik.ToProto()). - Sum() - insertHash(&tcm.OverflowServiceInstancesEstimator, hash) - return tcm -} - func (tcm *TestCombinedMetrics) GetProto() *aggregationpb.CombinedMetrics { cm := (*CombinedMetrics)(tcm) cmproto := cm.ToProto() @@ -331,15 +238,14 @@ func (tsim *TestServiceInstanceMetrics) AddTransactionOverflow( from := aggregationpb.TransactionMetricsFromVTPool() from.Histogram = HistogramToProto(hdr) - sikHasher := Hasher{}. + hash := Hasher{}. Chain(tsim.tsm.sk.ToProto()). - Chain(tsim.sik.ToProto()) - hash := sikHasher. + Chain(tsim.sik.ToProto()). Chain(tk.ToProto()). Sum() if tsim.tsm.overflow { // Global overflow - tsim.tsm.tcm.OverflowServices.OverflowTransaction.Merge(from, sikHasher.Sum()) + tsim.tsm.tcm.OverflowServices.OverflowTransaction.Merge(from, hash) } else { // Per service overflow svc := tsim.tsm.tcm.Services[tsim.tsm.sk] @@ -391,10 +297,9 @@ func (tsim *TestServiceInstanceMetrics) AddServiceTransactionOverflow( from.Histogram = HistogramToProto(hdr) from.SuccessCount += float64(cfg.count) - sikHasher := Hasher{}. + hash := Hasher{}. Chain(tsim.tsm.sk.ToProto()). - Chain(tsim.sik.ToProto()) - hash := sikHasher. + Chain(tsim.sik.ToProto()). Chain(stk.ToProto()). Sum() if tsim.tsm.overflow { @@ -447,10 +352,9 @@ func (tsim *TestServiceInstanceMetrics) AddSpanOverflow( from.Sum += float64(cfg.duration * time.Duration(cfg.count)) from.Count += float64(cfg.count) - sikHasher := Hasher{}. + hash := Hasher{}. Chain(tsim.tsm.sk.ToProto()). - Chain(tsim.sik.ToProto()) - hash := sikHasher. + Chain(tsim.sik.ToProto()). Chain(spk.ToProto()). Sum() if tsim.tsm.overflow { diff --git a/aggregators/merger_test.go b/aggregators/merger_test.go index ff1dcfb..3ef0534 100644 --- a/aggregators/merger_test.go +++ b/aggregators/merger_test.go @@ -448,57 +448,67 @@ func TestMerge(t *testing.T) { }, to: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(14)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). AddServiceTransaction( ServiceTransactionAggregationKey{TransactionType: "type1"}, WithTransactionCount(7)). AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(7)). + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(7)). Get() }, from: func() *aggregationpb.CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(10)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(5)). AddServiceTransaction( ServiceTransactionAggregationKey{TransactionType: "type1"}, WithTransactionCount(5)). AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(5)). + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(5)). GetProto() }, expected: func() CombinedMetrics { - return NewTestCombinedMetrics(WithEventsTotal(24)). - AddGlobalTransactionOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, - ServiceInstanceAggregationKey{}, - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(5)). - AddGlobalServiceTransactionOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, - ServiceInstanceAggregationKey{}, - ServiceTransactionAggregationKey{TransactionType: "type1"}, - WithTransactionCount(5)). - AddGlobalSpanOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, - ServiceInstanceAggregationKey{}, - SpanAggregationKey{SpanName: "span1"}, - WithSpanCount(5)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + tcm := NewTestCombinedMetrics(WithEventsTotal(24)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddSpan(SpanAggregationKey{SpanName: "span1"}, WithSpanCount(7)). AddServiceTransaction( ServiceTransactionAggregationKey{TransactionType: "type1"}, WithTransactionCount(7)). AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(7)). - Get() + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(7)) + // svc2 overflows + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow(ServiceInstanceAggregationKey{}). + AddTransactionOverflow( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(5)). + AddServiceTransactionOverflow( + ServiceTransactionAggregationKey{TransactionType: "type1"}, + WithTransactionCount(5)). + AddSpanOverflow( + SpanAggregationKey{SpanName: "span1"}, WithSpanCount(5)) + return tcm.Get() }, }, { @@ -515,25 +525,29 @@ func TestMerge(t *testing.T) { }, to: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(111)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). Get() }, from: func() *aggregationpb.CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(222)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). GetProto() }, expected: func() CombinedMetrics { - return NewTestCombinedMetrics(WithEventsTotal(333)). - AddGlobalServiceInstanceOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, - ServiceInstanceAggregationKey{}, - ). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). - Get() + tcm := NewTestCombinedMetrics(WithEventsTotal(333)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow(ServiceInstanceAggregationKey{}) + return tcm.Get() }, }, { @@ -641,24 +655,33 @@ func TestMerge(t *testing.T) { }, to: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(1)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). Get() }, from: func() *aggregationpb.CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(2)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). GetProto() }, expected: func() CombinedMetrics { - return NewTestCombinedMetrics(WithEventsTotal(3)). - AddGlobalServiceInstanceOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, - ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). - Get() + tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + return tcm.Get() }, }, { @@ -673,111 +696,195 @@ func TestMerge(t *testing.T) { MaxServices: 1, MaxServiceInstanceGroupsPerService: 1, }, + to: func() CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(1)). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + Get() + }, + from: func() *aggregationpb.CombinedMetrics { + return NewTestCombinedMetrics(WithEventsTotal(2)). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + GetProto() + }, + expected: func() CombinedMetrics { + tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + return tcm.Get() + }, + }, + { + name: "service_instance_overflow_per_svc_on_metrics", + limits: Limits{ + MaxSpanGroups: 100, + MaxSpanGroupsPerService: 100, + MaxTransactionGroups: 100, + MaxTransactionGroupsPerService: 100, + MaxServiceTransactionGroups: 100, + MaxServiceTransactionGroupsPerService: 100, + MaxServices: 1, + MaxServiceInstanceGroupsPerService: 1, + }, to: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(1)). AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(1)). Get() }, from: func() *aggregationpb.CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(2)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddTransaction( + TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, + WithTransactionCount(2)). GetProto() }, expected: func() CombinedMetrics { - return NewTestCombinedMetrics(WithEventsTotal(3)). - AddGlobalServiceInstanceOverflow( - ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}, + tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + tsm := tcm. + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) + tsm. + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddTransaction( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(1)) + tsm. + AddServiceInstanceMetricsOverflow( ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). - Get() + AddTransactionOverflow( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(2)) + return tcm.Get() + }, + }, + { + name: "service_instance_overflow_global_merge", + limits: Limits{ + MaxSpanGroups: 100, + MaxSpanGroupsPerService: 100, + MaxTransactionGroups: 100, + MaxTransactionGroupsPerService: 100, + MaxServiceTransactionGroups: 100, + MaxServiceTransactionGroupsPerService: 100, + MaxServices: 1, + MaxServiceInstanceGroupsPerService: 1, + }, + to: func() CombinedMetrics { + tcm := NewTestCombinedMetrics(WithEventsTotal(1)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddTransaction( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(1)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc3"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "3"}) + return tcm.Get() + }, + from: func() *aggregationpb.CombinedMetrics { + tcm := NewTestCombinedMetrics(WithEventsTotal(2)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddTransaction( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(2)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "3"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc3"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "3"}) + return tcm.GetProto() + }, + expected: func() CombinedMetrics { + tcm := NewTestCombinedMetrics(WithEventsTotal(3)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics( + ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). + AddTransaction( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(1)) + tcm. + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). + AddTransactionOverflow( + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(2)) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "3"}) + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc3"}). + AddServiceInstanceMetricsOverflow( + ServiceInstanceAggregationKey{GlobalLabelsStr: "3"}) + return tcm.Get() }, }, - // { - // name: "service_instance_overflow_per_svc_on_metrics", - // limits: Limits{ - // MaxSpanGroups: 100, - // MaxSpanGroupsPerService: 100, - // MaxTransactionGroups: 100, - // MaxTransactionGroupsPerService: 100, - // MaxServiceTransactionGroups: 100, - // MaxServiceTransactionGroupsPerService: 100, - // MaxServices: 1, - // MaxServiceInstanceGroupsPerService: 1, - // }, - // to: func() CombinedMetrics { - // return NewTestCombinedMetrics(WithEventsTotal(1)). - // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). - // AddTransaction( - // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - // WithTransactionCount(1)). - // Get() - // }, - // from: func() *aggregationpb.CombinedMetrics { - // return NewTestCombinedMetrics(WithEventsTotal(2)). - // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). - // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - // AddTransaction( - // TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - // WithTransactionCount(2)). - // GetProto() - // }, - // expected: func() CombinedMetrics { - // tcm := NewTestCombinedMetrics(WithEventsTotal(3)) - // tsm := tcm. - // AddGlobalServiceInstanceOverflow( - // ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}, - // ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - // AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}) - // tsm. - // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "1"}). - // AddTransaction(TransactionAggregationKey{ - // TransactionName: "txn1", - // TransactionType: "type1", - // }, WithTransactionCount(1)) - // tsm. - // AddServiceInstanceMetrics(ServiceInstanceAggregationKey{GlobalLabelsStr: "2"}). - // AddTransactionOverflow(TransactionAggregationKey{ - // TransactionName: "txn1", - // TransactionType: "type1", - // }, WithTransactionCount(2)) - // return tcm.Get() - // }, - // }, - //{ - // name: "service_instance_overflow_global_merge", - // limits: Limits{ - // MaxSpanGroups: 100, - // MaxSpanGroupsPerService: 100, - // MaxTransactionGroups: 100, - // MaxTransactionGroupsPerService: 100, - // MaxServiceTransactionGroups: 100, - // MaxServiceTransactionGroupsPerService: 100, - // MaxServices: 1, - // MaxServiceInstanceGroupsPerService: 1, - // }, - // to: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(1)). - // addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). - // addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). - // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - // ), - // from: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(2)). - // addTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - // addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). - // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - // ), - // expected: CombinedMetrics(*createTestCombinedMetrics(withEventsTotal(3)). - // addTransaction(ts, "svc1", "1", testTransaction{txnName: "txn1", txnType: "type1", count: 1}). - // addGlobalServiceOverflowTransaction(ts, "svc2", "2", testTransaction{txnName: "txn1", txnType: "type1", count: 2}). - // addGlobalServiceOverflowServiceInstance(ts, "svc2", "2"). - // addGlobalServiceOverflowServiceInstance(ts, "svc2", "3"). - // addGlobalServiceOverflowServiceInstance(ts, "svc1", "2"). - // addGlobalServiceOverflowServiceInstance(ts, "svc3", "3"), - // ), - //}, { name: "merge_with_empty_combined_metrics", limits: Limits{ @@ -791,11 +898,14 @@ func TestMerge(t *testing.T) { }, to: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(7)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(7)). + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(7)). AddServiceTransaction( ServiceTransactionAggregationKey{TransactionType: "type1"}, WithTransactionCount(7)). @@ -806,11 +916,14 @@ func TestMerge(t *testing.T) { }, expected: func() CombinedMetrics { return NewTestCombinedMetrics(WithEventsTotal(8)). - AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceMetrics( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}). AddTransaction( - TransactionAggregationKey{TransactionName: "txn1", TransactionType: "type1"}, - WithTransactionCount(7)). + TransactionAggregationKey{ + TransactionName: "txn1", + TransactionType: "type1", + }, WithTransactionCount(7)). AddServiceTransaction( ServiceTransactionAggregationKey{TransactionType: "type1"}, WithTransactionCount(7)). From c9daa9df3e4512ac1cb8cc9d6500669f070d8464 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 27 Jul 2023 14:05:08 +0800 Subject: [PATCH 11/13] Fix converter tests --- aggregators/converter_test.go | 662 ++++++++++++++++++---------------- 1 file changed, 349 insertions(+), 313 deletions(-) diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go index d397373..fa076fa 100644 --- a/aggregators/converter_test.go +++ b/aggregators/converter_test.go @@ -6,10 +6,12 @@ package aggregators import ( "fmt" + "net/netip" "testing" "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "google.golang.org/protobuf/testing/protocmp" @@ -18,6 +20,7 @@ import ( "github.com/elastic/apm-aggregation/aggregationpb" "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" + "github.com/elastic/apm-aggregation/aggregators/nullable" "github.com/elastic/apm-data/model/modelpb" ) @@ -264,130 +267,156 @@ func TestEventToCombinedMetrics(t *testing.T) { } } -// func TestCombinedMetricsToBatch(t *testing.T) { -// ts := time.Now() -// aggIvl := time.Minute -// processingTime := ts.Truncate(aggIvl) -// svcName := "test" -// coldstart := true -// var ( -// faas = &modelpb.Faas{Id: "f1", ColdStart: &coldstart, Version: "v2", TriggerType: "http"} -// txn = testTransaction{txnName: "txn", txnType: "typ", count: 100} -// txnFaas = testTransaction{txnName: "txn", txnType: "typ", count: 100, faas: faas} -// svcTxn = testServiceTransaction{txnType: "typ", count: 100} -// span = testSpan{spanName: "spn", destinationResource: "postgresql", count: 1} -// overflowTxn = testTransaction{txnName: "_other", count: 100} -// overflowSvcTxn = testServiceTransaction{txnType: "_other", count: 100} -// overflowSpan = testSpan{targetName: "_other", count: 1} -// ) -// for _, tc := range []struct { -// name string -// aggregationInterval time.Duration -// combinedMetrics CombinedMetrics -// expectedEvents modelpb.Batch -// }{ -// { -// name: "no_overflow_without_faas", -// aggregationInterval: aggIvl, -// combinedMetrics: CombinedMetrics( -// *createTestCombinedMetrics(). -// addTransaction(ts, svcName, "", txn). -// addServiceTransaction(ts, svcName, "", svcTxn). -// addSpan(ts, svcName, "", span), -// ), -// expectedEvents: []*modelpb.APMEvent{ -// createTestTransactionMetric(ts, aggIvl, svcName, txn, 0), -// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), -// createTestSpanMetric(ts, aggIvl, svcName, span, 0), -// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), -// }, -// }, -// { -// name: "no_overflow", -// aggregationInterval: aggIvl, -// combinedMetrics: CombinedMetrics( -// *createTestCombinedMetrics(). -// addTransaction(ts, svcName, "", txnFaas). -// addServiceTransaction(ts, svcName, "", svcTxn). -// addSpan(ts, svcName, "", span), -// ), -// expectedEvents: []*modelpb.APMEvent{ -// createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), -// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), -// createTestSpanMetric(ts, aggIvl, svcName, span, 0), -// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), -// }, -// }, -// { -// name: "overflow", -// aggregationInterval: aggIvl, -// combinedMetrics: CombinedMetrics( -// *createTestCombinedMetrics(). -// addTransaction(ts, svcName, "", txnFaas). -// addServiceTransaction(ts, svcName, "", svcTxn). -// addSpan(ts, svcName, "", span). -// addPerServiceOverflowTransaction(ts, svcName, "", txn). -// addPerServiceOverflowServiceTransaction(ts, svcName, "", svcTxn). -// addPerServiceOverflowSpan(ts, svcName, "", span). -// addGlobalServiceOverflowServiceInstance(ts, "overflow", ""), -// ), -// expectedEvents: []*modelpb.APMEvent{ -// createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, 0), -// createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, 0), -// createTestSpanMetric(ts, aggIvl, svcName, span, 0), -// createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), -// // Events due to overflow -// createTestTransactionMetric(processingTime, aggIvl, svcName, overflowTxn, 1), -// createTestServiceTransactionMetric(processingTime, aggIvl, svcName, overflowSvcTxn, 1), -// createTestSpanMetric(processingTime, aggIvl, svcName, overflowSpan, 1), -// createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 1), -// }, -// }, -// { -// name: "service_instance_overflow_in_global_and_per_svc", -// aggregationInterval: aggIvl, -// combinedMetrics: CombinedMetrics( -// *createTestCombinedMetrics(). -// addServiceInstance(ts, "svc1", ""). -// addGlobalServiceOverflowServiceInstance(ts, "svc1", "1"). -// addGlobalServiceOverflowServiceInstance(ts, "svc2", "1"), -// ), -// expectedEvents: []*modelpb.APMEvent{ -// createTestServiceSummaryMetric(ts, aggIvl, "svc1", 0), -// createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 2), -// }, -// }, -// } { -// t.Run(tc.name, func(t *testing.T) { -// b, err := CombinedMetricsToBatch( -// tc.combinedMetrics, -// processingTime, -// tc.aggregationInterval, -// ) -// assert.NoError(t, err) -// assert.Empty(t, cmp.Diff( -// tc.expectedEvents, *b, -// cmpopts.IgnoreTypes(netip.Addr{}), -// cmpopts.SortSlices(func(e1, e2 *modelpb.APMEvent) bool { -// m1Name := e1.GetMetricset().GetName() -// m2Name := e2.GetMetricset().GetName() -// if m1Name != m2Name { -// return m1Name < m2Name -// } -// -// a1Name := e1.GetAgent().GetName() -// a2Name := e2.GetAgent().GetName() -// if a1Name != a2Name { -// return a1Name < a2Name -// } -// -// return e1.GetService().GetEnvironment() < e2.GetService().GetEnvironment() -// }), -// protocmp.Transform(), -// )) -// }) -// } -// } +func TestCombinedMetricsToBatch(t *testing.T) { + ts := time.Now() + aggIvl := time.Minute + processingTime := ts.Truncate(aggIvl) + svcName := "test" + coldstart := nullable.True + var ( + svc = ServiceAggregationKey{Timestamp: ts, ServiceName: svcName} + svcIns = ServiceInstanceAggregationKey{} + faas = &modelpb.Faas{Id: "f1", ColdStart: coldstart.ToBoolPtr(), Version: "v2", TriggerType: "http"} + span = SpanAggregationKey{SpanName: "spn", Resource: "postgresql"} + overflowSpan = SpanAggregationKey{TargetName: "_other"} + spanCount = 1 + svcTxn = ServiceTransactionAggregationKey{TransactionType: "typ"} + overflowSvcTxn = ServiceTransactionAggregationKey{TransactionType: "_other"} + txn = TransactionAggregationKey{TransactionName: "txn", TransactionType: "typ"} + txnFaas = TransactionAggregationKey{TransactionName: "txn", TransactionType: "typ", + FAASID: faas.Id, FAASColdstart: coldstart, FAASVersion: faas.Version, FAASTriggerType: faas.TriggerType} + overflowTxn = TransactionAggregationKey{TransactionName: "_other"} + txnCount = 100 + ) + for _, tc := range []struct { + name string + aggregationInterval time.Duration + combinedMetrics func() CombinedMetrics + expectedEvents modelpb.Batch + }{ + { + name: "no_overflow_without_faas", + aggregationInterval: aggIvl, + combinedMetrics: func() CombinedMetrics { + return NewTestCombinedMetrics(). + AddServiceMetrics(svc). + AddServiceInstanceMetrics(svcIns). + AddSpan(span, WithSpanCount(spanCount)). + AddTransaction(txn, WithTransactionCount(txnCount)). + AddServiceTransaction(svcTxn, WithTransactionCount(txnCount)). + Get() + }, + expectedEvents: []*modelpb.APMEvent{ + createTestTransactionMetric(ts, aggIvl, svcName, txn, nil, txnCount, 0), + createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, txnCount, 0), + createTestSpanMetric(ts, aggIvl, svcName, span, spanCount, 0), + createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), + }, + }, + { + name: "no_overflow", + aggregationInterval: aggIvl, + combinedMetrics: func() CombinedMetrics { + return NewTestCombinedMetrics(). + AddServiceMetrics(svc). + AddServiceInstanceMetrics(svcIns). + AddSpan(span, WithSpanCount(spanCount)). + AddTransaction(txnFaas, WithTransactionCount(txnCount)). + AddServiceTransaction(svcTxn, WithTransactionCount(txnCount)). + Get() + }, + expectedEvents: []*modelpb.APMEvent{ + createTestTransactionMetric(ts, aggIvl, svcName, txn, faas, txnCount, 0), + createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, txnCount, 0), + createTestSpanMetric(ts, aggIvl, svcName, span, spanCount, 0), + createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), + }, + }, + { + name: "overflow", + aggregationInterval: aggIvl, + combinedMetrics: func() CombinedMetrics { + tcm := NewTestCombinedMetrics() + tcm. + AddServiceMetrics(svc). + AddServiceInstanceMetrics(svcIns). + AddSpan(span, WithSpanCount(spanCount)). + AddTransaction(txnFaas, WithTransactionCount(txnCount)). + AddServiceTransaction(svcTxn, WithTransactionCount(txnCount)). + AddTransactionOverflow(txn, WithTransactionCount(txnCount)). + AddServiceTransactionOverflow(svcTxn, WithTransactionCount(txnCount)). + AddSpanOverflow(span, WithSpanCount(spanCount)) + // Add global service overflow + tcm. + AddServiceMetricsOverflow( + ServiceAggregationKey{Timestamp: ts, ServiceName: "svc_overflow"}). + AddServiceInstanceMetricsOverflow(ServiceInstanceAggregationKey{}) + return tcm.Get() + }, + expectedEvents: []*modelpb.APMEvent{ + createTestTransactionMetric(ts, aggIvl, svcName, txnFaas, faas, txnCount, 0), + createTestServiceTransactionMetric(ts, aggIvl, svcName, svcTxn, txnCount, 0), + createTestSpanMetric(ts, aggIvl, svcName, span, spanCount, 0), + createTestServiceSummaryMetric(ts, aggIvl, svcName, 0), + // Events due to overflow + createTestTransactionMetric(processingTime, aggIvl, svcName, overflowTxn, nil, txnCount, 1), + createTestServiceTransactionMetric(processingTime, aggIvl, svcName, overflowSvcTxn, txnCount, 1), + createTestSpanMetric(processingTime, aggIvl, svcName, overflowSpan, spanCount, 1), + createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 1), + }, + }, + { + name: "service_instance_overflow_in_global_and_per_svc", + aggregationInterval: aggIvl, + combinedMetrics: func() CombinedMetrics { + tcm := NewTestCombinedMetrics() + tcm. + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetrics(ServiceInstanceAggregationKey{}) + tcm. + AddServiceMetrics(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc1"}). + AddServiceInstanceMetricsOverflow(ServiceInstanceAggregationKey{GlobalLabelsStr: getTestGlobalLabelsStr(t, "1")}) + tcm. + AddServiceMetricsOverflow(ServiceAggregationKey{Timestamp: ts, ServiceName: "svc2"}). + AddServiceInstanceMetricsOverflow(ServiceInstanceAggregationKey{GlobalLabelsStr: getTestGlobalLabelsStr(t, "2")}) + return tcm.Get() + }, + expectedEvents: []*modelpb.APMEvent{ + createTestServiceSummaryMetric(ts, aggIvl, "svc1", 0), + createTestServiceSummaryMetric(processingTime, aggIvl, "_other", 2), + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + b, err := CombinedMetricsToBatch( + tc.combinedMetrics(), + processingTime, + tc.aggregationInterval, + ) + assert.NoError(t, err) + assert.Empty(t, cmp.Diff( + tc.expectedEvents, *b, + cmpopts.IgnoreTypes(netip.Addr{}), + cmpopts.SortSlices(func(e1, e2 *modelpb.APMEvent) bool { + m1Name := e1.GetMetricset().GetName() + m2Name := e2.GetMetricset().GetName() + if m1Name != m2Name { + return m1Name < m2Name + } + + a1Name := e1.GetAgent().GetName() + a2Name := e2.GetAgent().GetName() + if a1Name != a2Name { + return a1Name < a2Name + } + + return e1.GetService().GetEnvironment() < e2.GetService().GetEnvironment() + }), + protocmp.Transform(), + )) + }) + } +} func BenchmarkCombinedMetricsToBatch(b *testing.B) { ai := time.Hour @@ -458,192 +487,199 @@ func BenchmarkEventToCombinedMetrics(b *testing.B) { } } -// -// func createTestServiceSummaryMetric( -// ts time.Time, -// ivl time.Duration, -// svcName string, -// overflowCount int, -// ) *modelpb.APMEvent { -// var metricsetSamples []*modelpb.MetricsetSample -// if overflowCount > 0 { -// metricsetSamples = []*modelpb.MetricsetSample{ -// { -// Name: "service_summary.aggregation.overflow_count", -// Value: float64(overflowCount), -// }, -// } -// } -// return &modelpb.APMEvent{ -// Timestamp: timestamppb.New(ts), -// Metricset: &modelpb.Metricset{ -// Name: "service_summary", -// Samples: metricsetSamples, -// Interval: formatDuration(ivl), -// }, -// Service: &modelpb.Service{Name: svcName}, -// } -// } -// -// func createTestTransactionMetric( -// ts time.Time, -// ivl time.Duration, -// svcName string, -// txn testTransaction, -// overflowCount int, -// ) *modelpb.APMEvent { -// histRep := hdrhistogram.New() -// for i := 0; i < txn.count; i++ { -// histRep.RecordDuration(time.Second, 1) -// } -// -// total, counts, values := histRep.Buckets() -// var eventSuccessSummary modelpb.SummaryMetric -// switch txn.eventOutcome { -// case "success": -// eventSuccessSummary.Count = total -// eventSuccessSummary.Sum = float64(total) -// case "failure": -// eventSuccessSummary.Count = total -// case "unknown": -// // Keep both Count and Sum as 0. -// } -// transactionDurationSummary := &modelpb.SummaryMetric{ -// Count: total, -// // only 1 expected element -// Sum: values[0] * float64(counts[0]), -// } -// var metricsetSamples []*modelpb.MetricsetSample -// if overflowCount > 0 { -// metricsetSamples = []*modelpb.MetricsetSample{ -// { -// Name: "transaction.aggregation.overflow_count", -// Value: float64(overflowCount), -// }, -// } -// } -// return &modelpb.APMEvent{ -// Timestamp: timestamppb.New(ts), -// Metricset: &modelpb.Metricset{ -// Name: "transaction", -// Interval: formatDuration(ivl), -// Samples: metricsetSamples, -// DocCount: total, -// }, -// Service: &modelpb.Service{Name: svcName}, -// Transaction: &modelpb.Transaction{ -// Name: txn.txnName, -// Type: txn.txnType, -// DurationHistogram: &modelpb.Histogram{ -// Counts: counts, -// Values: values, -// }, -// DurationSummary: transactionDurationSummary, -// }, -// Faas: txn.faas, -// Event: &modelpb.Event{ -// SuccessCount: &eventSuccessSummary, -// }, -// } -// } -// -// func createTestServiceTransactionMetric( -// ts time.Time, -// ivl time.Duration, -// svcName string, -// svcTxn testServiceTransaction, -// overflowCount int, -// ) *modelpb.APMEvent { -// histRep := hdrhistogram.New() -// for i := 0; i < svcTxn.count; i++ { -// histRep.RecordDuration(time.Second, 1) -// } -// total, counts, values := histRep.Buckets() -// transactionDurationSummary := &modelpb.SummaryMetric{ -// Count: total, -// // only 1 expected element -// Sum: values[0] * float64(counts[0]), -// } -// var metricsetSamples []*modelpb.MetricsetSample -// if overflowCount > 0 { -// metricsetSamples = []*modelpb.MetricsetSample{ -// { -// Name: "service_transaction.aggregation.overflow_count", -// Value: float64(overflowCount), -// }, -// } -// } -// return &modelpb.APMEvent{ -// Timestamp: timestamppb.New(ts), -// Metricset: &modelpb.Metricset{ -// Name: "service_transaction", -// Interval: formatDuration(ivl), -// Samples: metricsetSamples, -// DocCount: total, -// }, -// Service: &modelpb.Service{Name: svcName}, -// Transaction: &modelpb.Transaction{ -// Type: svcTxn.txnType, -// DurationHistogram: &modelpb.Histogram{ -// Counts: counts, -// Values: values, -// }, -// DurationSummary: transactionDurationSummary, -// }, -// Event: &modelpb.Event{ -// SuccessCount: &modelpb.SummaryMetric{ -// // test code generates all success events -// Count: int64(svcTxn.count), -// Sum: float64(svcTxn.count), -// }, -// }, -// } -// } -// -// func createTestSpanMetric( -// ts time.Time, -// ivl time.Duration, -// svcName string, -// span testSpan, -// overflowCount int, -// ) *modelpb.APMEvent { -// var metricsetSamples []*modelpb.MetricsetSample -// if overflowCount > 0 { -// metricsetSamples = []*modelpb.MetricsetSample{ -// { -// Name: "service_destination.aggregation.overflow_count", -// Value: float64(overflowCount), -// }, -// } -// } -// var target *modelpb.ServiceTarget -// if span.targetName != "" { -// target = &modelpb.ServiceTarget{ -// Name: span.targetName, -// } -// } -// return &modelpb.APMEvent{ -// Timestamp: timestamppb.New(ts), -// Metricset: &modelpb.Metricset{ -// Name: "service_destination", -// Interval: formatDuration(ivl), -// Samples: metricsetSamples, -// DocCount: int64(span.count), -// }, -// Service: &modelpb.Service{ -// Name: svcName, -// Target: target, -// }, -// Span: &modelpb.Span{ -// Name: span.spanName, -// DestinationService: &modelpb.DestinationService{ -// Resource: span.destinationResource, -// ResponseTime: &modelpb.AggregatedDuration{ -// // test code generates 1 count for 1 ns -// Count: int64(span.count), -// Sum: durationpb.New(time.Duration(span.count)), -// }, -// }, -// }, -// } -// } +func createTestServiceSummaryMetric( + ts time.Time, + ivl time.Duration, + svcName string, + overflowCount int, +) *modelpb.APMEvent { + var metricsetSamples []*modelpb.MetricsetSample + if overflowCount > 0 { + metricsetSamples = []*modelpb.MetricsetSample{ + { + Name: "service_summary.aggregation.overflow_count", + Value: float64(overflowCount), + }, + } + } + return &modelpb.APMEvent{ + Timestamp: timestamppb.New(ts), + Metricset: &modelpb.Metricset{ + Name: "service_summary", + Samples: metricsetSamples, + Interval: formatDuration(ivl), + }, + Service: &modelpb.Service{Name: svcName}, + } +} + +func createTestTransactionMetric( + ts time.Time, + ivl time.Duration, + svcName string, + txn TransactionAggregationKey, + faas *modelpb.Faas, + count, overflowCount int, +) *modelpb.APMEvent { + histRep := hdrhistogram.New() + histRep.RecordDuration(time.Second, float64(count)) + total, counts, values := histRep.Buckets() + var eventSuccessSummary modelpb.SummaryMetric + switch txn.EventOutcome { + case "success": + eventSuccessSummary.Count = total + eventSuccessSummary.Sum = float64(total) + case "failure": + eventSuccessSummary.Count = total + case "unknown": + // Keep both Count and Sum as 0. + } + transactionDurationSummary := &modelpb.SummaryMetric{ + Count: total, + // only 1 expected element + Sum: values[0] * float64(counts[0]), + } + var metricsetSamples []*modelpb.MetricsetSample + if overflowCount > 0 { + metricsetSamples = []*modelpb.MetricsetSample{ + { + Name: "transaction.aggregation.overflow_count", + Value: float64(overflowCount), + }, + } + } + return &modelpb.APMEvent{ + Timestamp: timestamppb.New(ts), + Metricset: &modelpb.Metricset{ + Name: "transaction", + Interval: formatDuration(ivl), + Samples: metricsetSamples, + DocCount: total, + }, + Service: &modelpb.Service{Name: svcName}, + Transaction: &modelpb.Transaction{ + Name: txn.TransactionName, + Type: txn.TransactionType, + DurationHistogram: &modelpb.Histogram{ + Counts: counts, + Values: values, + }, + DurationSummary: transactionDurationSummary, + }, + Faas: faas, + Event: &modelpb.Event{ + SuccessCount: &eventSuccessSummary, + }, + } +} + +func createTestServiceTransactionMetric( + ts time.Time, + ivl time.Duration, + svcName string, + svcTxn ServiceTransactionAggregationKey, + count, overflowCount int, +) *modelpb.APMEvent { + histRep := hdrhistogram.New() + histRep.RecordDuration(time.Second, float64(count)) + total, counts, values := histRep.Buckets() + transactionDurationSummary := &modelpb.SummaryMetric{ + Count: total, + // only 1 expected element + Sum: values[0] * float64(counts[0]), + } + var metricsetSamples []*modelpb.MetricsetSample + if overflowCount > 0 { + metricsetSamples = []*modelpb.MetricsetSample{ + { + Name: "service_transaction.aggregation.overflow_count", + Value: float64(overflowCount), + }, + } + } + return &modelpb.APMEvent{ + Timestamp: timestamppb.New(ts), + Metricset: &modelpb.Metricset{ + Name: "service_transaction", + Interval: formatDuration(ivl), + Samples: metricsetSamples, + DocCount: total, + }, + Service: &modelpb.Service{Name: svcName}, + Transaction: &modelpb.Transaction{ + Type: svcTxn.TransactionType, + DurationHistogram: &modelpb.Histogram{ + Counts: counts, + Values: values, + }, + DurationSummary: transactionDurationSummary, + }, + Event: &modelpb.Event{ + SuccessCount: &modelpb.SummaryMetric{ + // test code generates all success events + Count: int64(count), + Sum: float64(count), + }, + }, + } +} + +func createTestSpanMetric( + ts time.Time, + ivl time.Duration, + svcName string, + span SpanAggregationKey, + count, overflowCount int, +) *modelpb.APMEvent { + var metricsetSamples []*modelpb.MetricsetSample + if overflowCount > 0 { + metricsetSamples = []*modelpb.MetricsetSample{ + { + Name: "service_destination.aggregation.overflow_count", + Value: float64(overflowCount), + }, + } + } + var target *modelpb.ServiceTarget + if span.TargetName != "" { + target = &modelpb.ServiceTarget{ + Name: span.TargetName, + } + } + return &modelpb.APMEvent{ + Timestamp: timestamppb.New(ts), + Metricset: &modelpb.Metricset{ + Name: "service_destination", + Interval: formatDuration(ivl), + Samples: metricsetSamples, + DocCount: int64(count), + }, + Service: &modelpb.Service{ + Name: svcName, + Target: target, + }, + Span: &modelpb.Span{ + Name: span.SpanName, + DestinationService: &modelpb.DestinationService{ + Resource: span.Resource, + ResponseTime: &modelpb.AggregatedDuration{ + // test code generates 1 count for 1 ns + Count: int64(count), + Sum: durationpb.New(time.Duration(count)), + }, + }, + }, + } +} + +func getTestGlobalLabelsStr(t *testing.T, s string) string { + t.Helper() + var gl GlobalLabels + gl.Labels = make(modelpb.Labels) + gl.Labels["test"] = &modelpb.LabelValue{Value: s} + gls, err := gl.MarshalString() + if err != nil { + t.Fatal(err) + } + return gls +} From 670b125637adf2d2eabef76445d6f3e66dccf252 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 27 Jul 2023 14:34:57 +0800 Subject: [PATCH 12/13] Add test for proto based histogram merger --- aggregators/merger.go | 1 - aggregators/merger_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/aggregators/merger.go b/aggregators/merger.go index caf424a..757d47f 100644 --- a/aggregators/merger.go +++ b/aggregators/merger.go @@ -401,7 +401,6 @@ func mergeSpanMetrics(to, from *aggregationpb.SpanMetrics) { to.Sum += from.Sum } -// TODO: Add tests for merge histograms func mergeHistogram(to, from *aggregationpb.HDRHistogram) { // Assume both histograms are created with same arguments m := make(map[int32]int64) diff --git a/aggregators/merger_test.go b/aggregators/merger_test.go index 3ef0534..1504ab4 100644 --- a/aggregators/merger_test.go +++ b/aggregators/merger_test.go @@ -5,6 +5,7 @@ package aggregators import ( + "math/rand" "reflect" "testing" "time" @@ -14,6 +15,7 @@ import ( "google.golang.org/protobuf/testing/protocmp" "github.com/elastic/apm-aggregation/aggregationpb" + "github.com/elastic/apm-aggregation/aggregators/internal/hdrhistogram" ) func TestMerge(t *testing.T) { @@ -995,3 +997,28 @@ func TestCardinalityEstimationOnSubKeyCollision(t *testing.T) { assert.Equal(t, uint64(2), cmm.metrics.OverflowServices.OverflowServiceTransaction.Estimator.Estimate()) assert.Equal(t, uint64(2), cmm.metrics.OverflowServices.OverflowSpan.Estimator.Estimate()) } + +func TestMergeHistogram(t *testing.T) { + // Test assumes histogram representation Merge is correct + hist1, hist2 := hdrhistogram.New(), hdrhistogram.New() + + for i := 0; i < 1_000_000; i++ { + v1, v2 := rand.Int63n(3_600_000_000), rand.Int63n(3_600_000_000) + c1, c2 := rand.Int63n(1_000), rand.Int63n(1_000) + hist1.RecordValues(v1, c1) + hist2.RecordValues(v2, c2) + } + + histproto1, histproto2 := HistogramToProto(hist1), HistogramToProto(hist2) + hist1.Merge(hist2) + mergeHistogram(histproto1, histproto2) + histActual := hdrhistogram.New() + HistogramFromProto(histActual, histproto1) + + assert.Empty(t, cmp.Diff( + hist1, + histActual, + cmp.AllowUnexported(hdrhistogram.HistogramRepresentation{}), + cmp.AllowUnexported(hdrhistogram.HybridCountsRep{}), + )) +} From e0387e1f65daadc0064b6d86df1eb0a16a7daa67 Mon Sep 17 00:00:00 2001 From: Vishal Raj Date: Thu, 27 Jul 2023 16:41:01 +0800 Subject: [PATCH 13/13] Enable test --- aggregators/aggregator_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go index 4aaffe9..cea30e8 100644 --- a/aggregators/aggregator_test.go +++ b/aggregators/aggregator_test.go @@ -689,7 +689,6 @@ func TestCombinedMetricsKeyOrderedByProjectID(t *testing.T) { } func TestHarvest(t *testing.T) { - t.Skip() cmCount := 5 ivls := []time.Duration{time.Second, 2 * time.Second, 4 * time.Second} m := make(map[time.Duration]map[[16]byte]bool)