From 49d0906ed6761b7aff021292f2598307d8e11134 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Tue, 13 Aug 2024 19:51:09 +0200
Subject: [PATCH 01/11] add converted failing test

A basic test to verify the converted bug in handling tags.
---
 aggregators/converter_test.go | 62 +++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go
index 4bc1cf4..c7b8837 100644
--- a/aggregators/converter_test.go
+++ b/aggregators/converter_test.go
@@ -883,3 +883,65 @@ func TestMarshalEventGlobalLabelsRace(t *testing.T) {
 	}
 	wg.Wait()
 }
+
+func TestUnmarshalBinary(t *testing.T) {
+	// prep work to get b and b2
+	e := &modelpb.APMEvent{
+		Labels: modelpb.Labels{
+			"tag4": &modelpb.LabelValue{
+				Value:  "",
+				Values: []string{"a", "b"},
+				Global: true,
+			},
+		},
+	}
+	b, err := marshalEventGlobalLabels(e)
+	require.NoError(t, err)
+	e2 := &modelpb.APMEvent{
+		Labels: modelpb.Labels{
+			"tag4": &modelpb.LabelValue{
+				Value:  "",
+				Values: []string{"c", "d"},
+				Global: true,
+			},
+		},
+	}
+	b2, err := marshalEventGlobalLabels(e2)
+	require.NoError(t, err)
+	// prep work done
+
+	// check gl
+	gl := globalLabels{}
+	err = gl.UnmarshalBinary(b)
+	require.NoError(t, err)
+	assert.Equal(t, modelpb.Labels{
+		"tag4": &modelpb.LabelValue{
+			Value:  "",
+			Values: []string{"a", "b"},
+			Global: true,
+		},
+	}, gl.Labels)
+
+	// check gl2
+	gl2 := globalLabels{}
+	err = gl2.UnmarshalBinary(b2)
+	require.NoError(t, err)
+	assert.Equal(t, modelpb.Labels{
+		"tag4": &modelpb.LabelValue{
+			Value:  "",
+			Values: []string{"c", "d"},
+			Global: true,
+		},
+	}, gl2.Labels)
+
+	// check gl again
+	// NOTE: this should NOT fail.
+	// It does because of a bug.
+	assert.Equal(t, modelpb.Labels{
+		"tag4": &modelpb.LabelValue{
+			Value:  "",
+			Values: []string{"a", "b"},
+			Global: true,
+		},
+	}, gl.Labels)
+}

From 6c0a6abfbaa6b6c34a2ea896620c0a5332158389 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Tue, 13 Aug 2024 22:25:12 +0200
Subject: [PATCH 02/11] refactor TestAggregateAndHarvest

Refactor to allow running the same test logic with different inputs and
expected results.
---
 aggregators/aggregator_test.go | 300 +++++++++++++++++----------------
 1 file changed, 155 insertions(+), 145 deletions(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index 686bbb4..80b7755 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1075,163 +1075,173 @@ func TestHarvest(t *testing.T) {
 
 func TestAggregateAndHarvest(t *testing.T) {
 	txnDuration := 100 * time.Millisecond
-	batch := modelpb.Batch{
-		{
-			Event: &modelpb.Event{
-				Outcome:  "success",
-				Duration: uint64(txnDuration),
-			},
-			Transaction: &modelpb.Transaction{
-				Name:                "foo",
-				Type:                "txtype",
-				RepresentativeCount: 1,
-			},
-			Service: &modelpb.Service{Name: "svc"},
-			Labels: modelpb.Labels{
-				"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
-				"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
-				"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
-				"mylabel":         &modelpb.LabelValue{Global: false, Value: "myvalue"},
-			},
-			NumericLabels: modelpb.NumericLabels{
-				"user_id":        &modelpb.NumericLabelValue{Global: true, Value: 100},
-				"cost_center":    &modelpb.NumericLabelValue{Global: true, Value: 10},
-				"mynumericlabel": &modelpb.NumericLabelValue{Global: false, Value: 1},
-			},
-		},
+
+	runTest := func(t *testing.T, batch modelpb.Batch, expected []*modelpb.APMEvent) {
+		var events []*modelpb.APMEvent
+		agg, err := New(
+			WithDataDir(t.TempDir()),
+			WithLimits(Limits{
+				MaxSpanGroups:                         1000,
+				MaxSpanGroupsPerService:               100,
+				MaxTransactionGroups:                  100,
+				MaxTransactionGroupsPerService:        10,
+				MaxServiceTransactionGroups:           100,
+				MaxServiceTransactionGroupsPerService: 10,
+				MaxServices:                           10,
+			}),
+			WithProcessor(sliceProcessor(&events)),
+			WithAggregationIntervals([]time.Duration{time.Second}),
+		)
+		require.NoError(t, err)
+		require.NoError(t, agg.AggregateBatch(
+			context.Background(),
+			EncodeToCombinedMetricsKeyID(t, "ab01"),
+			&batch,
+		))
+		require.NoError(t, agg.Close(context.Background()))
+
+		assert.Empty(t, cmp.Diff(
+			expected,
+			events,
+			cmpopts.IgnoreTypes(netip.Addr{}),
+			cmpopts.SortSlices(func(a, b *modelpb.APMEvent) bool {
+				return a.Metricset.Name < b.Metricset.Name
+			}),
+			protocmp.Transform(),
+			protocmp.IgnoreFields(&modelpb.Event{}, "received"),
+		))
 	}
-	var events []*modelpb.APMEvent
-	agg, err := New(
-		WithDataDir(t.TempDir()),
-		WithLimits(Limits{
-			MaxSpanGroups:                         1000,
-			MaxSpanGroupsPerService:               100,
-			MaxTransactionGroups:                  100,
-			MaxTransactionGroupsPerService:        10,
-			MaxServiceTransactionGroups:           100,
-			MaxServiceTransactionGroupsPerService: 10,
-			MaxServices:                           10,
-		}),
-		WithProcessor(sliceProcessor(&events)),
-		WithAggregationIntervals([]time.Duration{time.Second}),
-	)
-	require.NoError(t, err)
-	require.NoError(t, agg.AggregateBatch(
-		context.Background(),
-		EncodeToCombinedMetricsKeyID(t, "ab01"),
-		&batch,
-	))
-	require.NoError(t, agg.Close(context.Background()))
 
-	expected := []*modelpb.APMEvent{
-		{
-			Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
-			Event: &modelpb.Event{
-				SuccessCount: &modelpb.SummaryMetric{
-					Count: 1,
-					Sum:   1,
+	t.Run("success case", func(t *testing.T) {
+		input := modelpb.Batch{
+			{
+				Event: &modelpb.Event{
+					Outcome:  "success",
+					Duration: uint64(txnDuration),
 				},
-				Outcome: "success",
-			},
-			Transaction: &modelpb.Transaction{
-				Name: "foo",
-				Type: "txtype",
-				Root: true,
-				DurationSummary: &modelpb.SummaryMetric{
-					Count: 1,
-					Sum:   100351, // Estimate from histogram
+				Transaction: &modelpb.Transaction{
+					Name:                "foo",
+					Type:                "txtype",
+					RepresentativeCount: 1,
 				},
-				DurationHistogram: &modelpb.Histogram{
-					Values: []float64{100351},
-					Counts: []uint64{1},
+				Service: &modelpb.Service{Name: "svc"},
+				Labels: modelpb.Labels{
+					"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
+					"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
+					"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
+					"mylabel":         &modelpb.LabelValue{Global: false, Value: "myvalue"},
 				},
-			},
-			Service: &modelpb.Service{
-				Name: "svc",
-			},
-			Labels: modelpb.Labels{
-				"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
-				"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
-				"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
-			},
-			NumericLabels: modelpb.NumericLabels{
-				"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
-				"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
-			},
-			Metricset: &modelpb.Metricset{
-				Name:     "transaction",
-				DocCount: 1,
-				Interval: "1s",
-			},
-		},
-		{
-			Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
-			Event:     &modelpb.Event{},
-			Service: &modelpb.Service{
-				Name: "svc",
-			},
-			Labels: modelpb.Labels{
-				"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
-				"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
-				"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
-			},
-			NumericLabels: modelpb.NumericLabels{
-				"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
-				"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
-			},
-			Metricset: &modelpb.Metricset{
-				Name:     "service_summary",
-				Interval: "1s",
-			},
-		},
-		{
-			Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
-			Event: &modelpb.Event{
-				SuccessCount: &modelpb.SummaryMetric{
-					Count: 1,
-					Sum:   1,
+				NumericLabels: modelpb.NumericLabels{
+					"user_id":        &modelpb.NumericLabelValue{Global: true, Value: 100},
+					"cost_center":    &modelpb.NumericLabelValue{Global: true, Value: 10},
+					"mynumericlabel": &modelpb.NumericLabelValue{Global: false, Value: 1},
 				},
 			},
-			Transaction: &modelpb.Transaction{
-				Type: "txtype",
-				DurationSummary: &modelpb.SummaryMetric{
-					Count: 1,
-					Sum:   100351, // Estimate from histogram
+		}
+
+		expected := []*modelpb.APMEvent{
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+					Outcome: "success",
 				},
-				DurationHistogram: &modelpb.Histogram{
-					Values: []float64{100351},
-					Counts: []uint64{1},
+				Transaction: &modelpb.Transaction{
+					Name: "foo",
+					Type: "txtype",
+					Root: true,
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
+					"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
+					"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
+					"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "transaction",
+					DocCount: 1,
+					Interval: "1s",
 				},
 			},
-			Service: &modelpb.Service{
-				Name: "svc",
-			},
-			Labels: modelpb.Labels{
-				"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
-				"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
-				"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
-			},
-			NumericLabels: modelpb.NumericLabels{
-				"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
-				"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event:     &modelpb.Event{},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
+					"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
+					"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
+					"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_summary",
+					Interval: "1s",
+				},
 			},
-			Metricset: &modelpb.Metricset{
-				Name:     "service_transaction",
-				DocCount: 1,
-				Interval: "1s",
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+				},
+				Transaction: &modelpb.Transaction{
+					Type: "txtype",
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"department_name": &modelpb.LabelValue{Global: true, Value: "apm"},
+					"organization":    &modelpb.LabelValue{Global: true, Value: "observability"},
+					"company":         &modelpb.LabelValue{Global: true, Value: "elastic"},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"user_id":     &modelpb.NumericLabelValue{Global: true, Value: 100},
+					"cost_center": &modelpb.NumericLabelValue{Global: true, Value: 10},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_transaction",
+					DocCount: 1,
+					Interval: "1s",
+				},
 			},
-		},
-	}
-	assert.Empty(t, cmp.Diff(
-		expected,
-		events,
-		cmpopts.IgnoreTypes(netip.Addr{}),
-		cmpopts.SortSlices(func(a, b *modelpb.APMEvent) bool {
-			return a.Metricset.Name < b.Metricset.Name
-		}),
-		protocmp.Transform(),
-		protocmp.IgnoreFields(&modelpb.Event{}, "received"),
-	))
+		}
+
+		runTest(t, input, expected)
+	})
+
 }
 
 func TestHarvestOverflowCount(t *testing.T) {

From 57265b3f12fc0f9bfc0cf6e3cf12e489cc6bb448 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Tue, 13 Aug 2024 23:50:16 +0200
Subject: [PATCH 03/11] add global labels values test case

Add an e2e test for global labels values with multiple events.

A batch of 2 events in input with different labels Values but
same key should produce in output events with all different
Values added to the same key.
---
 aggregators/aggregator_test.go | 134 +++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index 80b7755..fa97983 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1242,6 +1242,140 @@ func TestAggregateAndHarvest(t *testing.T) {
 		runTest(t, input, expected)
 	})
 
+	t.Run("with multiple global labels values", func(t *testing.T) {
+		input := modelpb.Batch{
+			{
+				Event: &modelpb.Event{
+					Outcome:  "success",
+					Duration: uint64(txnDuration),
+				},
+				Transaction: &modelpb.Transaction{
+					Name:                "foo",
+					Type:                "txtype",
+					RepresentativeCount: 1,
+				},
+				Service: &modelpb.Service{Name: "svc"},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
+				},
+			},
+			{
+				Event: &modelpb.Event{
+					Outcome:  "success",
+					Duration: uint64(txnDuration),
+				},
+				Transaction: &modelpb.Transaction{
+					Name:                "foo",
+					Type:                "txtype",
+					RepresentativeCount: 1,
+				},
+				Service: &modelpb.Service{Name: "svc"},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"rty", "fgh"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{4, 5}},
+				},
+			},
+		}
+
+		expected := []*modelpb.APMEvent{
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+					Outcome: "success",
+				},
+				Transaction: &modelpb.Transaction{
+					Name: "foo",
+					Type: "txtype",
+					Root: true,
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "transaction",
+					DocCount: 1,
+					Interval: "1s",
+				},
+			},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event:     &modelpb.Event{},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_summary",
+					Interval: "1s",
+				},
+			},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+				},
+				Transaction: &modelpb.Transaction{
+					Type: "txtype",
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_transaction",
+					DocCount: 1,
+					Interval: "1s",
+				},
+			},
+		}
+
+		runTest(t, input, expected)
+	})
+
 }
 
 func TestHarvestOverflowCount(t *testing.T) {

From 3f629f5f74937b2822ebd2259a17fc5fd1fd61c1 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Tue, 13 Aug 2024 23:50:22 +0200
Subject: [PATCH 04/11] fix typos

Fix unrelated typos found while exploring the code.
---
 aggregators/config.go    | 2 +-
 aggregators/converter.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aggregators/config.go b/aggregators/config.go
index da719e4..2d0296b 100644
--- a/aggregators/config.go
+++ b/aggregators/config.go
@@ -83,7 +83,7 @@ func WithLimits(limits Limits) Option {
 // WithProcessor configures the processor for handling of the aggregated
 // metrics post harvest. Processor is called for each decoded combined
 // metrics after they are harvested. CombinedMetrics passed to the
-// processor is pooled and it is releasd back to the pool after processor
+// processor is pooled and it is released back to the pool after processor
 // has returned. If the processor mutates the CombinedMetrics such that it
 // can no longer access the pooled objects, then the Processor should
 // release the objects back to the pool.
diff --git a/aggregators/converter.go b/aggregators/converter.go
index 345e311..3a53838 100644
--- a/aggregators/converter.go
+++ b/aggregators/converter.go
@@ -368,7 +368,7 @@ func eventToCombinedMetrics(
 }
 
 // CombinedMetricsToBatch converts CombinedMetrics to a batch of APMEvents.
-// Events in the batch are popualted using vtproto's sync pool and should be
+// Events in the batch are populated using vtproto's sync pool and should be
 // released back to the pool using `APMEvent#ReturnToVTPool`.
 func CombinedMetricsToBatch(
 	cm *aggregationpb.CombinedMetrics,

From aa210a60d17e8bc3aa68d6b72c39de1fa91e2d95 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Wed, 14 Aug 2024 16:30:20 +0200
Subject: [PATCH 05/11] fix expected output

We only retain the latest tags in a batch of events.
---
 aggregators/aggregator_test.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index fa97983..15296b6 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1309,10 +1309,10 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Name: "svc",
 				},
 				Labels: modelpb.Labels{
-					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"rty", "fgh"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
-					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{4, 5}},
 				},
 				Metricset: &modelpb.Metricset{
 					Name:     "transaction",
@@ -1327,10 +1327,10 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Name: "svc",
 				},
 				Labels: modelpb.Labels{
-					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"rty", "fgh"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
-					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{4, 5}},
 				},
 				Metricset: &modelpb.Metricset{
 					Name:     "service_summary",
@@ -1360,10 +1360,10 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Name: "svc",
 				},
 				Labels: modelpb.Labels{
-					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe", "rty", "fgh"}},
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"rty", "fgh"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
-					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2, 4, 5}},
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{4, 5}},
 				},
 				Metricset: &modelpb.Metricset{
 					Name:     "service_transaction",

From 5e01ab244267b77830e8596465d2a8937a74c644 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Wed, 14 Aug 2024 18:28:41 +0200
Subject: [PATCH 06/11] enhance sorting logic

TestAggregateAndHarvest sort logic only used Metricset.Name.
The latest test added to it s flappy with such a logic, as we are
evaluating the same Metricset.Names in 2 different events with
global labels with equal key and different values.

Enhance the sorting logic to account for equal Metricset.Name and
leverage labels (keys, Value and Values) in the lessFn.
---
 aggregators/aggregator_test.go | 45 +++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index 15296b6..de1d6f1 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1105,7 +1105,50 @@ func TestAggregateAndHarvest(t *testing.T) {
 			events,
 			cmpopts.IgnoreTypes(netip.Addr{}),
 			cmpopts.SortSlices(func(a, b *modelpb.APMEvent) bool {
-				return a.Metricset.Name < b.Metricset.Name
+				// Sort by Metricset.Name, then by (sorted) labels.
+				// labels are sorted by key before comparing.
+				// labels keys are compared and when equal, Value are compared.
+				// If Value is equal, compare each element in Values.
+
+				// handle base case, we can sort by Metricset.Name
+				if a.Metricset.Name != b.Metricset.Name {
+					return a.Metricset.Name < b.Metricset.Name
+				}
+
+				// otherwise sort by sorted labels
+				akeys := make([]string, 0, len(a.Labels))
+				for k := range a.Labels {
+					akeys = append(akeys, k)
+				}
+				sort.Strings(akeys)
+
+				bkeys := make([]string, 0, len(b.Labels))
+				for k := range a.Labels {
+					bkeys = append(bkeys, k)
+				}
+				sort.Strings(bkeys)
+
+				for i := 0; i < len(akeys); i++ {
+					if akeys[i] != bkeys[i] {
+						return akeys[i] < bkeys[i]
+					}
+
+					akey := akeys[i]
+					if a.Labels[akey].Value != "" && a.Labels[akey].Value != b.Labels[akey].Value {
+						return a.Labels[akey].Value < b.Labels[akey].Value
+					}
+
+					bkey := bkeys[i]
+					for _, v := range a.Labels[akey].Values {
+						for _, w := range b.Labels[bkey].Values {
+							if v != w {
+								return v < w
+							}
+						}
+					}
+				}
+
+				return false
 			}),
 			protocmp.Transform(),
 			protocmp.IgnoreFields(&modelpb.Event{}, "received"),

From 93dbb9db690f44b669fdaf195b51367c1d5b54b5 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Wed, 14 Aug 2024 18:30:03 +0200
Subject: [PATCH 07/11] fix TestAggregateAndHarvest expectations

When 2 events in a batch have the same service name but
different tags we output 2 different sets of metrics,
one for each tag set. This was not considered in the
previous expected values.
---
 aggregators/aggregator_test.go | 88 ++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index de1d6f1..ded1305 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1363,6 +1363,43 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Interval: "1s",
 				},
 			},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+					Outcome: "success",
+				},
+				Transaction: &modelpb.Transaction{
+					Name: "foo",
+					Type: "txtype",
+					Root: true,
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "transaction",
+					DocCount: 1,
+					Interval: "1s",
+				},
+			},
 			{
 				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
 				Event:     &modelpb.Event{},
@@ -1380,6 +1417,23 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Interval: "1s",
 				},
 			},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event:     &modelpb.Event{},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_summary",
+					Interval: "1s",
+				},
+			},
 			{
 				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
 				Event: &modelpb.Event{
@@ -1414,6 +1468,40 @@ func TestAggregateAndHarvest(t *testing.T) {
 					Interval: "1s",
 				},
 			},
+			{
+				Timestamp: modelpb.FromTime(time.Unix(0, 0).UTC()),
+				Event: &modelpb.Event{
+					SuccessCount: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   1,
+					},
+				},
+				Transaction: &modelpb.Transaction{
+					Type: "txtype",
+					DurationSummary: &modelpb.SummaryMetric{
+						Count: 1,
+						Sum:   100351, // Estimate from histogram
+					},
+					DurationHistogram: &modelpb.Histogram{
+						Values: []float64{100351},
+						Counts: []uint64{1},
+					},
+				},
+				Service: &modelpb.Service{
+					Name: "svc",
+				},
+				Labels: modelpb.Labels{
+					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+				},
+				NumericLabels: modelpb.NumericLabels{
+					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
+				},
+				Metricset: &modelpb.Metricset{
+					Name:     "service_transaction",
+					DocCount: 1,
+					Interval: "1s",
+				},
+			},
 		}
 
 		runTest(t, input, expected)

From e2fcbec3970ed5bfbf3bf9a6dfc35fac2e66c3e0 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Wed, 14 Aug 2024 18:41:12 +0200
Subject: [PATCH 08/11] Revert "add converted failing test"

This reverts commit 49d0906ed6761b7aff021292f2598307d8e11134.

The same test is covered by TestMarshalEventGlobalLabelsRace,
which also test for a possible race condition.
---
 aggregators/converter_test.go | 62 -----------------------------------
 1 file changed, 62 deletions(-)

diff --git a/aggregators/converter_test.go b/aggregators/converter_test.go
index c7b8837..4bc1cf4 100644
--- a/aggregators/converter_test.go
+++ b/aggregators/converter_test.go
@@ -883,65 +883,3 @@ func TestMarshalEventGlobalLabelsRace(t *testing.T) {
 	}
 	wg.Wait()
 }
-
-func TestUnmarshalBinary(t *testing.T) {
-	// prep work to get b and b2
-	e := &modelpb.APMEvent{
-		Labels: modelpb.Labels{
-			"tag4": &modelpb.LabelValue{
-				Value:  "",
-				Values: []string{"a", "b"},
-				Global: true,
-			},
-		},
-	}
-	b, err := marshalEventGlobalLabels(e)
-	require.NoError(t, err)
-	e2 := &modelpb.APMEvent{
-		Labels: modelpb.Labels{
-			"tag4": &modelpb.LabelValue{
-				Value:  "",
-				Values: []string{"c", "d"},
-				Global: true,
-			},
-		},
-	}
-	b2, err := marshalEventGlobalLabels(e2)
-	require.NoError(t, err)
-	// prep work done
-
-	// check gl
-	gl := globalLabels{}
-	err = gl.UnmarshalBinary(b)
-	require.NoError(t, err)
-	assert.Equal(t, modelpb.Labels{
-		"tag4": &modelpb.LabelValue{
-			Value:  "",
-			Values: []string{"a", "b"},
-			Global: true,
-		},
-	}, gl.Labels)
-
-	// check gl2
-	gl2 := globalLabels{}
-	err = gl2.UnmarshalBinary(b2)
-	require.NoError(t, err)
-	assert.Equal(t, modelpb.Labels{
-		"tag4": &modelpb.LabelValue{
-			Value:  "",
-			Values: []string{"c", "d"},
-			Global: true,
-		},
-	}, gl2.Labels)
-
-	// check gl again
-	// NOTE: this should NOT fail.
-	// It does because of a bug.
-	assert.Equal(t, modelpb.Labels{
-		"tag4": &modelpb.LabelValue{
-			Value:  "",
-			Values: []string{"a", "b"},
-			Global: true,
-		},
-	}, gl.Labels)
-}

From 86f24d93b56cc7d4073306d7000b95b3e95a1aca Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Mon, 19 Aug 2024 20:10:00 +0200
Subject: [PATCH 09/11] fix bug

There was a typo, this loop should have iterated over b.Labels
but was iterating over a.Labels
---
 aggregators/aggregator_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index ded1305..bf26223 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1123,7 +1123,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 				sort.Strings(akeys)
 
 				bkeys := make([]string, 0, len(b.Labels))
-				for k := range a.Labels {
+				for k := range b.Labels {
 					bkeys = append(bkeys, k)
 				}
 				sort.Strings(bkeys)

From 3b3a56b20089800810af2cfe58d0a6c709f0ee52 Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Mon, 19 Aug 2024 20:12:00 +0200
Subject: [PATCH 10/11] guard for b labels length

The previous implementation could panic if len(b) > len(a). Add
a guard clause that limit iterating over label based on len(b).

If len(a.Labels) > len(b.Labels), everything else equal we
conclude that a > b.
---
 aggregators/aggregator_test.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index bf26223..caa7777 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1128,7 +1128,13 @@ func TestAggregateAndHarvest(t *testing.T) {
 				}
 				sort.Strings(bkeys)
 
-				for i := 0; i < len(akeys); i++ {
+				// guard for b labels being shorter than a labels
+				enough := len(akeys)
+				if len(bkeys) < enough {
+					enough = len(bkeys)
+				}
+
+				for i := 0; i < enough; i++ {
 					if akeys[i] != bkeys[i] {
 						return akeys[i] < bkeys[i]
 					}

From 396c6e5e6f19be347fec418b84d59531ee7e352e Mon Sep 17 00:00:00 2001
From: Edoardo Tenani <edoardo.tenani@elastic.co>
Date: Tue, 20 Aug 2024 10:49:06 +0200
Subject: [PATCH 11/11] simplify sort

Leverage a string comparison as a sort function, as
used in https://github.com/elastic/apm-data/blob/81d77648a3b17d4e52859110e233e40683fcdfe3/input/otlp/metrics_test.go#L1146-L1148

Adds an additional tag to the test case to ensure
expected results.
---
 aggregators/aggregator_test.go | 55 ++++------------------------------
 1 file changed, 5 insertions(+), 50 deletions(-)

diff --git a/aggregators/aggregator_test.go b/aggregators/aggregator_test.go
index caa7777..bd49644 100644
--- a/aggregators/aggregator_test.go
+++ b/aggregators/aggregator_test.go
@@ -1105,56 +1105,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 			events,
 			cmpopts.IgnoreTypes(netip.Addr{}),
 			cmpopts.SortSlices(func(a, b *modelpb.APMEvent) bool {
-				// Sort by Metricset.Name, then by (sorted) labels.
-				// labels are sorted by key before comparing.
-				// labels keys are compared and when equal, Value are compared.
-				// If Value is equal, compare each element in Values.
-
-				// handle base case, we can sort by Metricset.Name
-				if a.Metricset.Name != b.Metricset.Name {
-					return a.Metricset.Name < b.Metricset.Name
-				}
-
-				// otherwise sort by sorted labels
-				akeys := make([]string, 0, len(a.Labels))
-				for k := range a.Labels {
-					akeys = append(akeys, k)
-				}
-				sort.Strings(akeys)
-
-				bkeys := make([]string, 0, len(b.Labels))
-				for k := range b.Labels {
-					bkeys = append(bkeys, k)
-				}
-				sort.Strings(bkeys)
-
-				// guard for b labels being shorter than a labels
-				enough := len(akeys)
-				if len(bkeys) < enough {
-					enough = len(bkeys)
-				}
-
-				for i := 0; i < enough; i++ {
-					if akeys[i] != bkeys[i] {
-						return akeys[i] < bkeys[i]
-					}
-
-					akey := akeys[i]
-					if a.Labels[akey].Value != "" && a.Labels[akey].Value != b.Labels[akey].Value {
-						return a.Labels[akey].Value < b.Labels[akey].Value
-					}
-
-					bkey := bkeys[i]
-					for _, v := range a.Labels[akey].Values {
-						for _, w := range b.Labels[bkey].Values {
-							if v != w {
-								return v < w
-							}
-						}
-					}
-				}
-
-				return false
+				return strings.Compare(a.String(), b.String()) == -1
 			}),
 			protocmp.Transform(),
 			protocmp.IgnoreFields(&modelpb.Event{}, "received"),
@@ -1306,6 +1257,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 				Service: &modelpb.Service{Name: "svc"},
 				Labels: modelpb.Labels{
 					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+					"tag": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
 					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
@@ -1396,6 +1348,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 				},
 				Labels: modelpb.Labels{
 					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+					"tag": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
 					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
@@ -1431,6 +1384,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 				},
 				Labels: modelpb.Labels{
 					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+					"tag": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
 					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},
@@ -1498,6 +1452,7 @@ func TestAggregateAndHarvest(t *testing.T) {
 				},
 				Labels: modelpb.Labels{
 					"baz": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
+					"tag": &modelpb.LabelValue{Global: true, Values: []string{"asd", "qwe"}},
 				},
 				NumericLabels: modelpb.NumericLabels{
 					"bar": &modelpb.NumericLabelValue{Global: true, Values: []float64{1, 2}},