deltatocumulativeprocessor: enforce max bucket count for exphistograms #34157

Closed
wants to merge 14 commits
27 changes: 27 additions & 0 deletions .chloggen/deltatocumulative-cap-exphisto.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: deltatocumulativeprocessor

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: cap the number of exponential histogram buckets to 160

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [33277]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
4 changes: 2 additions & 2 deletions processor/deltatocumulativeprocessor/README.md
@@ -27,7 +27,7 @@ processors:
deltatocumulative:
# how long until a series not receiving new samples is removed
[ max_stale: <duration> | default = 5m ]

# upper limit of streams to track. new streams exceeding this limit
# will be dropped
[ max_streams: <int> | default = 0 (off) ]
@@ -39,4 +39,4 @@ There is no further configuration required. All delta samples are converted to cumulative
## Troubleshooting

When [Telemetry is
enabled](https://opentelemetry.io/docs/collector/configuration/#telemetry), this component exports [several metrics](./documentation.md).
enabled](https://opentelemetry.io/docs/collector/configuration/#telemetry), this component exports [several metrics](./documentation.md).
90 changes: 80 additions & 10 deletions processor/deltatocumulativeprocessor/internal/data/add.go
@@ -65,25 +65,95 @@ func (dp Histogram) Add(in Histogram) Histogram {
return dp
}

type bounds struct {
lower int32
upper int32
}

// with is an accessory for Merge() to calculate ideal combined scale.
func (b bounds) with(o bounds) bounds {
if o.empty() {
return b
}
if b.empty() {
return o
}
return bounds{
lower: min(b.lower, o.lower),
upper: max(b.upper, o.upper),
}
}

// empty indicates whether there are any values in a bounds.
func (b bounds) empty() bool {
return b == bounds{}
}

// boundsAtScale is an accessory for Add() to calculate ideal combined scale.
Member:
can we be more descriptive here? as a reader, I'm mostly interested in how this works, which is not explained.

i'd like to read something along the lines of "computes the bucket boundaries at given scale. it does so by dividing the bounds by two (>> operation) as many times as the scales differ."
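For reference, a minimal standalone sketch of the behavior described above (values are hypothetical; only the shift semantics come from this diff):

package main

import "fmt"

func main() {
	// bucket bounds at scale 3, viewed at scale 1: the scales differ
	// by 2, so each bound is halved twice. one >> does both halvings.
	lower, upper := int32(40), int32(47)
	shift := int32(3 - 1)
	fmt.Println(lower>>shift, upper>>shift) // prints: 10 11
}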

func (dp ExpHistogram) boundsAtScale(b expo.Buckets, scale int32) bounds {
if b.BucketCounts().Len() == 0 {
return bounds{}
}
shift := dp.Scale() - scale
a := expo.Abs(b)
return bounds{
lower: int32(a.Lower()) >> shift,
upper: int32(a.Upper()) >> shift,
}
}

// downscaleNeeded computes how much downscaling is needed by shifting the
// upper and lower bounds until they are separated by no more than size.
func downscaleNeeded(b bounds, size int) int32 {
var change int32

for b.upper-b.lower > int32(size) {
b.upper >>= 1
b.lower >>= 1
Comment on lines +111 to +112
Member:
Suggested change:
-	b.upper >>= 1
-	b.lower >>= 1
+	b.upper /= 2
+	b.lower /= 2

it's the same and reads easier if you're not super familiar with bitshifts

change++
}
return change
}
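// Illustration (editorial, not part of the PR): bounds{lower: 3, upper: 20}
// with size 4 halves to (1, 10), then (0, 5), then (0, 2), and returns 3,
// i.e. three scale reductions fit the range into 4 buckets.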

func (dp ExpHistogram) Add(in ExpHistogram) ExpHistogram {
type H = ExpHistogram

if dp.Scale() != in.Scale() {
hi, lo := expo.HiLo(dp, in, H.Scale)
from, to := expo.Scale(hi.Scale()), expo.Scale(lo.Scale())
expo.Downscale(hi.Positive(), from, to)
expo.Downscale(hi.Negative(), from, to)
hi.SetScale(lo.Scale())
}
minScale := min(dp.Scale(), in.Scale())

// logic is adapted from lightstep's algorithm for enforcing max buckets:
// https://github.com/lightstep/go-expohisto/blob/4375bf4ef2858552204edb8b4572330c94a4a755/structure/exponential.go#L542
// first, calculate the highest and lowest indices for each bucket, given the candidate min scale.
// then, calculate how much downscaling is needed to fit the merged range within max bucket count.
// finally, perform the actual downscaling.
posBounds := dp.boundsAtScale(dp.Positive(), minScale)
posBounds = posBounds.with(in.boundsAtScale(in.Positive(), minScale))

negBounds := dp.boundsAtScale(dp.Negative(), minScale)
negBounds = negBounds.with(in.boundsAtScale(in.Negative(), minScale))

minScale = min(
minScale-downscaleNeeded(posBounds, dp.MaxSize),
minScale-downscaleNeeded(negBounds, dp.MaxSize),
)
Comment on lines +121 to +137
Member:
can we move all of this logic into the expo package? This is generally useful and fits that package's purpose better.

maybe:

package expo

// Limit returns the Scale a and b need to be downscaled to so that merging does
// not exceed the given max bucket length
func Limit(a, b DataPoint, max int) Scale {}

Contributor Author:
Sounds good.
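A sketch of how that could look, assembled from the helpers introduced in this diff and mirroring the minScale computation in Add above. DefaultLimit and the free-function forms of boundsAtScale/downscaleNeeded are assumptions from this thread, not merged code:

package expo

// DefaultLimit is the bucket cap discussed in this PR.
const DefaultLimit = 160

// Limit returns the scale that a and b need to be downscaled to so that
// merging their positive and negative buckets stays within max buckets.
// boundsAtScale and downscaleNeeded are assumed to move here alongside it.
func Limit(a, b DataPoint, max int) Scale {
	scale := min(Scale(a.Scale()), Scale(b.Scale()))

	pos := boundsAtScale(a, a.Positive(), scale).with(boundsAtScale(b, b.Positive(), scale))
	neg := boundsAtScale(a, a.Negative(), scale).with(boundsAtScale(b, b.Negative(), scale))

	return min(
		scale-Scale(downscaleNeeded(pos, max)),
		scale-Scale(downscaleNeeded(neg, max)),
	)
}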


from, to := expo.Scale(dp.Scale()), expo.Scale(minScale)
expo.Downscale(dp.Positive(), from, to)
expo.Downscale(dp.Negative(), from, to)
dp.SetScale(minScale)

from = expo.Scale(in.Scale())
expo.Downscale(in.Positive(), from, to)
expo.Downscale(in.Negative(), from, to)
in.SetScale(minScale)

expo.Merge(dp.Positive(), in.Positive())
expo.Merge(dp.Negative(), in.Negative())

if dp.ZeroThreshold() != in.ZeroThreshold() {
hi, lo := expo.HiLo(dp, in, H.ZeroThreshold)
expo.WidenZero(lo.DataPoint, hi.ZeroThreshold())
}

expo.Merge(dp.Positive(), in.Positive())
expo.Merge(dp.Negative(), in.Negative())
Comment on lines -84 to -85
Member:
What's the reason to do merging before widening the zero bucket?

Afaict widening must happen first, or we might merge buckets with a different zero threshold, which is almost certainly wrong

Contributor Author:
Hm, I don't remember any specific reason. This was probably an oversight and you are correct - we need to widen the zero first because it affects the bucket counts.
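Given that agreement, the corrected ordering would presumably rearrange only the lines already present in this diff:

// widen the narrower zero bucket first, so both series agree on which
// range counts as "zero" before any bucket-wise summing happens
if dp.ZeroThreshold() != in.ZeroThreshold() {
	hi, lo := expo.HiLo(dp, in, H.ZeroThreshold)
	expo.WidenZero(lo.DataPoint, hi.ZeroThreshold())
}

// only then merge the buckets
expo.Merge(dp.Positive(), in.Positive())
expo.Merge(dp.Negative(), in.Negative())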


dp.SetTimestamp(in.Timestamp())
dp.SetCount(dp.Count() + in.Count())
dp.SetZeroCount(dp.ZeroCount() + in.ZeroCount())
@@ -80,10 +80,11 @@ func (dp Histogram) CopyTo(dst Histogram) {

type ExpHistogram struct {
expo.DataPoint
MaxSize int
Member:
This doubles the size of each ExpHistogram value from 8 to 16 bytes.

Given there is likely no need to configure this at runtime anyways, can we just have a constant on the expo package?

expo.Limit(dp, in, expo.DefaultLimit)

}

func (dp ExpHistogram) Clone() ExpHistogram {
clone := ExpHistogram{DataPoint: pmetric.NewExponentialHistogramDataPoint()}
clone := ExpHistogram{DataPoint: pmetric.NewExponentialHistogramDataPoint(), MaxSize: dp.MaxSize}
if dp.DataPoint != (expo.DataPoint{}) {
dp.CopyTo(clone)
}
@@ -103,13 +103,5 @@ func Collapse(bs Buckets) {
counts.SetAt(i, counts.At(k)+counts.At(k+1))
}
}

// zero the excess area. its not needed to represent the observation
// anymore, but kept for two reasons:
// 1. future observations may need it, no need to re-alloc then if kept
// 2. [pcommon.Uint64Slice] can not, in fact, be sliced, so getting rid
// of it would alloc ¯\_(ツ)_/¯
for i := size; i < counts.Len(); i++ {
counts.SetAt(i, 0)
}
counts.FromRaw(counts.AsRaw()[:size])
Member:
This is a tricky one.

This change makes us reallocate all bucket counts every time we downscale, regardless of whether we are close to the limit or not.
Worse, it allocs twice because AsRaw copies, and FromRaw copies again.

Generally, we should keep allocated memory even if we have no counts in there, because those might arrive in the future. This is fine because we enforce our limit by downscaling beforehand, so we will never exceed it.

Contributor Author (@edma2, Nov 23, 2024):
IIRC I had to do this because we don't explicitly store the upper bound of the bucket, so we rely on the length of the slice itself. We must know the upper bound of the bucket to calculate the new scale. We could explicitly track the "real" length somewhere but it adds complexity.

FWIW I didn't see any noticeable hit to performance when this optimization was removed, so I went with the simpler approach.
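For reference, the two variants under discussion side by side (both taken from this diff; counts and size are the surrounding function's variables):

// (a) reallocating truncation, as proposed here: the slice length then
// encodes the bucket range's upper bound, but it copies twice, since
// AsRaw copies out and FromRaw copies back in.
counts.FromRaw(counts.AsRaw()[:size])

// (b) the removed zero-in-place variant: keeps the backing capacity for
// future observations and never allocates, but the length no longer
// tracks the occupied range.
for i := size; i < counts.Len(); i++ {
	counts.SetAt(i, 0)
}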

}
46 changes: 38 additions & 8 deletions processor/deltatocumulativeprocessor/internal/data/expo_test.go
@@ -23,10 +23,11 @@ func TestExpoAdd(t *testing.T) {
var obs0 = expotest.Observe0

cases := []struct {
name string
dp, in expdp
want expdp
flip bool
name string
dp, in expdp
want expdp
flip bool
maxSize int
}{{
name: "noop",
dp: expdp{PosNeg: bins{0, 0, 0, 0, 0, 0, 0, 0}.Into(), Count: 0},
Expand All @@ -37,6 +38,30 @@ func TestExpoAdd(t *testing.T) {
dp: expdp{PosNeg: bins{0, 0, 0, 0, 0, 0, 0, 0}.Into(), Count: 0},
in: expdp{PosNeg: bins{1, 2, 3, 4, 5, 6, 7, 8}.Into(), Count: 2 * (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8)},
want: expdp{PosNeg: bins{1, 2, 3, 4, 5, 6, 7, 8}.Into(), Count: 2 * (0 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8))},
}, {
name: "maxsize/1",
dp: expdp{PosNeg: bins{0, 0, 0, ø}.Into(), Count: 0},
in: expdp{PosNeg: bins{ø, ø, ø, ø, 1, 2, 3, 4}.Into(), Count: 2 * (1 + 2 + 3 + 4)},
want: expdp{PosNeg: bins{ø, ø, 0, 10, ø}.Into(), Scale: -3, Count: 2 * (0 + (1 + 2 + 3 + 4))},
Member:
I do not understand this test case.

The max length is 1, but want specifies 2 buckets (0, 10). why?

Contributor Author:
Good question. When downscaling, there is an edge case where we may need to extend the bucket range by 1 in order to fit all the counts. So the actual bucket count might be 1 larger than the max size.
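A minimal standalone illustration of that edge case (hypothetical bucket indices, downscaling from scale 0 to -1):

package main

import "fmt"

func main() {
	// two adjacent occupied buckets that straddle a scale boundary
	// stay distinct after halving, so the downscaled array can need
	// one bucket more than the nominal max size
	occupied := []int32{3, 4}
	merged := map[int32]bool{}
	for _, i := range occupied {
		merged[i>>1] = true // 3>>1 == 1, 4>>1 == 2
	}
	fmt.Println(len(merged)) // prints: 2
}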

maxSize: 1,
}, {
name: "maxsize/2",
dp: expdp{PosNeg: bins{0, 0, 0, ø}.Into(), Count: 0},
in: expdp{PosNeg: bins{ø, ø, ø, ø, 1, 2, 3, 4}.Into(), Count: 2 * (1 + 2 + 3 + 4)},
want: expdp{PosNeg: bins{ø, ø, 0, 6, 4, ø}.Into(), Scale: -2, Count: 2 * (0 + (1 + 2 + 3 + 4))},
maxSize: 2,
}, {
name: "maxsize/4",
dp: expdp{PosNeg: bins{0, 0, 0, ø}.Into(), Count: 0},
in: expdp{PosNeg: bins{ø, ø, ø, ø, 1, 2, 3, 4}.Into(), Count: 2 * (1 + 2 + 3 + 4)},
want: expdp{PosNeg: bins{ø, 0, 0, 1, 5, 4, ø}.Into(), Scale: -1, Count: 2 * (0 + (1 + 2 + 3 + 4))},
maxSize: 4,
}, {
name: "maxsize/8",
dp: expdp{PosNeg: bins{0, 0, 0, ø}.Into(), Count: 0},
in: expdp{PosNeg: bins{ø, ø, ø, ø, 1, 2, 3, 4}.Into(), Count: 2 * (1 + 2 + 3 + 4)},
want: expdp{PosNeg: bins{0, 0, 0, 0, 1, 2, 3, 4}.Into(), Scale: 0, Count: 2 * (0 + (1 + 2 + 3 + 4))},
maxSize: 8,
}, {
name: "lower+shorter",
dp: expdp{PosNeg: bins{ø, ø, ø, ø, ø, 1, 1, 1}.Into(), Count: 2 * 3},
@@ -85,7 +110,6 @@
bs := pmetric.NewExponentialHistogramDataPointBuckets()
expotest.ObserveInto(bs, expo.Scale(0), 1, 2, 3, 4)
expotest.ObserveInto(bs, expo.Scale(0), 4, 3, 2, 1)
bs.BucketCounts().Append([]uint64{0, 0}...) // rescaling leaves zeroed memory. this is expected
return bs
}()},
}}
@@ -95,10 +119,15 @@
return func(t *testing.T) {
is := datatest.New(t)

maxSize := 160
if cs.maxSize > 0 {
maxSize = cs.maxSize
}

var (
dp = ExpHistogram{dp.Into()}
in = ExpHistogram{in.Into()}
want = ExpHistogram{cs.want.Into()}
dp = ExpHistogram{dp.Into(), maxSize}
in = ExpHistogram{in.Into(), maxSize}
want = ExpHistogram{cs.want.Into(), maxSize}
)

dp.SetTimestamp(0)
Expand All @@ -107,6 +136,7 @@ func TestExpoAdd(t *testing.T) {

got := dp.Add(in)
is.Equal(want.DataPoint, got.DataPoint)
is.Equalf(want.MaxSize, got.MaxSize, "MaxSize")
}
}

@@ -72,9 +72,11 @@ func (s Histogram) SetAggregationTemporality(at pmetric.AggregationTemporality)

type ExpHistogram Metric

const expHistogramMaxSize = 160

func (s ExpHistogram) At(i int) data.ExpHistogram {
dp := Metric(s).ExponentialHistogram().DataPoints().At(i)
return data.ExpHistogram{DataPoint: dp}
dp := s.Metric.ExponentialHistogram().DataPoints().At(i)
return data.ExpHistogram{DataPoint: dp, MaxSize: expHistogramMaxSize}
}

func (s ExpHistogram) Len() int {
@@ -87,7 +89,7 @@ func (s ExpHistogram) Ident() Ident {

func (s ExpHistogram) Filter(expr func(data.ExpHistogram) bool) {
s.ExponentialHistogram().DataPoints().RemoveIf(func(dp pmetric.ExponentialHistogramDataPoint) bool {
return !expr(data.ExpHistogram{DataPoint: dp})
return !expr(data.ExpHistogram{DataPoint: dp, MaxSize: expHistogramMaxSize})
})
}

Expand Down