perf: Add mergeHistogram fast path #66

Closed · wants to merge 9 commits · Changes from 4 commits
aggregators/merger.go: 60 changes (42 additions, 18 deletions)

@@ -6,6 +6,7 @@ package aggregators

 import (
 	"io"
+	"sort"

 	"github.com/axiomhq/hyperloglog"
 	"github.com/cespare/xxhash/v2"
@@ -384,6 +385,7 @@ func mergeSpanMetrics(to, from *aggregationpb.SpanMetrics) {
 // mergeHistogram merges two proto representations of HDRHistogram. The
 // merge assumes both histograms are created with the same arguments and
 // that their representations are sorted by bucket.
+// Caution: this function may mutate from.Counts.
 func mergeHistogram(to, from *aggregationpb.HDRHistogram) {
 	if len(from.Buckets) == 0 {
 		return
@@ -395,27 +397,49 @@ func mergeHistogram(to, from *aggregationpb.HDRHistogram) {
 		return
 	}

-	requiredLen := len(to.Buckets) + len(from.Buckets)
-	for toIdx, fromIdx := 0, 0; toIdx < len(to.Buckets) && fromIdx < len(from.Buckets); {
-		v := to.Buckets[toIdx] - from.Buckets[fromIdx]
-		switch {
-		case v == 0:
-			// For every bucket that is common, we need one less bucket in the final slice.
-			requiredLen--
-			toIdx++
-			fromIdx++
-		case v < 0:
-			toIdx++
-		case v > 0:
-			fromIdx++
+	var extra int
+	toLen, fromLen := len(to.Buckets), len(from.Buckets)
+	if fromLen < toLen { // Heuristic to trade between O(m lg n) and O(n + m).
carsonip (Member, Author) commented:

[For reviewer] We can change this to `if fromLen < toLen / 2` or any other heuristic, but I assume this is good enough to rule out the edge case where it is 100% impossible to merge without additional slots.

carsonip (Member, Author) commented:

Also, `if fromLen < toLen / 2` isn't faster in benchmarks.
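For illustration, a micro-benchmark along these lines could compare such thresholds; BenchmarkMergeHistogram, makeHistogram, and the sizes are hypothetical, not from this PR:

// Hypothetical benchmark sketch (in a _test.go file, importing "fmt" and
// "testing"); makeHistogram and the sizes are assumed, not part of this PR.
// It measures mergeHistogram as from shrinks relative to to, which is the
// regime the heuristic targets.
func BenchmarkMergeHistogram(b *testing.B) {
	for _, frac := range []int{1, 2, 8, 64} { // fromLen = toLen / frac
		b.Run(fmt.Sprintf("fromLen=toLen/%d", frac), func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				b.StopTimer()
				to := makeHistogram(4096)          // assumed test helper
				from := makeHistogram(4096 / frac) // assumed test helper
				b.StartTimer()
				mergeHistogram(to, from)
			}
		})
	}
}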

+		// Fast path to optimize for cases where len(from.Buckets) << len(to.Buckets):
+		// binary-search for each of from.Buckets in to.Buckets for fewer comparisons;
+		// mergeHistogram will be O(m lg n), where m = fromLen and n = toLen.
+		for fromIdx := 0; fromIdx < fromLen; fromIdx++ {
+			toIdx, found := sort.Find(toLen, func(toIdx int) int {
+				return int(from.Buckets[fromIdx] - to.Buckets[toIdx])
+			})
+			if found {
+				to.Counts[toIdx] += from.Counts[fromIdx]
+				from.Counts[fromIdx] = 0
+			} else {
+				extra++
+			}
+		}
Contributor commented:

Suggested change:

-	for fromIdx := 0; fromIdx < fromLen; fromIdx++ {
-		toIdx, found := sort.Find(toLen, func(toIdx int) int {
-			return int(from.Buckets[fromIdx] - to.Buckets[toIdx])
-		})
-		if found {
-			to.Counts[toIdx] += from.Counts[fromIdx]
-			from.Counts[fromIdx] = 0
-		} else {
-			extra++
-		}
-	}
+	findIn := to
+	for fromIdx := 0; fromIdx < fromLen; fromIdx++ {
+		target := from.Buckets[fromIdx]
+		toIdx, found := sort.Find(len(findIn), func(toIdx int) int {
+			return int(target - findIn.Buckets[toIdx])
+		})
+		if toIdx == len(findIn) {
+			// every remaining bucket in `findIn` is less than target, so no
+			// later (larger) target can match anything in `findIn` either
+			break
+		}
+		if found {
+			findIn.Counts[toIdx] += from.Counts[fromIdx]
+			from.Counts[fromIdx] = 0
+		} else {
+			extra++
+		}
+		findIn = findIn[toIdx:] // the next target will definitely be greater than the current one
+	}

We can optimize the search a bit more by considering that from is also sorted. Added sample code above, but please double-check, as I wrote it in a hurry to suggest the idea.

carsonip (Member, Author) commented:

oh this is clever

carsonip (Member, Author) commented on Aug 2, 2023:

Actually, I have an idea that is applicable to more cases. Instead of searching in 0..toLen each time, we utilize the toIdx from the previous pass and search in 0..toIdx+1. This will require the `for fromIdx := 0; fromIdx < fromLen; fromIdx++` loop to be reversed.
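A sketch of that idea, assuming the surrounding variables from the fast path above; this is an illustration, not necessarily the code the PR ended up with:

// Sketch (assumption, not the PR's final code): iterate from.Buckets from
// the largest bucket down and reuse the previous match position to shrink
// the binary-search window on each pass.
searchLen := toLen
for fromIdx := fromLen - 1; fromIdx >= 0; fromIdx-- {
	toIdx, found := sort.Find(searchLen, func(i int) int {
		return int(from.Buckets[fromIdx] - to.Buckets[i])
	})
	if found {
		to.Counts[toIdx] += from.Counts[fromIdx]
		from.Counts[fromIdx] = 0
	} else {
		extra++
	}
	// The next target is strictly smaller, so it can only land at or
	// before toIdx; search 0..toIdx+1 on the next pass.
	searchLen = toIdx + 1
}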

carsonip (Member, Author) commented:

Moreover, in your code we must ensure extra > 0 when we break, and we'll also have to determine extra correctly; otherwise the extra == 0 early return would skip unmerged buckets, and the later grow would under-allocate. These short circuits are dangerous 😢

			if toIdx == len(findIn) {
				break
			}

carsonip (Member, Author) commented:

Updated the PR to limit the search space using the result of the previous iteration.

Contributor commented:

Looking at this again, I think it might be better to just optimise the merge for exactly the same buckets in to and from; if any bucket differs, we fall back to the previous logic. I think it will be simpler and give us the same benefits with slightly better performance. WDYT?
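For illustration, a minimal sketch of that suggestion (a hypothetical helper, not code from this PR; the equality check runs before any mutation, so falling back stays safe):

// Hypothetical helper sketching the reviewer's suggestion: merge in one
// pass when the bucket keys are identical, otherwise report false so the
// caller can fall back to the general merge.
func mergeHistogramSameBuckets(to, from *aggregationpb.HDRHistogram) bool {
	if len(to.Buckets) != len(from.Buckets) {
		return false
	}
	for i := range to.Buckets {
		if to.Buckets[i] != from.Buckets[i] {
			return false // buckets differ; use the general merge instead
		}
	}
	for i := range from.Counts {
		to.Counts[i] += from.Counts[i]
	}
	return true
}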

carsonip (Member, Author) commented on Aug 2, 2023:

> if any bucket differs, we fall back to the previous logic. I think it will be simpler and give us the same benefits with slightly better performance.

Yes, we can fall back to the previous logic, but we'll still have to pick between O(n + m) and O(m lg n) for counting the extra buckets, so that we can grow the slice in one go. Since in a lot of cases m << n, having O(m lg n) just for the counting may still be faster than O(n + m); e.g. with n = 4096 and m = 16, that is roughly 16 × 12 = 192 comparisons versus 4112 steps. Imo the performance isn't that clear cut. Am I missing anything?

carsonip (Member, Author) commented on Aug 2, 2023:

Ah right, since the merge is O(n + m), the runtime of the whole function, when we do the binary search without falling back immediately, becomes max(O(m lg n), O(n + m)). Yes, in theory, falling back early gives slightly better performance. I'll see how I can incorporate this.

+		if extra == 0 {
+			return
+		}
+	} else {
+		// Slow path with runtime O(n + m).
+		requiredLen := toLen + fromLen
+		for toIdx, fromIdx := 0, 0; toIdx < toLen && fromIdx < fromLen; {
+			v := to.Buckets[toIdx] - from.Buckets[fromIdx]
+			switch {
+			case v == 0:
+				// For every bucket that is common, we need one less bucket in the final slice.
+				requiredLen--
+				toIdx++
+				fromIdx++
+			case v < 0:
+				toIdx++
+			case v > 0:
+				fromIdx++
+			}
+		}
+		extra = requiredLen - toLen
+	}

-	toIdx, fromIdx := len(to.Buckets)-1, len(from.Buckets)-1
-	to.Buckets = slices.Grow(to.Buckets, requiredLen-len(to.Buckets))
-	to.Counts = slices.Grow(to.Counts, requiredLen-len(to.Counts))
-	to.Buckets = to.Buckets[:requiredLen]
-	to.Counts = to.Counts[:requiredLen]
+	toIdx, fromIdx := toLen-1, fromLen-1
+	to.Buckets = slices.Grow(to.Buckets, extra)[:toLen+extra]
+	to.Counts = slices.Grow(to.Counts, extra)[:toLen+extra]
 	for idx := len(to.Buckets) - 1; idx >= 0; idx-- {
 		if fromIdx < 0 {
 			break
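The captured diff is truncated here. For context, a backward in-place merge of this shape typically finishes along the following lines; this is a sketch under that assumption, not the PR's actual code:

// Sketch (assumption, not the PR's actual code): walk both bucket lists
// from the end, writing the larger bucket into the grown tail of to.
// toIdx scans the original to entries, fromIdx the remaining from entries,
// and idx is the write position.
for idx := len(to.Buckets) - 1; idx >= 0; idx-- {
	if fromIdx < 0 {
		break // remaining to entries are already in place
	}
	if toIdx >= 0 && to.Buckets[toIdx] >= from.Buckets[fromIdx] {
		if to.Buckets[toIdx] == from.Buckets[fromIdx] {
			// Equal buckets collapse into one slot. In the fast path the
			// matched from.Counts were already added and zeroed, so this
			// addition is a no-op there and only matters for the slow path.
			to.Counts[toIdx] += from.Counts[fromIdx]
			fromIdx--
		}
		to.Buckets[idx] = to.Buckets[toIdx]
		to.Counts[idx] = to.Counts[toIdx]
		toIdx--
	} else {
		to.Buckets[idx] = from.Buckets[fromIdx]
		to.Counts[idx] = from.Counts[fromIdx]
		fromIdx--
	}
}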