zyedidia · JoeKar · Feb 9, 2025 · Dec 13, 2024 · dmaluka · Jan 25, 2025
diff --git a/internal/buffer/loc.go b/internal/buffer/loc.go
@@ -47,6 +47,16 @@ func (l Loc) LessEqual(b Loc) bool {
 	return l == b
 }
 
+// Clamp restricts l to the range from start to end and returns the result
+func (l Loc) Clamp(start, end Loc) Loc {
+	if l.GreaterEqual(end) {
+		l = end
+	} else if l.LessThan(start) {
+		l = start
+	}
+	return l
+}
+
 // The following functions require a buffer to know where newlines are
 
 // Diff returns the distance between two locations
@@ -139,10 +149,5 @@ func ByteOffset(pos Loc, buf *Buffer) int {
 
 // clamps a loc within a buffer
 func clamp(pos Loc, la *LineArray) Loc {
-	if pos.GreaterEqual(la.End()) {
-		return la.End()
-	} else if pos.LessThan(la.Start()) {
-		return la.Start()
-	}
-	return pos
+	return pos.Clamp(la.Start(), la.End())
 }
diff --git a/internal/buffer/search.go b/internal/buffer/search.go
@@ -2,10 +2,56 @@ package buffer
 
 import (
 	"regexp"
+	"unicode/utf8"
 
 	"github.com/zyedidia/micro/v2/internal/util"
 )
 
+// We want "^" and "$" to match only the beginning/end of a line, not the
+// beginning/end of the search region if it is in the middle of a line.
+// In that case we use padded regexps to require a rune before or after
+// the match. (This also affects other empty-string patterns like "\\b".)
+// The following two flags indicate the padding used.
+const (
+	padStart = 1 << iota
+	padEnd
+)
+
+// findLineParams prepares line i for a search between start and end. It returns
+// the (possibly sliced) line, its character offset, the pad flags and the regexp to use.
+func findLineParams(b *Buffer, start, end Loc, i int, r *regexp.Regexp) ([]byte, int, int, *regexp.Regexp) {
+	line := b.LineBytes(i)
+	offset, pad := 0, 0
+	if i == end.Y {
+		n := util.CharacterCount(line)
+		end.X = util.Clamp(end.X, 0, n)
+		if end.X < n {
+			line = util.SliceStart(line, end.X+1)
+			pad |= padEnd
+		}
+	}
+
+	if i == start.Y {
+		n := util.CharacterCount(line)
+		start.X = util.Clamp(start.X, 0, n)
+		if start.X > 0 {
+			offset = start.X - 1
+			line = util.SliceEnd(line, offset)
+			pad |= padStart
+		}
+	}
+
+	switch pad {
+	case padStart:
+		r = regexp.MustCompile(".(?:" + r.String() + ")")
+	case padEnd:
+		r = regexp.MustCompile("(?:" + r.String() + ").")
+	case padStart | padEnd:
+		r = regexp.MustCompile(".(?:" + r.String() + ").")
+	}
+	return line, offset, pad, r
+}
+
 func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
 	lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1))
 	if start.Y > b.LinesNum()-1 {
@@ -22,30 +68,19 @@ func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
 	}
 
 	for i := start.Y; i <= end.Y; i++ {
-		l := b.LineBytes(i)
-		charpos := 0
-
-		if i == start.Y && start.Y == end.Y {
-			nchars := util.CharacterCount(l)
-			start.X = util.Clamp(start.X, 0, nchars)
-			end.X = util.Clamp(end.X, 0, nchars)
-			l = util.SliceStart(l, end.X)
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == start.Y {
-			nchars := util.CharacterCount(l)
-			start.X = util.Clamp(start.X, 0, nchars)
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == end.Y {
-			nchars := util.CharacterCount(l)
-			end.X = util.Clamp(end.X, 0, nchars)
-			l = util.SliceStart(l, end.X)
-		}
+		l, charpos, padMode, rPadded := findLineParams(b, start, end, i, r)
 
-		match := r.FindIndex(l)
+		match := rPadded.FindIndex(l)
 
 		if match != nil {
+			if padMode&padStart != 0 {
+				_, size := utf8.DecodeRune(l[match[0]:])
+				match[0] += size
+			}
+			if padMode&padEnd != 0 {
+				_, size := utf8.DecodeLastRune(l[:match[1]])
+				match[1] -= size
+			}
 			start := Loc{charpos + util.RunePos(l, match[0]), i}
 			end := Loc{charpos + util.RunePos(l, match[1]), i}
 			return [2]Loc{start, end}, true
@@ -70,39 +105,39 @@ func (b *Buffer) findUp(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) {
 	}
 
 	for i := end.Y; i >= start.Y; i-- {
-		l := b.LineBytes(i)
-		charpos := 0
-
-		if i == start.Y && start.Y == end.Y {
-			nchars := util.CharacterCount(l)
-			start.X = util.Clamp(start.X, 0, nchars)
-			end.X = util.Clamp(end.X, 0, nchars)
-			l = util.SliceStart(l, end.X)
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == start.Y {
-			nchars := util.CharacterCount(l)
-			start.X = util.Clamp(start.X, 0, nchars)
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == end.Y {
-			nchars := util.CharacterCount(l)
-			end.X = util.Clamp(end.X, 0, nchars)
-			l = util.SliceStart(l, end.X)
-		}
-
-		allMatches := r.FindAllIndex(l, -1)
+		charCount := util.CharacterCount(b.LineBytes(i))
+		from := Loc{0, i}.Clamp(start, end)
+		to := Loc{charCount, i}.Clamp(start, end)
 
+		allMatches := b.findAll(r, from, to)
 		if allMatches != nil {
 			match := allMatches[len(allMatches)-1]
-			start := Loc{charpos + util.RunePos(l, match[0]), i}
-			end := Loc{charpos + util.RunePos(l, match[1]), i}
-			return [2]Loc{start, end}, true
+			return [2]Loc{match[0], match[1]}, true
 		}
 	}
 	return [2]Loc{}, false
 }
 
+// findAll collects the successive matches of r that findDown reports between start and end
+func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc {
+	var found [][2]Loc
+	for pos := start; ; {
+		m, ok := b.findDown(r, pos, end)
+		if !ok {
+			return found
+		}
+		found = append(found, m)
+		if m[0] != m[1] {
+			pos = m[1]
+		} else if m[1] != end {
+			// empty match: move on by one so the same match is not found again
+			pos = m[1].Move(1, b)
+		} else {
+			return found
+		}
+	}
+}
+
 // FindNext finds the next occurrence of a given string in the buffer
 // It returns the start and end location of the match (if found) and
 // a boolean indicating if it was found
@@ -146,53 +181,58 @@ func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bo
 }
 
 // ReplaceRegex replaces all occurrences of 'search' with 'replace' in the given area
-// and returns the number of replacements made and the number of runes
+// and returns the number of replacements made and the number of characters
 // added or removed on the last line of the range
 func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []byte, captureGroups bool) (int, int) {
 	if start.GreaterThan(end) {
 		start, end = end, start
 	}
 
-	netrunes := 0
-
+	charsEnd := util.CharacterCount(b.LineBytes(end.Y))
 	found := 0
 	var deltas []Delta
+
 	for i := start.Y; i <= end.Y; i++ {
-		l := b.lines[i].data
-		charpos := 0
-
-		if start.Y == end.Y && i == start.Y {
-			l = util.SliceStart(l, end.X)
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == start.Y {
-			l = util.SliceEnd(l, start.X)
-			charpos = start.X
-		} else if i == end.Y {
-			l = util.SliceStart(l, end.X)
-		}
-		newText := search.ReplaceAllFunc(l, func(in []byte) []byte {
-			var result []byte
-			if captureGroups {
-				for _, submatches := range search.FindAllSubmatchIndex(in, -1) {
-					result = search.Expand(result, replace, in, submatches)
+		l := b.LineBytes(i)
+		charCount := util.CharacterCount(l)
+		if (i == start.Y && start.X > 0) || (i == end.Y && end.X < charCount) {
+			// This replacement code works in general, but it creates a separate
+			// modification for each match. We only use it for the first and last
+			// lines, which may use padded regexps
+
+			from := Loc{0, i}.Clamp(start, end)
+			to := Loc{charCount, i}.Clamp(start, end)
+			matches := b.findAll(search, from, to)
+			found += len(matches)
+
+			for j := len(matches) - 1; j >= 0; j-- {
+				// if we counted upwards, the different deltas would interfere
+				match := matches[j]
+				var newText []byte
+				if captureGroups {
+					newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace)
+				} else {
+					newText = replace
 				}
-			} else {
-				result = replace
+				deltas = append(deltas, Delta{newText, match[0], match[1]})
 			}
-			found++
-			if i == end.Y {
-				netrunes += util.CharacterCount(result) - util.CharacterCount(in)
-			}
-			return result
-		})
-
-		from := Loc{charpos, i}
-		to := Loc{charpos + util.CharacterCount(l), i}
-
-		deltas = append(deltas, Delta{newText, from, to})
+		} else {
+			newLine := search.ReplaceAllFunc(l, func(in []byte) []byte {
+				found++
+				var result []byte
+				if captureGroups {
+					match := search.FindSubmatchIndex(in)
+					result = search.Expand(result, replace, in, match)
+				} else {
+					result = replace
+				}
+				return result
+			})
+			deltas = append(deltas, Delta{newLine, Loc{0, i}, Loc{charCount, i}})
+		}
 	}
+
 	b.MultipleReplace(deltas)
 
-	return found, netrunes
+	return found, util.CharacterCount(b.LineBytes(end.Y)) - charsEnd
 }