// A Glob is an LSP-compliant glob pattern, as defined by the spec:
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#documentFilter
//
// NOTE: this implementation is currently only intended for testing. In order
// to make it production ready, we'd need to:
//   - verify it against the VS Code implementation
//   - add more tests
//   - microbenchmark, likely avoiding the element interface
//   - resolve the question of what is meant by "character". If it's a UTF-16
//     code (as we suspect) it'll be a bit more work.
//
// Quoting from the spec:
// Glob patterns can have the following syntax:
//   - `*` to match one or more characters in a path segment
//   - `?` to match on one character in a path segment
//   - `**` to match any number of path segments, including none
//   - `{}` to group sub patterns into an OR expression. (e.g. `**/*.{ts,js}`
//     matches all TypeScript and JavaScript files)
//   - `[]` to declare a range of characters to match in a path segment
//     (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
//   - `[!...]` to negate a range of characters to match in a path segment
//     (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but
//     not `example.0`)
//
// Expanding on this:
//   - '/' matches one or more literal slashes.
//   - any other character matches itself literally.
//   - in this implementation `*` may also match the empty string
//     (for example "f*o" matches "fo").
type Glob struct {
	elems []element // pattern elements
}

// Parse builds a Glob for the given pattern, returning an error if the pattern
// is invalid.
func Parse(pattern string) (*Glob, error) {
	g, _, err := parse(pattern, false)
	return g, err
}

// parse consumes pattern and returns the parsed Glob together with the
// unconsumed remainder of the pattern.
//
// When nested is true the parser is inside a '{...}' group: it stops (without
// consuming) at the first top-level ',' or '}' and returns the rest of the
// pattern so that the caller can continue with the next alternative. When
// nested is false, ',' and '}' are ordinary literal characters and the
// returned remainder is always empty.
func parse(pattern string, nested bool) (*Glob, string, error) {
	g := new(Glob)
	for len(pattern) > 0 {
		switch pattern[0] {
		case '/':
			pattern = pattern[1:]
			g.elems = append(g.elems, slash{})

		case '*':
			if len(pattern) > 1 && pattern[1] == '*' {
				// "**" must be a whole path segment: preceded by nothing or a
				// slash, and followed by nothing or a slash.
				afterNonSlash := len(g.elems) > 0 && !isSlash(g.elems[len(g.elems)-1])
				beforeNonSlash := len(pattern) > 2 && pattern[2] != '/'
				if afterNonSlash || beforeNonSlash {
					return nil, "", errors.New("** may only be adjacent to '/'")
				}
				pattern = pattern[2:]
				g.elems = append(g.elems, starStar{})
				break
			}
			pattern = pattern[1:]
			g.elems = append(g.elems, star{})

		case '?':
			pattern = pattern[1:]
			g.elems = append(g.elems, anyChar{})

		case '{':
			var alts group
			// On entry pattern[0] is '{'; after each alternative it is the
			// ',' or '}' at which the nested parse stopped.
			for pattern[0] != '}' {
				pattern = pattern[1:] // skip '{' or ','
				alt, rest, err := parse(pattern, true)
				if err != nil {
					return nil, "", err
				}
				// The nested parse only returns an empty remainder when it ran
				// off the end of the pattern without seeing ',' or '}'.
				if len(rest) == 0 {
					return nil, "", errors.New("unmatched '{'")
				}
				pattern = rest
				alts = append(alts, alt)
			}
			pattern = pattern[1:] // skip '}'
			g.elems = append(g.elems, alts)

		case '}', ',':
			if nested {
				return g, pattern, nil
			}
			// Outside of a group these characters have no special meaning.
			pattern = g.parseLiteral(pattern, false)

		case '[':
			pattern = pattern[1:]
			if len(pattern) == 0 {
				return nil, "", errBadRange
			}
			negate := false
			if pattern[0] == '!' {
				pattern = pattern[1:]
				negate = true
			}
			low, sz, err := readRangeRune(pattern)
			if err != nil {
				return nil, "", err
			}
			pattern = pattern[sz:]
			if len(pattern) == 0 || pattern[0] != '-' {
				return nil, "", errBadRange
			}
			pattern = pattern[1:]
			high, sz, err := readRangeRune(pattern)
			if err != nil {
				return nil, "", err
			}
			pattern = pattern[sz:]
			if len(pattern) == 0 || pattern[0] != ']' {
				return nil, "", errBadRange
			}
			pattern = pattern[1:]
			g.elems = append(g.elems, charRange{negate, low, high})

		default:
			pattern = g.parseLiteral(pattern, nested)
		}
	}
	return g, "", nil
}

// readRangeRune decodes the first rune of input for use as a bound in a range
// element such as [a-z]. It returns the rune and its encoded size in bytes.
// Empty input yields errBadRange; an invalid encoding yields errInvalidUTF8.
func readRangeRune(input string) (rune, int, error) {
	r, sz := utf8.DecodeRuneInString(input)
	var err error
	if r == utf8.RuneError {
		// See the documentation for DecodeRuneInString: size 0 means the
		// input was empty, size 1 means the encoding was invalid. Any other
		// size is a literal U+FFFD in the pattern, which is accepted.
		switch sz {
		case 0:
			err = errBadRange
		case 1:
			err = errInvalidUTF8
		}
	}
	return r, sz, err
}

var (
	errBadRange    = errors.New("'[' patterns must be of the form [x-y]")
	errInvalidUTF8 = errors.New("invalid UTF-8 encoding")
)

// parseLiteral appends the longest prefix of pattern containing no special
// characters to g as a literal element and returns the rest of the pattern.
// Inside a group (nested) the characters '}' and ',' are special as well.
//
// A special character in the first position is taken literally together with
// whatever follows up to the next special character; parse relies on this for
// ',' and '}' outside of groups.
func (g *Glob) parseLiteral(pattern string, nested bool) string {
	specialChars := "*?{[/"
	if nested {
		specialChars = "*?{[/},"
	}
	end := strings.IndexAny(pattern, specialChars)
	if end == -1 {
		end = len(pattern)
	}
	g.elems = append(g.elems, literal(pattern[:end]))
	return pattern[end:]
}

// String returns the pattern text reconstructed from the parsed elements.
func (g *Glob) String() string {
	var b strings.Builder
	for _, e := range g.elems {
		fmt.Fprint(&b, e)
	}
	return b.String()
}

// element holds a glob pattern element, as defined below.
type element fmt.Stringer

// element types.
type (
	slash     struct{} // One or more '/' separators
	literal   string   // string literal, not containing /, *, ?, {}, or []
	star      struct{} // *
	anyChar   struct{} // ?
	starStar  struct{} // **
	group     []*Glob  // {foo, bar, ...} grouping
	charRange struct { // [a-z] character range
		negate    bool
		low, high rune
	}
)

func (s slash) String() string    { return "/" }
func (l literal) String() string  { return string(l) }
func (s star) String() string     { return "*" }
func (a anyChar) String() string  { return "?" }
func (s starStar) String() string { return "**" }
func (g group) String() string {
	parts := make([]string, 0, len(g))
	for _, alt := range g {
		parts = append(parts, alt.String())
	}
	return "{" + strings.Join(parts, ",") + "}"
}
func (r charRange) String() string {
	neg := ""
	if r.negate {
		// Keep the negation so that the printed form matches what was parsed.
		neg = "!"
	}
	return "[" + neg + string(r.low) + "-" + string(r.high) + "]"
}

// isSlash reports whether e is a path separator element.
func isSlash(e element) bool {
	_, ok := e.(slash)
	return ok
}

// Match reports whether the input string matches the glob pattern.
func (g *Glob) Match(input string) bool {
	return match(g.elems, input)
}

// match reports whether input is matched in its entirety by the sequence of
// pattern elements elems. It consumes elements left to right and recurses to
// backtrack for `*`, `**` and `{}`.
func match(elems []element, input string) bool {
	for len(elems) > 0 {
		elem := elems[0]
		elems = elems[1:]
		switch elem := elem.(type) {
		case slash:
			if len(input) == 0 || input[0] != '/' {
				return false
			}
			// A slash element matches a whole run of separators. TrimLeft
			// stops safely when the input consists only of slashes.
			input = strings.TrimLeft(input, "/")

		case starStar:
			// Special cases:
			//  - **/a matches "a"
			//  - **/ matches everything
			//
			// Note that if ** is followed by anything, it must be '/' (this is
			// enforced by Parse).
			if len(elems) > 0 {
				elems = elems[1:]
			}

			// A trailing ** matches anything.
			if len(elems) == 0 {
				return true
			}

			// Backtracking: advance pattern segments until the remaining pattern
			// elements match.
			for len(input) != 0 {
				if match(elems, input) {
					return true
				}
				_, input = split(input)
			}
			return false

		case literal:
			if !strings.HasPrefix(input, string(elem)) {
				return false
			}
			input = input[len(elem):]

		case star:
			// A star never crosses a path separator: it is matched, together
			// with the pattern elements up to the next slash element, against
			// the current input segment only.
			segInput, rest := split(input)

			elemEnd := len(elems)
			for i, e := range elems {
				if isSlash(e) {
					elemEnd = i
					break
				}
			}
			segElems := elems[:elemEnd]
			elems = elems[elemEnd:]

			if len(elems) > 0 {
				// The next element is a slash, which must still find the
				// separator in the input, so resume right after the segment
				// instead of after the separator.
				input = input[len(segInput):]
			} else {
				input = rest
			}

			// A trailing * matches the entire segment.
			if len(segElems) == 0 {
				continue
			}

			// Backtracking: advance characters until remaining subpattern elements
			// match.
			matched := false
			for i := range segInput {
				if match(segElems, segInput[i:]) {
					matched = true
					break
				}
			}
			if !matched {
				return false
			}

		case anyChar:
			if len(input) == 0 || input[0] == '/' {
				return false
			}
			// Consume one whole character, not one byte, so that multi-byte
			// UTF-8 sequences are not split.
			_, sz := utf8.DecodeRuneInString(input)
			input = input[sz:]

		case group:
			// Append remaining pattern elements to each group member looking for a
			// match.
			var branch []element
			for _, m := range elem {
				branch = branch[:0]
				branch = append(branch, m.elems...)
				branch = append(branch, elems...)
				if match(branch, input) {
					return true
				}
			}
			return false

		case charRange:
			if len(input) == 0 || input[0] == '/' {
				return false
			}
			c, sz := utf8.DecodeRuneInString(input)
			inRange := elem.low <= c && c <= elem.high
			// [x-y] requires the character to be in range, [!x-y] requires it
			// to be out of range.
			if inRange == elem.negate {
				return false
			}
			input = input[sz:]

		default:
			panic(fmt.Sprintf("segment type %T not implemented", elem))
		}
	}

	return len(input) == 0
}

// split returns the portion before and after the first slash
// (or sequence of consecutive slashes). If there is no slash
// it returns (input, "").
func split(input string) (first, rest string) {
	i := strings.IndexByte(input, '/')
	if i < 0 {
		return input, ""
	}
	return input[:i], strings.TrimLeft(input[i:], "/")
}
behavior + {"/*", "/a", true}, + {"*", "foo", true}, + {"*o", "foo", true}, + {"*o", "foox", false}, + {"f*o", "foo", true}, + {"f*o", "fo", true}, + {"fo?", "foo", true}, + {"fo?", "fox", true}, + {"fo?", "fooo", false}, + {"fo?", "fo", false}, + {"?", "a", true}, + {"?", "ab", false}, + {"?", "", false}, + {"*?", "", false}, + {"?b", "ab", true}, + {"?c", "ab", false}, + + // {} behavior + {"ab{c,d}e", "abce", true}, + {"ab{c,d}e", "abde", true}, + {"ab{c,d}e", "abxe", false}, + {"ab{c,d}e", "abe", false}, + {"{a,b}c", "ac", true}, + {"{a,b}c", "bc", true}, + {"{a,b}c", "ab", false}, + {"a{b,c}", "ab", true}, + {"a{b,c}", "ac", true}, + {"a{b,c}", "bc", false}, + {"ab{c{1,2},d}e", "abc1e", true}, + {"ab{c{1,2},d}e", "abde", true}, + {"ab{c{1,2},d}e", "abc1f", false}, + {"ab{c{1,2},d}e", "abce", false}, + {"ab{c[}-~]}d", "abc}d", true}, + {"ab{c[}-~]}d", "abc~d", true}, + {"ab{c[}-~],y}d", "abcxd", false}, + {"ab{c[}-~],y}d", "abyd", true}, + {"ab{c[}-~],y}d", "abd", false}, + {"{a/b/c,d/e/f}", "a/b/c", true}, + {"/ab{/c,d}e", "/ab/ce", true}, + {"/ab{/c,d}e", "/ab/cf", false}, + + // [-] behavior + {"[a-c]", "a", true}, + {"[a-c]", "b", true}, + {"[a-c]", "c", true}, + {"[a-c]", "d", false}, + {"[a-c]", " ", false}, + + // Realistic examples. 
+ {"**/*.{ts,js}", "path/to/foo.ts", true}, + {"**/*.{ts,js}", "path/to/foo.js", true}, + {"**/*.{ts,js}", "path/to/foo.go", false}, + } + + for _, test := range tests { + g, err := glob.Parse(test.pattern) + if err != nil { + t.Fatalf("New(%q) failed unexpectedly: %v", test.pattern, err) + } + if got := g.Match(test.input); got != test.want { + t.Errorf("New(%q).Match(%q) = %t, want %t", test.pattern, test.input, got, test.want) + } + } +} diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 6e7a0b8..f746533 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -10,6 +10,7 @@ import ( "time" "github.com/fsnotify/fsnotify" + "github.com/isaacphi/mcp-language-server/internal/glob" "github.com/isaacphi/mcp-language-server/internal/logging" "github.com/isaacphi/mcp-language-server/internal/lsp" "github.com/isaacphi/mcp-language-server/internal/protocol" @@ -18,6 +19,41 @@ import ( // Create a logger for the watcher component var watcherLogger = logging.NewLogger(logging.Watcher) +// Glob pattern cache to avoid re-parsing the same patterns +var ( + globCache sync.Map // Thread-safe map: pattern string -> *glob.Glob + globCacheSize int // Approximate cache size + globCacheMu sync.Mutex + maxCacheSize = 100 // Clear cache if it exceeds this size (safety valve) +) + +// parseGlobCached parses a glob pattern with caching. If cache exceeds +// maxCacheSize, it's completely cleared and a warning is logged. 
+func parseGlobCached(pattern string) (*glob.Glob, error) { + if g, ok := globCache.Load(pattern); ok { + return g.(*glob.Glob), nil + } + + g, err := glob.Parse(pattern) + if err != nil { + return nil, err + } + + globCacheMu.Lock() + globCacheSize++ + cleared := globCacheSize > maxCacheSize + if cleared { + globCache = sync.Map{} + globCacheSize = 1 + } + globCacheMu.Unlock() + globCache.Store(pattern, g) + if cleared { + watcherLogger.Warn("Glob pattern cache exceeded %d entries, clearing cache (possible pattern leak)", maxCacheSize) + } + return g, nil +} + // WorkspaceWatcher manages LSP file watching type WorkspaceWatcher struct { client LSPClient @@ -339,109 +375,7 @@ func (w *WorkspaceWatcher) isPathWatched(path string) (bool, protocol.WatchKind) return false, 0 } -// matchesGlob handles advanced glob patterns including ** and alternatives -func matchesGlob(pattern, path string) bool { - // Handle file extension patterns with braces like *.{go,mod,sum} - if strings.Contains(pattern, "{") && strings.Contains(pattern, "}") { - // Extract extensions from pattern like "*.{go,mod,sum}" - parts := strings.SplitN(pattern, "{", 2) - if len(parts) == 2 { - prefix := parts[0] - extPart := strings.SplitN(parts[1], "}", 2) - if len(extPart) == 2 { - extensions := strings.Split(extPart[0], ",") - suffix := extPart[1] - - // Check if the path matches any of the extensions - for _, ext := range extensions { - extPattern := prefix + ext + suffix - isMatch := matchesSimpleGlob(extPattern, path) - if isMatch { - return true - } - } - return false - } - } - } - - return matchesSimpleGlob(pattern, path) -} - -// matchesSimpleGlob handles glob patterns with ** wildcards -func matchesSimpleGlob(pattern, path string) bool { - // Handle special case for **/*.ext pattern (common in LSP) - if strings.HasPrefix(pattern, "**/") { - rest := strings.TrimPrefix(pattern, "**/") - - // If the rest is a simple file extension pattern like *.go - if strings.HasPrefix(rest, "*.") { - ext := 
strings.TrimPrefix(rest, "*") - isMatch := strings.HasSuffix(path, ext) - return isMatch - } - - // Otherwise, try to check if the path ends with the rest part - isMatch := strings.HasSuffix(path, rest) - - // If it matches directly, great! - if isMatch { - return true - } - - // Otherwise, check if any path component matches - pathComponents := strings.Split(path, "/") - for i := range pathComponents { - subPath := strings.Join(pathComponents[i:], "/") - if strings.HasSuffix(subPath, rest) { - return true - } - } - - return false - } - - // Handle other ** wildcard pattern cases - if strings.Contains(pattern, "**") { - parts := strings.Split(pattern, "**") - - // Validate the path starts with the first part - if !strings.HasPrefix(path, parts[0]) && parts[0] != "" { - return false - } - - // For patterns like "**/*.go", just check the suffix - if len(parts) == 2 && parts[0] == "" { - isMatch := strings.HasSuffix(path, parts[1]) - return isMatch - } - - // For other patterns, handle middle part - remaining := strings.TrimPrefix(path, parts[0]) - if len(parts) == 2 { - isMatch := strings.HasSuffix(remaining, parts[1]) - return isMatch - } - } - - // Handle simple * wildcard for file extension patterns (*.go, *.sum, etc) - if strings.HasPrefix(pattern, "*.") { - ext := strings.TrimPrefix(pattern, "*") - isMatch := strings.HasSuffix(path, ext) - return isMatch - } - - // Fall back to simple matching for simpler patterns - matched, err := filepath.Match(pattern, path) - if err != nil { - watcherLogger.Error("Error matching pattern %s: %v", pattern, err) - return false - } - - return matched -} - -// matchesPattern checks if a path matches the glob pattern +// matchesPattern checks if a path matches the glob pattern using LSP-compliant glob matching func (w *WorkspaceWatcher) matchesPattern(path string, pattern protocol.GlobPattern) bool { patternInfo, err := pattern.AsPattern() if err != nil { @@ -452,39 +386,28 @@ func (w *WorkspaceWatcher) matchesPattern(path string, 
pattern protocol.GlobPatt basePath := patternInfo.GetBasePath() patternText := patternInfo.GetPattern() - // watcherLogger.Debug("Matching path %s against pattern %s (base: %s)", path, patternText, basePath) + watcherLogger.Debug("Matching path %s against pattern %s (base: %s)", path, patternText, basePath) path = filepath.ToSlash(path) - // Special handling for wildcard patterns like "**/*" - if patternText == "**/*" { - // This should match any file - // watcherLogger.Debug("Using special matching for **/* pattern") - return true + // Parse the glob pattern (with caching) + g, err := parseGlobCached(patternText) + if err != nil { + watcherLogger.Error("Error parsing glob pattern %q: %v", patternText, err) + return false } - // Special handling for wildcard patterns like "**/*.ext" - if strings.HasPrefix(patternText, "**/") { - if strings.HasPrefix(strings.TrimPrefix(patternText, "**/"), "*.") { - // Extension pattern like **/*.go - ext := strings.TrimPrefix(strings.TrimPrefix(patternText, "**/"), "*") - // watcherLogger.Debug("Using extension matching for **/*.ext pattern: checking if %s ends with %s", path, ext) - return strings.HasSuffix(path, ext) - } else { - // Any other pattern starting with **/ should match any path - // watcherLogger.Debug("Using path substring matching for **/ pattern") + // For patterns without a base path + if basePath == "" { + // Check if the pattern matches the full path or just the file basename + if g.Match(path) { return true } - } - - // For simple patterns without base path - if basePath == "" { - // Check if the pattern matches the full path or just the file extension - fullPathMatch := matchesGlob(patternText, path) - baseNameMatch := matchesGlob(patternText, filepath.Base(path)) - - watcherLogger.Debug("No base path, fullPathMatch: %v, baseNameMatch: %v", fullPathMatch, baseNameMatch) - return fullPathMatch || baseNameMatch + // Also try matching against just the basename + if g.Match(filepath.Base(path)) { + return true + } 
+ return false } // For relative patterns @@ -499,10 +422,7 @@ func (w *WorkspaceWatcher) matchesPattern(path string, pattern protocol.GlobPatt } relPath = filepath.ToSlash(relPath) - isMatch := matchesGlob(patternText, relPath) - watcherLogger.Debug("Relative path matching: %s against %s = %v", relPath, patternText, isMatch) - - return isMatch + return g.Match(relPath) } // debounceHandleFileEvent handles file events with debouncing to reduce notifications