Skip to content

Commit

Permalink
added File.Glob
Browse files Browse the repository at this point in the history
  • Loading branch information
ungerik committed Sep 18, 2024
1 parent ed2d43b commit a509e74
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 40 deletions.
134 changes: 133 additions & 1 deletion file.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"io"
iofs "io/fs"
"iter"
"path"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -474,7 +476,7 @@ func (file File) ListDirIter(patterns ...string) iter.Seq2[File, error] {
// Canceling the context will stop the iteration and yield the context error.
func (file File) ListDirIterContext(ctx context.Context, patterns ...string) iter.Seq2[File, error] {
return func(yield func(File, error) bool) {
const cancel SentinelError = "cancel"
var cancel SentinelError
err := file.ListDirContext(ctx,
func(listedFile File) error {
if !yield(listedFile, nil) {
Expand All @@ -490,6 +492,136 @@ func (file File) ListDirIterContext(ctx context.Context, patterns ...string) ite
}
}

// MustGlob is like [File.Glob] but panics instead of returning an error.
//
// It yields every file matching the pattern together with
// the path segments that were substituted for the wildcard
// containing segments of the pattern on the way to that file.
// Segments without wildcards and the name of the yielded file itself
// are not part of the yielded path segments.
//
// The pattern syntax is the one of [path.Match].
// Path segments are always separated by a slash '/',
// independently of the file's file system.
// Because MustGlob delegates to Glob, a pattern ending
// with a slash '/' will match only directories.
//
// File system errors such as I/O errors reading directories are ignored.
// The only possible panic is in case of a malformed pattern.
func (file File) MustGlob(pattern string) iter.Seq2[File, []string] {
	matches, globErr := file.Glob(pattern)
	if globErr == nil {
		return matches
	}
	panic(globErr)
}

// Glob yields files and wildcard substituting path segments
// matching a pattern.
//
// The yielded path segments are the strings necessary
// to substitute wildcard containing path segments in the pattern
// to form a valid path leading up to the yielded files.
// Non wildcard segments and yielded files with wildcards
// in their name pattern are not included.
//
// The syntax of patterns is the same as in [path.Match].
// It always uses slash '/' as path segment separator
// independently of the file's file system.
// Empty segments and "." segments are dropped from the pattern,
// ".." segments are rejected as malformed.
//
// A pattern ending with a slash '/' will match only directories.
//
// Glob ignores file system errors such as I/O errors reading directories.
// The only possible returned error is [path.ErrBadPattern],
// reporting that the pattern is malformed.
// Every slash separated segment has to be a well-formed
// pattern on its own because segments are matched individually.
func (file File) Glob(pattern string) (iter.Seq2[File, []string], error) {
	// Remember the trailing slash before trimming it away
	dirOnly := strings.HasSuffix(pattern, "/")
	pattern = strings.Trim(pattern, "/")
	// Check if the pattern as a whole is valid
	if _, err := path.Match(pattern, ""); err != nil {
		return nil, fmt.Errorf("%w: %s", err, pattern)
	}
	pSegments := strings.Split(pattern, "/")
	pSegments = slices.DeleteFunc(pSegments, func(s string) bool {
		return s == "" || s == "."
	})
	for _, seg := range pSegments {
		if seg == ".." {
			return nil, fmt.Errorf("%w, must not contain '..': %s", path.ErrBadPattern, pattern)
		}
		// Segments are matched one by one against directory entries,
		// so a pattern like "[a/b]" that is valid as a whole
		// still has to be rejected because the segment "[a" is malformed.
		if _, err := path.Match(seg, ""); err != nil {
			return nil, fmt.Errorf("%w: %s", err, pattern)
		}
	}
	i := slices.IndexFunc(pSegments, containsWildcard)
	if i == -1 {
		// No wildcard in pattern, join path and yield file if it exists
		file = file.Join(pSegments...)
		return func(yield func(File, []string) bool) {
			if dirOnly && file.IsDir() || !dirOnly && file.Exists() {
				yield(file, nil)
			}
		}, nil
	}
	if i > 0 {
		// Join non wildcard path before first wildcard
		file = file.Join(pSegments[:i]...)
		pSegments = pSegments[i:]
	}
	return file.glob(dirOnly, pSegments, nil), nil
}

// containsWildcard reports whether pattern contains any of the
// characters '*', '?', '[' or '\'.
func containsWildcard(pattern string) bool {
	return strings.ContainsFunc(pattern, func(r rune) bool {
		switch r {
		case '*', '?', '[', '\\':
			return true
		}
		return false
	})
}

// glob returns an iterator that resolves the remaining pattern segments
// relative to file.
//
// dirOnly restricts the yielded files to directories.
// segments are the pattern segments that still have to be resolved.
// values are the names that were substituted for the wildcard
// segments resolved so far, they are yielded together with every match.
//
// Entries that ListDirIter yields together with an error are skipped.
func (file File) glob(dirOnly bool, segments, values []string) iter.Seq2[File, []string] {
	return func(yield func(File, []string) bool) {
		switch len(segments) {
		case 0:
			// No more segments, yield the file itself
			if dirOnly && file.IsDir() || !dirOnly && file.Exists() {
				yield(file, values)
			}

		case 1:
			// Last segment, yield all matching files
			if pattern := segments[0]; containsWildcard(pattern) {
				// Wildcard in last segment, list directory with segment as pattern
				for f, err := range file.ListDirIter(pattern) {
					if err != nil {
						continue
					}
					if dirOnly && !f.IsDir() || !dirOnly && !f.Exists() {
						continue
					}
					if !yield(f, values) {
						return
					}
				}
			} else {
				// No wildcard in last segment, join path and yield file if it exists
				f := file.Join(pattern)
				if dirOnly && f.IsDir() || !dirOnly && f.Exists() {
					yield(f, values)
				}
			}

		default:
			if pattern := segments[0]; containsWildcard(pattern) {
				// Limit the capacity to the length so that append always copies.
				// Without this, sibling matches could write their name into
				// the same backing array and overwrite values
				// that have already been yielded to the caller.
				parentValues := values[:len(values):len(values)]
				// Wildcard in segment, list directory with segment as pattern
				for matchedFile, err := range file.ListDirIter(pattern) {
					if err != nil {
						// Don't recurse into a file that was yielded with an error
						continue
					}
					for f, v := range matchedFile.glob(dirOnly, segments[1:], append(parentValues, matchedFile.Name())) {
						if !yield(f, v) {
							return
						}
					}
				}
			} else {
				// No wildcard in segment, join path and recurse
				for f, v := range file.Join(segments[0]).glob(dirOnly, segments[1:], values) {
					if !yield(f, v) {
						return
					}
				}
			}
		}
	}
}

// ListDirInfo calls the passed callback function for every file and directory in dirPath.
// If any patterns are passed, then only files with a name that matches
// at least one of the patterns are returned.
Expand Down
120 changes: 120 additions & 0 deletions file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import (
"errors"
"fmt"
"path/filepath"
"sort"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/ungerik/go-fs/fsimpl"
)

Expand Down Expand Up @@ -283,3 +285,121 @@ func TestFile_String(t *testing.T) {
path := filepath.Join("dir", "file.ext")
require.Equal(t, path+" (local file system)", File(path).String())
}

// TestFile_Glob checks File.Glob against a temporary directory tree
// with nested directories and a few files.
// The results of every case are sorted by local path before comparison
// because the test does not rely on any particular listing order.
func TestFile_Glob(t *testing.T) {
	dir := MustMakeTempDir()
	t.Cleanup(func() { dir.RemoveRecursive() })
	xDir := dir.Join("a", "b", "c", "Hello", "World", "x")
	yDir := dir.Join("a", "b", "c", "Hello", "World", "y")
	require.NoError(t, xDir.MakeAllDirs())
	require.NoError(t, yDir.MakeAllDirs())
	cFile := dir.Join("a", "b", "c", "file.txt")
	require.NoError(t, cFile.Touch())
	xFile1 := xDir.Join("file1.txt")
	require.NoError(t, xFile1.Touch())
	xFile2 := xDir.Join("file2.txt")
	require.NoError(t, xFile2.Touch())
	xFile3 := xDir.Join("file3.txt")
	require.NoError(t, xFile3.Touch())

	// result pairs a yielded file with its yielded wildcard substitutions
	type result struct {
		file   File
		values []string
	}

	tests := []struct {
		name    string
		file    File
		pattern string
		want    []result
		wantErr bool
	}{
		{
			name:    "invalid file",
			file:    "",
			pattern: "",
			want:    nil,
		},
		{
			name:    "empty pattern",
			file:    dir,
			pattern: "",
			want:    []result{{dir, nil}},
		},
		{
			name:    "no wildcard pattern",
			file:    dir,
			pattern: "a/b/c/Hello/World/x/file1.txt",
			want: []result{
				{xFile1, nil},
			},
		},
		{
			name:    "no wildcard non-canonical pattern",
			file:    dir,
			pattern: "/./a/b//c/Hello/./././/World/x/file1.txt",
			want: []result{
				{xFile1, nil},
			},
		},
		{
			name:    "rooted files",
			file:    dir,
			pattern: "*",
			want: []result{
				{dir.Join("a"), nil},
			},
		},
		{
			name:    "rooted dirs",
			file:    dir,
			pattern: "*/",
			want: []result{
				{dir.Join("a"), nil},
			},
		},
		{
			name:    "file and dir",
			file:    dir,
			pattern: "./a/b/c/*",
			want: []result{
				{dir.Join("a", "b", "c", "Hello"), nil},
				{cFile, nil},
			},
		},
		{
			name:    "directories only",
			file:    dir,
			pattern: "./a/b/c/*/",
			want: []result{
				{dir.Join("a", "b", "c", "Hello"), nil},
			},
		},
		{
			name:    "complexer pattern",
			file:    dir,
			pattern: "*/b/c/*/W???d/x/file[1-2].txt",
			want: []result{
				{xFile1, []string{"a", "Hello", "World"}},
				{xFile2, []string{"a", "Hello", "World"}},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotIter, err := tt.file.Glob(tt.pattern)
			if tt.wantErr {
				require.Error(t, err, "File.Glob")
				// Glob returns a nil iterator together with an error,
				// ranging over it would panic
				return
			}
			require.NoError(t, err, "File.Glob")
			var got []result
			for file, values := range gotIter {
				require.Truef(t, file.Exists(), "file %s does not exist", file)
				got = append(got, result{file, values})
			}
			sort.Slice(got, func(i, j int) bool { return got[i].file.LocalPath() < got[j].file.LocalPath() })
			require.Equal(t, tt.want, got, "file path sorted results")
		})
	}
}
77 changes: 38 additions & 39 deletions stdfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,15 @@ import (
"errors"
"fmt"
iofs "io/fs"
"path"
"sort"
"strings"
)

var (
_ iofs.FS = StdFS{File("")}
_ iofs.SubFS = StdFS{File("")}
_ iofs.StatFS = StdFS{File("")}
_ iofs.GlobFS = StdFS{File("")}
_ iofs.FS = StdFS{File("")}
_ iofs.SubFS = StdFS{File("")}
_ iofs.StatFS = StdFS{File("")}
// _ iofs.GlobFS = StdFS{File("")}
_ iofs.ReadDirFS = StdFS{File("")}
_ iofs.ReadFileFS = StdFS{File("")}
)
Expand Down Expand Up @@ -91,40 +90,40 @@ func (f StdFS) ReadDir(name string) ([]iofs.DirEntry, error) {
// Glob function.
//
// This method implements the io/fs.GlobFS interface.
// Glob returns the sorted names of the entries below f.File matching pattern.
// The pattern is resolved one slash separated segment at a time:
// the first segment is matched against the entries of f.File,
// the rest of the pattern is resolved recursively below every match.
// Patterns containing "//" or "[]" are rejected with an error.
func (f StdFS) Glob(pattern string) (names []string, err error) {
// if pattern == `u[u][i-i][\d][\d-\d]i[r]/*e*` {
// fmt.Println(pattern)
// }
if strings.Contains(pattern, "//") || strings.Contains(pattern, "[]") {
return nil, fmt.Errorf("invalid glob pattern: %#v", pattern)
}
// Split off the first segment, cut reports if there was a slash at all
parentPattern, childPattern, cut := strings.Cut(pattern, "/")
// Collect the names of all entries matching the first segment
err = f.File.ListDir(
func(file File) error {
names = append(names, file.Name())
return nil
},
parentPattern,
)
if err != nil {
return nil, err
}
sort.Strings(names)
if cut {
// The pattern continues after the first segment,
// so the names collected so far are only parents to descend into
parentNames := names
names = nil // Don't include parents in final result
for _, parent := range parentNames {
// Resolve the rest of the pattern relative to the parent
children, err := f.File.Join(parent).StdFS().Glob(childPattern)
if err != nil {
return nil, err
}
// Prefix the child results with the parent name
for _, child := range children {
names = append(names, path.Join(parent, child))
}
}
}
return names, nil
}
// func (f StdFS) Glob(pattern string) (names []string, err error) {
// // if pattern == `u[u][i-i][\d][\d-\d]i[r]/*e*` {
// // fmt.Println(pattern)
// // }
// if strings.Contains(pattern, "//") || strings.Contains(pattern, "[]") {
// return nil, fmt.Errorf("invalid glob pattern: %#v", pattern)
// }
// parentPattern, childPattern, cut := strings.Cut(pattern, "/")
// err = f.File.ListDir(
// func(file File) error {
// names = append(names, file.Name())
// return nil
// },
// parentPattern,
// )
// if err != nil {
// return nil, err
// }
// sort.Strings(names)
// if cut {
// parentNames := names
// names = nil // Don't include parents in final result
// for _, parent := range parentNames {
// children, err := f.File.Join(parent).StdFS().Glob(childPattern)
// if err != nil {
// return nil, err
// }
// for _, child := range children {
// names = append(names, path.Join(parent, child))
// }
// }
// }
// return names, nil
// }

func checkStdFSName(name string) error {
if name == "" {
Expand Down

0 comments on commit a509e74

Please sign in to comment.