From e9f46891ea8e6239d5ebec17cfe59ac8d84e7b28 Mon Sep 17 00:00:00 2001 From: neilotoole Date: Thu, 30 Nov 2023 08:11:11 -0700 Subject: [PATCH 1/2] Implemented FSCache.MapFile mechanism --- fscache.go | 81 +++++++++++++++ go.mod | 1 + go.sum | 11 ++ mapfile_test.go | 266 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 359 insertions(+) create mode 100644 mapfile_test.go diff --git a/fscache.go b/fscache.go index fb5b324..735f2fd 100644 --- a/fscache.go +++ b/fscache.go @@ -194,6 +194,52 @@ func (c *FSCache) Get(key string) (r ReadAtCloser, w io.WriteCloser, err error) return r, f, err } +// MapFile maps the file at fp into the cache. This mechanism is useful +// with a StandardFS, because it avoids a superfluous copy operation +// when the file already exists on disk. For other file system implementations, +// the file contents are simply copied into the cache using io.Copy. +// +// When FSCache.Remove or FSCache.Clean are invoked (including indirectly via +// a Haunter), the cache entry for the mapped file is removed, but the file +// itself will not be deleted from disk. +func (c *FSCache) MapFile(fp string) error { + c.mu.RLock() + + key := c.mapKey(fp) + if _, ok := c.files[key]; ok { + c.mu.RUnlock() + return fmt.Errorf("key %s already exists in cache", key) + } + c.mu.RUnlock() + c.mu.Lock() + if _, ok := c.fs.(*StandardFS); !ok { + // It's not a StandardFS, so we need to copy the file into the cache. + f, err := c.newFile(key) + if err != nil { + c.mu.Unlock() + return err + } + + // The copy operation could take a while, we don't want to be locked. + // Not entirely clear if this is the correct thing to do. + c.mu.Unlock() + if err = copyFileTo(f, fp); err != nil { + return err + } + + c.mu.RLock() + c.files[key] = f + c.mu.RUnlock() + return nil + } + + // It's a StandardFS, so we can just map in the existing file. + f := c.mappedFile(fp) + c.files[key] = f + c.mu.Unlock() + return nil +} + // Remove removes the specified key from the cache. func (c *FSCache) Remove(key string) error { c.mu.Lock() @@ -290,6 +336,25 @@ func (f *reloadedFile) next() (*CacheReader, error) { }, err } +type mappedFile struct { + reloadedFile +} + +// remove is a no-op, because we don't want the mapped +// file to ever be deleted by fscache. +func (f *mappedFile) remove() error { + return nil +} + +func (c *FSCache) mappedFile(name string) fileStream { + return &mappedFile{ + reloadedFile: reloadedFile{ + fs: c.fs, + name: name, + }, + } +} + func (f *cachedFile) Name() string { return f.stream.Name() } func (f *cachedFile) remove() error { return f.stream.Remove() } @@ -371,3 +436,19 @@ func (h *handleCounter) InUse() bool { func (h *handleCounter) waitUntilFree() { h.grp.Wait() } + +// copyFileTo copies the file at file path fp into dst. +func copyFileTo(dst io.WriteCloser, fp string) error { + f, err := os.Open(fp) + if err != nil { + _ = dst.Close() + return err + } + defer f.Close() + + if _, err = io.Copy(dst, f); err != nil { + _ = dst.Close() + return err + } + return dst.Close() +} diff --git a/go.mod b/go.mod index f9e287f..4b4b9b6 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.14 require ( github.com/djherbis/atime v1.1.0 github.com/djherbis/stream v1.4.0 + github.com/stretchr/testify v1.4.0 ) diff --git a/go.sum b/go.sum index aa8adc0..9fc2232 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,15 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/djherbis/atime v1.1.0 h1:rgwVbP/5by8BvvjBNrbh64Qz33idKT3pSnMSJsxhi0g= github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE= github.com/djherbis/stream v1.4.0 h1:aVD46WZUiq5kJk55yxJAyw6Kuera6kmC3i2vEQyW/AE= github.com/djherbis/stream v1.4.0/go.mod h1:cqjC1ZRq3FFwkGmUtHwcldbnW8f0Q4YuVsGW1eAFtOk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/mapfile_test.go b/mapfile_test.go new file mode 100644 index 0000000..ddab157 --- /dev/null +++ b/mapfile_test.go @@ -0,0 +1,266 @@ +package fscache_test + +import ( + "io" + "io/ioutil" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/djherbis/fscache" +) + +// TestMapFile demonstrates and tests the proposed FSCache.MapFile mechanism. +// The theory is that if a cache entry already exists as a file on disk, +// and the cache is backed by a filesystem, we can avoid the copy operation +// that occurs when the cache is filled, by instead mapping the file into +// the cache. This is a serious win for large files. +// +// Our example scenario is a Getter interface that gets a reader for a URL +// or filepath. +// +// Two implementations of Getter are provided: StdGetter and MapGetter. Both +// implementations are tested against the local filesystem README.md, and +// the README.md hosted on GitHub via https, using fscache.StandardFS, and +// also fscache.NewMemFs. +// +// Both Getter implementations behave the same for the HTTP case. For the +// file case (which is what we're really interested in), StdGetter uses +// the standard cache-filling mechanism, which is to copy the file content +// bytes from disk into the w returned by FSCache.Get. This is the scenario +// that we're trying to address: the goal is to avoid this unnecessary +// copy operation. +// +// Meanwhile, for the file case, MapGetter uses the new FSCache.MapFile +// mechanism, which avoids the copy operation. +func TestMapFile(t *testing.T) { + const iterations = 5 + const readmeHTTP = "https://raw.githubusercontent.com/djherbis/fscache/master/README.md" + readmeFilepath, err := filepath.Abs("README.md") + require.NoError(t, err) + + newDiskFs := func() fscache.FileSystem { + dir, err := ioutil.TempDir("", "") + require.NoError(t, err) + t.Cleanup(func() { _ = os.RemoveAll(dir) }) + fs, err := fscache.NewFs(dir, os.ModePerm) + require.NoError(t, err) + return fs + } + + testCases := []struct { + name string + getterFactory func(t *testing.T, fs fscache.FileSystem) Getter + fsFactory func() fscache.FileSystem + src string + }{ + {"map_diskfs_http", NewMapGetter, newDiskFs, readmeHTTP}, + {"map_diskfs_file", NewMapGetter, newDiskFs, readmeFilepath}, + {"map_memfs_http", NewMapGetter, fscache.NewMemFs, readmeHTTP}, + {"map_memfs_file", NewMapGetter, fscache.NewMemFs, readmeFilepath}, + {"std_diskfs_http", NewStdGetter, newDiskFs, readmeHTTP}, + {"std_diskfs_file", NewStdGetter, newDiskFs, readmeFilepath}, + {"std_memfs_http", NewStdGetter, fscache.NewMemFs, readmeHTTP}, + {"std_memfs_file", NewStdGetter, fscache.NewMemFs, readmeFilepath}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + g := tc.getterFactory(t, tc.fsFactory()) + for i := 0; i < iterations; i++ { + rc, err := g.Get(tc.src) + require.NoError(t, err) + require.NotNil(t, rc) + b, err := ioutil.ReadAll(rc) + assert.NoError(t, rc.Close()) + require.NoError(t, err) + require.Contains(t, string(b), "Streaming File Cache for #golang") + } + + // Make sure that calling FSCache.Remove doesn't actually + // delete the file from disk. + if tc.src == readmeFilepath { + if mg, ok := g.(*MapGetter); ok { + err = mg.fc.Remove(tc.src) + require.NoError(t, err) + fi, err := os.Stat(tc.src) + require.NoError(t, err) + require.Equal(t, filepath.Base(tc.src), fi.Name()) + } + } + }) + } +} + +// Getter gets a reader for a URL or filepath. +type Getter interface { + Get(urlOrFilepath string) (io.ReadCloser, error) +} + +// NewStdGetter is a factory function for StdGetter. +func NewStdGetter(t *testing.T, fs fscache.FileSystem) Getter { + g := &StdGetter{logf: t.Logf} + var err error + g.fc, err = fscache.NewCache(fs, nil) + require.NoError(t, err) + return g +} + +var _ Getter = (*StdGetter)(nil) + +// StdGetter is a getter that uses the standard cache-filling mechanism. +type StdGetter struct { + fc *fscache.FSCache + logf func(format string, args ...interface{}) +} + +// Get implements Getter. +func (g *StdGetter) Get(urlOrFilepath string) (io.ReadCloser, error) { + if strings.HasPrefix(urlOrFilepath, "http://") || + strings.HasPrefix(urlOrFilepath, "https://") { + + r, w, err := g.fc.Get(urlOrFilepath) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", urlOrFilepath) + return r, nil + } + + g.logf("Cache miss: %s", urlOrFilepath) + + if err = httpGet(urlOrFilepath, w); err != nil { + return nil, err + } + + g.logf("Fetched: %s", urlOrFilepath) + return r, nil + } + + // Thus, urlOrFilepath must be a filepath. + fp := urlOrFilepath + r, w, err := g.fc.Get(fp) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", fp) + return r, nil + } + + g.logf("Cache miss: %s", fp) + + f, err := os.Open(fp) + if err != nil { + return nil, err + } + defer f.Close() + + // We copy the contents of f to w and thus into the cache. + // But, for our use-case, this is useless work. + // We're already using a filesystem FSCache, so we're just + // copying the file from disk to memory and back to disk. + // Boo! + var n int64 + if n, err = io.Copy(w, f); err != nil { + return nil, err + } + + if err = w.Close(); err != nil { + return nil, err + } + g.logf("EXPENSIVE: Copied %d bytes from %s to cache", n, fp) + return r, nil +} + +// NewMapGetter is a factory function for MapGetter. +func NewMapGetter(t *testing.T, fs fscache.FileSystem) Getter { + g := &MapGetter{logf: t.Logf} + var err error + g.fc, err = fscache.NewCache(fs, nil) + require.NoError(t, err) + return g +} + +var _ Getter = (*MapGetter)(nil) + +// MapGetter is a Getter that uses the new FSCache.MapFile mechanism to +// map existing files into the cache. +type MapGetter struct { + fc *fscache.FSCache + logf func(format string, args ...interface{}) +} + +// Get implements Getter. +func (g *MapGetter) Get(urlOrFilepath string) (io.ReadCloser, error) { + if strings.HasPrefix(urlOrFilepath, "http://") || + strings.HasPrefix(urlOrFilepath, "https://") { + + r, w, err := g.fc.Get(urlOrFilepath) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", urlOrFilepath) + return r, nil + } + + g.logf("Cache miss: %s", urlOrFilepath) + + if err = httpGet(urlOrFilepath, w); err != nil { + return nil, err + } + + g.logf("Fetched: %s", urlOrFilepath) + return r, nil + } + + // Thus, urlOrFilepath must be a filepath. + fp := urlOrFilepath + if g.fc.Exists(fp) { + g.logf("Cache hit: %s", fp) + r, _, err := g.fc.Get(fp) + return r, err + } + + g.logf("Cache miss: %s", fp) + g.logf("Mapping file into cache: %s", fp) + + if err := g.fc.MapFile(fp); err != nil { + return nil, err + } + + r, _, err := g.fc.Get(fp) + if err != nil { + return nil, err + } + + return r, nil +} + +// httpGet writes the contents at URL u to w (which +// is always closed). +func httpGet(u string, w io.WriteCloser) error { + resp, err := http.Get(u) + if err != nil { + return err + } + defer resp.Body.Close() + + if _, err = io.Copy(w, resp.Body); err != nil { + _ = w.Close() + return err + } + + return w.Close() +} From 5a666f989b5aacdc286f164228beba3e1c8512be Mon Sep 17 00:00:00 2001 From: neilotoole Date: Thu, 30 Nov 2023 08:16:26 -0700 Subject: [PATCH 2/2] Implemented FSCache.MapFile mechanism --- fscache.go | 81 +++++++++++++++ go.mod | 1 + go.sum | 11 ++ mapfile_test.go | 266 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 359 insertions(+) create mode 100644 mapfile_test.go diff --git a/fscache.go b/fscache.go index fb5b324..a1721ec 100644 --- a/fscache.go +++ b/fscache.go @@ -194,6 +194,52 @@ func (c *FSCache) Get(key string) (r ReadAtCloser, w io.WriteCloser, err error) return r, f, err } +// MapFile maps a file into the cache. This mechanism is useful with a +// StandardFS, because it avoids a superfluous copy operation when the file +// already exists on disk. For other file system implementations, the file +// contents are simply copied into the cache using io.Copy. +// +// When FSCache.Remove or FSCache.Clean are invoked (including indirectly via +// a Haunter), the cache entry for the mapped file is removed, but the file +// itself will not be deleted from disk. +func (c *FSCache) MapFile(name string) error { + c.mu.RLock() + + key := c.mapKey(name) + if _, ok := c.files[key]; ok { + c.mu.RUnlock() + return fmt.Errorf("key %s already exists in cache", key) + } + c.mu.RUnlock() + c.mu.Lock() + if _, ok := c.fs.(*StandardFS); !ok { + // It's not a StandardFS, so we need to copy the file into the cache. + f, err := c.newFile(key) + if err != nil { + c.mu.Unlock() + return err + } + + // The copy operation could take a while, we don't want to be locked. + // Not entirely clear if this is the correct thing to do. + c.mu.Unlock() + if err = copyFileTo(f, name); err != nil { + return err + } + + c.mu.RLock() + c.files[key] = f + c.mu.RUnlock() + return nil + } + + // It's a StandardFS, so we can just map in the existing file. + f := c.mappedFile(name) + c.files[key] = f + c.mu.Unlock() + return nil +} + // Remove removes the specified key from the cache. func (c *FSCache) Remove(key string) error { c.mu.Lock() @@ -290,6 +336,25 @@ func (f *reloadedFile) next() (*CacheReader, error) { }, err } +type mappedFile struct { + reloadedFile +} + +// remove is a no-op, because we don't want the mapped +// file to ever be deleted by fscache. +func (f *mappedFile) remove() error { + return nil +} + +func (c *FSCache) mappedFile(name string) fileStream { + return &mappedFile{ + reloadedFile: reloadedFile{ + fs: c.fs, + name: name, + }, + } +} + func (f *cachedFile) Name() string { return f.stream.Name() } func (f *cachedFile) remove() error { return f.stream.Remove() } @@ -371,3 +436,19 @@ func (h *handleCounter) InUse() bool { func (h *handleCounter) waitUntilFree() { h.grp.Wait() } + +// copyFileTo copies the file at file path fp into dst. +func copyFileTo(dst io.WriteCloser, fp string) error { + f, err := os.Open(fp) + if err != nil { + _ = dst.Close() + return err + } + defer f.Close() + + if _, err = io.Copy(dst, f); err != nil { + _ = dst.Close() + return err + } + return dst.Close() +} diff --git a/go.mod b/go.mod index f9e287f..4b4b9b6 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.14 require ( github.com/djherbis/atime v1.1.0 github.com/djherbis/stream v1.4.0 + github.com/stretchr/testify v1.4.0 ) diff --git a/go.sum b/go.sum index aa8adc0..9fc2232 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,15 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/djherbis/atime v1.1.0 h1:rgwVbP/5by8BvvjBNrbh64Qz33idKT3pSnMSJsxhi0g= github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE= github.com/djherbis/stream v1.4.0 h1:aVD46WZUiq5kJk55yxJAyw6Kuera6kmC3i2vEQyW/AE= github.com/djherbis/stream v1.4.0/go.mod h1:cqjC1ZRq3FFwkGmUtHwcldbnW8f0Q4YuVsGW1eAFtOk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/mapfile_test.go b/mapfile_test.go new file mode 100644 index 0000000..523a175 --- /dev/null +++ b/mapfile_test.go @@ -0,0 +1,266 @@ +package fscache_test + +import ( + "io" + "io/ioutil" + "net/http" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/djherbis/fscache" +) + +// TestMapFile demonstrates and tests the proposed FSCache.MapFile mechanism. +// The theory is that if a cache entry already exists as a file on disk, +// and the cache is backed by a filesystem, we can avoid the copy operation +// that occurs when the cache is filled, by instead mapping the file into +// the cache. This is a serious win for large files. +// +// Our example scenario is a Getter interface that gets a reader for a URL +// or filepath. +// +// Two implementations of Getter are provided: StdGetter and MapGetter. Both +// implementations are tested against the local filesystem README.md, and +// the README.md hosted on GitHub via https, using fscache.StandardFS, and +// also fscache.NewMemFs. +// +// Both Getter implementations behave the same for the HTTP case. For the +// file case (which is what we're really interested in), StdGetter uses +// the standard cache-filling mechanism, which is to copy the file content +// bytes from disk into the w returned by FSCache.Get. This is the scenario +// that we're trying to address: the goal is to avoid this unnecessary +// copy operation. +// +// Meanwhile, for the file case, MapGetter uses the new FSCache.MapFile +// mechanism, which avoids the copy operation. +func TestMapFile(t *testing.T) { + const iterations = 5 + const readmeHTTP = "https://raw.githubusercontent.com/djherbis/fscache/master/README.md" + readmeFilepath, err := filepath.Abs("README.md") + require.NoError(t, err) + + newDiskFs := func() fscache.FileSystem { + dir, err := ioutil.TempDir("", "") + require.NoError(t, err) + t.Cleanup(func() { _ = os.RemoveAll(dir) }) + fs, err := fscache.NewFs(dir, os.ModePerm) + require.NoError(t, err) + return fs + } + + testCases := []struct { + test string + getterFactory func(t *testing.T, fs fscache.FileSystem) Getter + fsFactory func() fscache.FileSystem + src string + }{ + {"map_diskfs_http", NewMapGetter, newDiskFs, readmeHTTP}, + {"map_diskfs_file", NewMapGetter, newDiskFs, readmeFilepath}, + {"map_memfs_http", NewMapGetter, fscache.NewMemFs, readmeHTTP}, + {"map_memfs_file", NewMapGetter, fscache.NewMemFs, readmeFilepath}, + {"std_diskfs_http", NewStdGetter, newDiskFs, readmeHTTP}, + {"std_diskfs_file", NewStdGetter, newDiskFs, readmeFilepath}, + {"std_memfs_http", NewStdGetter, fscache.NewMemFs, readmeHTTP}, + {"std_memfs_file", NewStdGetter, fscache.NewMemFs, readmeFilepath}, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.test, func(t *testing.T) { + g := tc.getterFactory(t, tc.fsFactory()) + for i := 0; i < iterations; i++ { + rc, err := g.Get(tc.src) + require.NoError(t, err) + require.NotNil(t, rc) + b, err := ioutil.ReadAll(rc) + assert.NoError(t, rc.Close()) + require.NoError(t, err) + require.Contains(t, string(b), "Streaming File Cache for #golang") + } + + // Make sure that calling FSCache.Remove doesn't actually + // delete the file from disk. + if tc.src == readmeFilepath { + if mg, ok := g.(*MapGetter); ok { + err = mg.fc.Remove(tc.src) + require.NoError(t, err) + fi, err := os.Stat(tc.src) + require.NoError(t, err) + require.Equal(t, filepath.Base(tc.src), fi.Name()) + } + } + }) + } +} + +// Getter gets a reader for a URL or filepath. +type Getter interface { + Get(urlOrFilepath string) (io.ReadCloser, error) +} + +// NewStdGetter is a factory function for StdGetter. +func NewStdGetter(t *testing.T, fs fscache.FileSystem) Getter { + g := &StdGetter{logf: t.Logf} + var err error + g.fc, err = fscache.NewCache(fs, nil) + require.NoError(t, err) + return g +} + +var _ Getter = (*StdGetter)(nil) + +// StdGetter is a getter that uses the standard cache-filling mechanism. +type StdGetter struct { + fc *fscache.FSCache + logf func(format string, args ...interface{}) +} + +// Get implements Getter. +func (g *StdGetter) Get(urlOrFilepath string) (io.ReadCloser, error) { + if strings.HasPrefix(urlOrFilepath, "http://") || + strings.HasPrefix(urlOrFilepath, "https://") { + + r, w, err := g.fc.Get(urlOrFilepath) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", urlOrFilepath) + return r, nil + } + + g.logf("Cache miss: %s", urlOrFilepath) + + if err = httpGet(urlOrFilepath, w); err != nil { + return nil, err + } + + g.logf("Fetched: %s", urlOrFilepath) + return r, nil + } + + // Thus, urlOrFilepath must be a filepath. + fp := urlOrFilepath + r, w, err := g.fc.Get(fp) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", fp) + return r, nil + } + + g.logf("Cache miss: %s", fp) + + f, err := os.Open(fp) + if err != nil { + return nil, err + } + defer f.Close() + + // We copy the contents of f to w and thus into the cache. + // But, for our use-case, this is useless work. + // We're already using a filesystem FSCache, so we're just + // copying the file from disk to memory and back to disk. + // Boo! + var n int64 + if n, err = io.Copy(w, f); err != nil { + return nil, err + } + + if err = w.Close(); err != nil { + return nil, err + } + g.logf("EXPENSIVE: Copied %d bytes from %s to cache", n, fp) + return r, nil +} + +// NewMapGetter is a factory function for MapGetter. +func NewMapGetter(t *testing.T, fs fscache.FileSystem) Getter { + g := &MapGetter{logf: t.Logf} + var err error + g.fc, err = fscache.NewCache(fs, nil) + require.NoError(t, err) + return g +} + +var _ Getter = (*MapGetter)(nil) + +// MapGetter is a Getter that uses the new FSCache.MapFile mechanism to +// map existing files into the cache. +type MapGetter struct { + fc *fscache.FSCache + logf func(format string, args ...interface{}) +} + +// Get implements Getter. +func (g *MapGetter) Get(urlOrFilepath string) (io.ReadCloser, error) { + if strings.HasPrefix(urlOrFilepath, "http://") || + strings.HasPrefix(urlOrFilepath, "https://") { + + r, w, err := g.fc.Get(urlOrFilepath) + if err != nil { + return nil, err + } + + if w == nil { + g.logf("Cache hit: %s", urlOrFilepath) + return r, nil + } + + g.logf("Cache miss: %s", urlOrFilepath) + + if err = httpGet(urlOrFilepath, w); err != nil { + return nil, err + } + + g.logf("Fetched: %s", urlOrFilepath) + return r, nil + } + + // Thus, urlOrFilepath must be a filepath. + fp := urlOrFilepath + if g.fc.Exists(fp) { + g.logf("Cache hit: %s", fp) + r, _, err := g.fc.Get(fp) + return r, err + } + + g.logf("Cache miss: %s", fp) + g.logf("Mapping file into cache: %s", fp) + + if err := g.fc.MapFile(fp); err != nil { + return nil, err + } + + r, _, err := g.fc.Get(fp) + if err != nil { + return nil, err + } + + return r, nil +} + +// httpGet writes the contents at URL u to w (which +// is always closed). +func httpGet(u string, w io.WriteCloser) error { + resp, err := http.Get(u) + if err != nil { + return err + } + defer resp.Body.Close() + + if _, err = io.Copy(w, resp.Body); err != nil { + _ = w.Close() + return err + } + + return w.Close() +}