Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions fscache.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,52 @@ func (c *FSCache) Get(key string) (r ReadAtCloser, w io.WriteCloser, err error)
return r, f, err
}

// MapFile maps a file into the cache. This mechanism is useful with a
// StandardFS, because it avoids a superfluous copy operation when the file
// already exists on disk. For other file system implementations, the file
// contents are simply copied into the cache using io.Copy.
//
// When FSCache.Remove or FSCache.Clean are invoked (including indirectly via
// a Haunter), the cache entry for the mapped file is removed, but the file
// itself will not be deleted from disk.
func (c *FSCache) MapFile(name string) error {
	key := c.mapKey(name)

	// Check and (for StandardFS) insert under a single write lock, so no
	// other goroutine can insert the key between the check and the insert.
	c.mu.Lock()
	if _, ok := c.files[key]; ok {
		c.mu.Unlock()
		return fmt.Errorf("key %s already exists in cache", key)
	}

	if _, ok := c.fs.(*StandardFS); ok {
		// It's a StandardFS, so we can just map in the existing file.
		c.files[key] = c.mappedFile(name)
		c.mu.Unlock()
		return nil
	}

	// It's not a StandardFS, so we need to copy the file into the cache.
	f, err := c.newFile(key)
	if err != nil {
		c.mu.Unlock()
		return err
	}

	// The copy operation could take a while; don't hold the lock during it.
	c.mu.Unlock()
	if err = copyFileTo(f, name); err != nil {
		return err
	}

	// Re-acquire the WRITE lock: mutating c.files under RLock is a data
	// race. While the lock was released another goroutine may have created
	// the entry, so re-check before inserting.
	c.mu.Lock()
	defer c.mu.Unlock()
	if _, ok := c.files[key]; ok {
		return fmt.Errorf("key %s already exists in cache", key)
	}
	c.files[key] = f
	return nil
}

// Remove removes the specified key from the cache.
func (c *FSCache) Remove(key string) error {
c.mu.Lock()
Expand Down Expand Up @@ -290,6 +336,25 @@ func (f *reloadedFile) next() (*CacheReader, error) {
}, err
}

// mappedFile is a fileStream whose backing file lives outside the cache's
// ownership: evicting it from the cache must never delete it from disk.
type mappedFile struct {
	reloadedFile
}

// remove deliberately does nothing, so that fscache can never delete the
// mapped file from disk.
func (m *mappedFile) remove() error { return nil }

// mappedFile builds a fileStream that maps the named on-disk file into the
// cache without copying its contents.
func (c *FSCache) mappedFile(name string) fileStream {
	rf := reloadedFile{fs: c.fs, name: name}
	return &mappedFile{reloadedFile: rf}
}

// Name returns the name of the underlying stream.
func (f *cachedFile) Name() string { return f.stream.Name() }

// remove delegates to the underlying stream's Remove.
func (f *cachedFile) remove() error { return f.stream.Remove() }
Expand Down Expand Up @@ -371,3 +436,19 @@ func (h *handleCounter) InUse() bool {
// waitUntilFree blocks until the handle group's counter drains to zero,
// i.e. until no handles remain in use.
func (h *handleCounter) waitUntilFree() {
	h.grp.Wait()
}

// copyFileTo copies the file at file path fp into dst.
func copyFileTo(dst io.WriteCloser, fp string) error {
f, err := os.Open(fp)
if err != nil {
_ = dst.Close()
return err
}
defer f.Close()

if _, err = io.Copy(dst, f); err != nil {
_ = dst.Close()
return err
}
return dst.Close()
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ go 1.14
require (
github.com/djherbis/atime v1.1.0
github.com/djherbis/stream v1.4.0
github.com/stretchr/testify v1.4.0
)
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/djherbis/atime v1.1.0 h1:rgwVbP/5by8BvvjBNrbh64Qz33idKT3pSnMSJsxhi0g=
github.com/djherbis/atime v1.1.0/go.mod h1:28OF6Y8s3NQWwacXc5eZTsEsiMzp7LF8MbXE+XJPdBE=
github.com/djherbis/stream v1.4.0 h1:aVD46WZUiq5kJk55yxJAyw6Kuera6kmC3i2vEQyW/AE=
github.com/djherbis/stream v1.4.0/go.mod h1:cqjC1ZRq3FFwkGmUtHwcldbnW8f0Q4YuVsGW1eAFtOk=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
266 changes: 266 additions & 0 deletions mapfile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
package fscache_test

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/djherbis/fscache"
)

// TestMapFile exercises the proposed FSCache.MapFile mechanism.
//
// Premise: when a cache entry already exists as a file on disk and the
// cache itself is filesystem-backed, the usual fill — streaming the file's
// bytes through the writer returned by FSCache.Get — is wasted work.
// MapFile instead maps the existing file directly into the cache, which is
// a serious win for large files.
//
// The example scenario is a Getter interface that yields a reader for a
// URL or filepath. Two implementations are tested: StdGetter fills the
// cache the standard way in every case (the expensive path we want to
// avoid), while MapGetter uses the new FSCache.MapFile mechanism for the
// filepath case. Both behave identically for HTTP sources. Each getter is
// run against the local README.md and the copy hosted on GitHub, over both
// a disk-backed fscache.StandardFS and fscache.NewMemFs.
func TestMapFile(t *testing.T) {
	const repeats = 5
	const remoteReadme = "https://raw.githubusercontent.com/djherbis/fscache/master/README.md"
	localReadme, err := filepath.Abs("README.md")
	require.NoError(t, err)

	makeDiskFS := func() fscache.FileSystem {
		dir, err := ioutil.TempDir("", "")
		require.NoError(t, err)
		t.Cleanup(func() { _ = os.RemoveAll(dir) })
		fs, err := fscache.NewFs(dir, os.ModePerm)
		require.NoError(t, err)
		return fs
	}

	cases := []struct {
		name      string
		newGetter func(t *testing.T, fs fscache.FileSystem) Getter
		newFS     func() fscache.FileSystem
		src       string
	}{
		{"map_diskfs_http", NewMapGetter, makeDiskFS, remoteReadme},
		{"map_diskfs_file", NewMapGetter, makeDiskFS, localReadme},
		{"map_memfs_http", NewMapGetter, fscache.NewMemFs, remoteReadme},
		{"map_memfs_file", NewMapGetter, fscache.NewMemFs, localReadme},
		{"std_diskfs_http", NewStdGetter, makeDiskFS, remoteReadme},
		{"std_diskfs_file", NewStdGetter, makeDiskFS, localReadme},
		{"std_memfs_http", NewStdGetter, fscache.NewMemFs, remoteReadme},
		{"std_memfs_file", NewStdGetter, fscache.NewMemFs, localReadme},
	}

	for _, tt := range cases {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			g := tt.newGetter(t, tt.newFS())
			for attempt := 0; attempt < repeats; attempt++ {
				rc, err := g.Get(tt.src)
				require.NoError(t, err)
				require.NotNil(t, rc)
				b, err := ioutil.ReadAll(rc)
				assert.NoError(t, rc.Close())
				require.NoError(t, err)
				require.Contains(t, string(b), "Streaming File Cache for #golang")
			}

			// FSCache.Remove on a mapped entry must evict the cache entry
			// without deleting the mapped file itself from disk.
			mg, ok := g.(*MapGetter)
			if !ok || tt.src != localReadme {
				return
			}
			require.NoError(t, mg.fc.Remove(tt.src))
			fi, err := os.Stat(tt.src)
			require.NoError(t, err)
			require.Equal(t, filepath.Base(tt.src), fi.Name())
		})
	}
}

// Getter gets a reader for a URL or filepath.
// The caller is responsible for closing the returned reader.
type Getter interface {
	Get(urlOrFilepath string) (io.ReadCloser, error)
}

// NewStdGetter is a factory function for StdGetter.
func NewStdGetter(t *testing.T, fs fscache.FileSystem) Getter {
	cache, err := fscache.NewCache(fs, nil)
	require.NoError(t, err)
	return &StdGetter{fc: cache, logf: t.Logf}
}

// Compile-time check that *StdGetter satisfies Getter.
var _ Getter = (*StdGetter)(nil)

// StdGetter is a getter that uses the standard cache-filling mechanism.
type StdGetter struct {
	fc   *fscache.FSCache                          // backing cache
	logf func(format string, args ...interface{}) // test logger (set to t.Logf by NewStdGetter)
}

// Get implements Getter. For http/https URLs the content is fetched over
// the network on a cache miss; for filepaths the file's bytes are copied
// from disk into the cache — the superfluous copy that MapGetter avoids.
//
// On a fill error the cache reader is closed and the incomplete cache
// entry is removed, so nothing is leaked and a later Get can retry
// instead of readers blocking on a stream that is never completed.
func (g *StdGetter) Get(urlOrFilepath string) (io.ReadCloser, error) {
	if strings.HasPrefix(urlOrFilepath, "http://") ||
		strings.HasPrefix(urlOrFilepath, "https://") {

		r, w, err := g.fc.Get(urlOrFilepath)
		if err != nil {
			return nil, err
		}

		if w == nil {
			g.logf("Cache hit: %s", urlOrFilepath)
			return r, nil
		}

		g.logf("Cache miss: %s", urlOrFilepath)

		if err = httpGet(urlOrFilepath, w); err != nil {
			// Don't leak r or leave a half-written entry behind.
			_ = r.Close()
			_ = g.fc.Remove(urlOrFilepath)
			return nil, err
		}

		g.logf("Fetched: %s", urlOrFilepath)
		return r, nil
	}

	// Thus, urlOrFilepath must be a filepath.
	fp := urlOrFilepath
	r, w, err := g.fc.Get(fp)
	if err != nil {
		return nil, err
	}

	if w == nil {
		g.logf("Cache hit: %s", fp)
		return r, nil
	}

	g.logf("Cache miss: %s", fp)

	f, err := os.Open(fp)
	if err != nil {
		_ = w.Close()
		_ = r.Close()
		_ = g.fc.Remove(fp)
		return nil, err
	}
	defer f.Close()

	// We copy the contents of f to w and thus into the cache.
	// But, for our use-case, this is useless work.
	// We're already using a filesystem FSCache, so we're just
	// copying the file from disk to memory and back to disk.
	// Boo!
	var n int64
	if n, err = io.Copy(w, f); err != nil {
		_ = w.Close()
		_ = r.Close()
		_ = g.fc.Remove(fp)
		return nil, err
	}

	if err = w.Close(); err != nil {
		_ = r.Close()
		_ = g.fc.Remove(fp)
		return nil, err
	}
	g.logf("EXPENSIVE: Copied %d bytes from %s to cache", n, fp)
	return r, nil
}

// NewMapGetter is a factory function for MapGetter.
func NewMapGetter(t *testing.T, fs fscache.FileSystem) Getter {
	cache, err := fscache.NewCache(fs, nil)
	require.NoError(t, err)
	return &MapGetter{fc: cache, logf: t.Logf}
}

// Compile-time check that *MapGetter satisfies Getter.
var _ Getter = (*MapGetter)(nil)

// MapGetter is a Getter that uses the new FSCache.MapFile mechanism to
// map existing files into the cache.
type MapGetter struct {
	fc   *fscache.FSCache                          // backing cache
	logf func(format string, args ...interface{}) // test logger (set to t.Logf by NewMapGetter)
}

// Get implements Getter. For http/https URLs it behaves exactly like
// StdGetter; for filepaths it maps the file into the cache via
// FSCache.MapFile instead of copying its bytes.
//
// On a fill error the cache reader is closed and the incomplete entry is
// removed, so nothing is leaked. The writer returned by FSCache.Get is
// never silently discarded (see openCached), which would otherwise leave
// readers blocked on an entry that is never completed.
func (g *MapGetter) Get(urlOrFilepath string) (io.ReadCloser, error) {
	if strings.HasPrefix(urlOrFilepath, "http://") ||
		strings.HasPrefix(urlOrFilepath, "https://") {

		r, w, err := g.fc.Get(urlOrFilepath)
		if err != nil {
			return nil, err
		}

		if w == nil {
			g.logf("Cache hit: %s", urlOrFilepath)
			return r, nil
		}

		g.logf("Cache miss: %s", urlOrFilepath)

		if err = httpGet(urlOrFilepath, w); err != nil {
			// Don't leak r or leave a half-written entry behind.
			_ = r.Close()
			_ = g.fc.Remove(urlOrFilepath)
			return nil, err
		}

		g.logf("Fetched: %s", urlOrFilepath)
		return r, nil
	}

	// Thus, urlOrFilepath must be a filepath.
	fp := urlOrFilepath
	if g.fc.Exists(fp) {
		g.logf("Cache hit: %s", fp)
		return g.openCached(fp)
	}

	g.logf("Cache miss: %s", fp)
	g.logf("Mapping file into cache: %s", fp)

	if err := g.fc.MapFile(fp); err != nil {
		return nil, err
	}

	return g.openCached(fp)
}

// openCached returns a reader for the cache entry keyed by filepath fp.
// If the entry was evicted between the caller's check and this Get — so
// we unexpectedly become the writer — the entry is refilled from the file
// on disk rather than discarding the writer, which would leave other
// readers blocked on a stream that never completes.
func (g *MapGetter) openCached(fp string) (io.ReadCloser, error) {
	r, w, err := g.fc.Get(fp)
	if err != nil {
		return nil, err
	}
	if w == nil {
		return r, nil
	}

	f, err := os.Open(fp)
	if err != nil {
		_ = w.Close()
		_ = r.Close()
		_ = g.fc.Remove(fp)
		return nil, err
	}
	defer f.Close()

	if _, err = io.Copy(w, f); err != nil {
		_ = w.Close()
		_ = r.Close()
		_ = g.fc.Remove(fp)
		return nil, err
	}
	if err = w.Close(); err != nil {
		_ = r.Close()
		return nil, err
	}
	return r, nil
}

// httpGet writes the contents at URL u to w (which
// is always closed).
func httpGet(u string, w io.WriteCloser) error {
resp, err := http.Get(u)
if err != nil {
return err
}
defer resp.Body.Close()

if _, err = io.Copy(w, resp.Body); err != nil {
_ = w.Close()
return err
}

return w.Close()
}