-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilecache.go
443 lines (375 loc) · 13.2 KB
/
filecache.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
package datablock
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"sync"
"github.com/sirupsen/logrus"
)
// emptyFileID is the zero-value file ID, returned together with an error
// when no valid ID can be produced.
const emptyFileID = ""
// FileCache manages a set of bytes as a cache.
// Data blocks are packed back to back in blob; index maps a normalized
// filename to the start offset of its block, and hits tracks per-file
// popularity for eviction decisions. All access goes through rw.
type FileCache struct {
	size              uint64            // Total size of the cache
	blob              []byte            // The cache storage
	index             map[string]uint64 // Overview of where the data is placed in the cache
	hits              map[string]uint64 // Keeping track of data popularity
	offset            uint64            // The current position in the cache storage (end of data)
	rw                *sync.RWMutex     // Used for avoiding data races and other issues
	cacheWarningGiven bool              // Used to only warn once if the cache is full
	compress          bool              // Enable data compression
	maxEntitySize     uint64            // Maximum size per entity in cache
	compressionSpeed  bool              // Prioritize faster or better compression?
	verbose           bool              // Verbose mode?
	maxGivenDataSize  uint64            // Maximum size of uncompressed data to be stored in cache
}
// Sentinel errors returned by the cache operations below.
var (
	// ErrRemoval is used if a filename that does not exist is attempted to be removed
	ErrRemoval = errors.New("can't remove a file ID that does not exist")
	// ErrNoData is used if no data is attempted to be stored in the cache
	ErrNoData = errors.New("no data")
	// ErrAlreadyStored is used if a given filename has already been stored in the cache
	ErrAlreadyStored = errors.New("file ID is already stored")
	// ErrLargerThanCache is used if the given data is larger than the total cache size
	ErrLargerThanCache = errors.New("data is larger than the the total cache size")
	// ErrEntityTooLarge is used if a maximum size per entity has been set
	ErrEntityTooLarge = errors.New("data is larger than the allowed size")
	// ErrGivenDataSizeTooLarge is returned if the uncompressed size of the given data is too large
	ErrGivenDataSizeTooLarge = errors.New("size of given data is larger than allowed")
)
// NewFileCache creates a new FileCache struct.
// cacheSize is the total cache size, in bytes.
// compress is for enabling compression of cache data.
// maxEntitySize is a per-file maximum size for stored (possibly
// compressed) data. (0 to disable)
// compressionSpeed is if speedy compression should be used over compact
// compression (set in datablock.go).
// maxGivenDataSize is the maximum amount of bytes that can be given at
// once. (0 to disable, 1 MiB is recommended)
func NewFileCache(cacheSize uint64, compress bool, maxEntitySize uint64, compressionSpeed bool, maxGivenDataSize uint64) *FileCache {
	return &FileCache{
		size:             cacheSize,
		blob:             make([]byte, cacheSize), // the cache storage
		index:            make(map[string]uint64),
		hits:             make(map[string]uint64),
		rw:               &sync.RWMutex{},
		compress:         compress,
		maxEntitySize:    maxEntitySize,
		compressionSpeed: compressionSpeed,
		maxGivenDataSize: maxGivenDataSize,
	}
}
// normalize converts a filename into its canonical cache key by
// delegating to the package-level normalize function.
func (cache *FileCache) normalize(filename string) string {
	return normalize(filename)
}
// removeBytes removes n bytes from the cache blob, starting at position i,
// by shifting the trailing data to the left.
// The blob keeps its full length; the n stale bytes left at the end are
// outside the live region (tracked by cache.offset) and will be
// overwritten by future stores.
func (cache *FileCache) removeBytes(i, n uint64) {
	// A single memmove. Unlike the append-then-reslice-to-cap approach,
	// this never changes the slice length and does not rely on
	// cap(cache.blob) being exactly the configured cache size.
	copy(cache.blob[i:], cache.blob[i+n:])
}
// IsEmpty reports whether the cache currently holds no files.
func (cache *FileCache) IsEmpty() bool {
	return len(cache.index) == 0
}
// shuffleIndicesLeft moves every index entry that points past removedpos
// to the left by offset bytes, compensating for a removed data block.
// (The end-of-data offset itself is adjusted by the caller, see remove.)
func (cache *FileCache) shuffleIndicesLeft(removedpos, offset uint64) {
	for key, position := range cache.index {
		if position > removedpos {
			cache.index[key] = position - offset
		}
	}
}
// removeIndex deletes the index entry for the given id.
// Deleting a key that is not present is a no-op.
func (cache *FileCache) removeIndex(id string) {
	delete(cache.index, id)
}
// remove deletes the data stored for the given id from the cache:
// it drops the index entry, shuffles the remaining data to the left to
// close the gap, adjusts all index pointers that pointed past the
// removed block, and moves the end-of-data offset back accordingly.
// Returns ErrRemoval if the id is not present in the cache.
func (cache *FileCache) remove(id string) error {
	if !cache.hasFile(id) {
		return ErrRemoval
	}
	// Find the position and size of the given id
	pos := cache.index[id]
	size := cache.dataSize(id)
	cache.removeIndex(id)
	cache.shuffleIndicesLeft(pos, size)
	cache.removeBytes(pos, size)
	// size is already a uint64; no conversion needed
	cache.offset -= size
	return nil
}
// leastPopular finds the cached entry with the fewest cache hits.
// Entries that have never been hit are preferred; among tracked entries
// the one with the lowest hit count wins (map iteration order makes
// ties arbitrary, as before).
// Returns ErrNoData if the cache holds no entries.
func (cache *FileCache) leastPopular() (string, error) {
	// If there is no data, return an error
	if len(cache.index) == 0 {
		return emptyFileID, ErrNoData
	}
	// Prefer any entry with no recorded cache hits.
	// A direct map lookup replaces the former O(n) scan of cache.hits.
	for id := range cache.index {
		if _, tracked := cache.hits[id]; !tracked {
			return id, nil
		}
	}
	// Every entry has hits; find the one with the fewest.
	var (
		leastHits   uint64
		leastHitsID string
		firstRound  = true
	)
	for id := range cache.index {
		if firstRound || cache.hits[id] < leastHits {
			leastHits = cache.hits[id]
			leastHitsID = id
			firstRound = false
		}
	}
	// Return the one with the fewest cache hits.
	return leastHitsID, nil
}
// storeData stores data in the cache under the normalized filename,
// evicting the least popular entries until there is room.
// Returns the stored data block (it may be compressed) and an error:
// ErrGivenDataSizeTooLarge, ErrAlreadyStored, ErrLargerThanCache,
// ErrEntityTooLarge, or a wrapped compression error.
func (cache *FileCache) storeData(filename string, data []byte) (storedDataBlock *DataBlock, err error) {
	// Check if the given data is too large before attempting to compress it
	if cache.maxGivenDataSize != 0 && uint64(len(data)) > cache.maxGivenDataSize {
		return nil, ErrGivenDataSizeTooLarge
	}
	// Compress the data, if compression is enabled
	var fileSize uint64
	if cache.compress {
		compressedData, dataLength, err := compress(data, cache.compressionSpeed)
		if err != nil {
			// Lowercase message and %w wrapping, per Go error conventions
			return nil, fmt.Errorf("compression error: %w", err)
		}
		data = compressedData
		fileSize = uint64(dataLength)
	} else {
		fileSize = uint64(len(data))
	}
	id := cache.normalize(filename)
	if cache.hasFile(id) {
		return nil, ErrAlreadyStored
	}
	if fileSize > cache.size {
		return nil, ErrLargerThanCache
	}
	if cache.maxEntitySize != 0 && fileSize > cache.maxEntitySize {
		return nil, ErrEntityTooLarge
	}
	// Warn once that the cache is now full
	if !cache.cacheWarningGiven && fileSize > cache.freeSpace() {
		logrus.Warn("Cache is full. You may want to increase the cache size.")
		cache.cacheWarningGiven = true
	}
	// While there is not enough space, remove the least popular data
	for fileSize > cache.freeSpace() {
		// Find the least popular data
		removeID, err := cache.leastPopular()
		if err != nil {
			return nil, err
		}
		if cache.verbose {
			logrus.Info(fmt.Sprintf("Removing the unpopular %v from the cache (%d bytes)", removeID, cache.dataSize(removeID)))
		}
		spaceBefore := cache.freeSpace()
		if err = cache.remove(removeID); err != nil {
			return nil, err
		}
		spaceAfter := cache.freeSpace()
		// Guard against an eviction that frees nothing, which would loop forever
		if spaceBefore == spaceAfter {
			panic(fmt.Sprintf("Removed %v, but the free space is the same! Still %d bytes.", removeID, spaceAfter))
		}
	}
	if cache.verbose {
		logrus.Info(fmt.Sprintf("Storing in cache: %v", id))
	}
	// Register the position in the data index
	cache.index[id] = cache.offset
	// Copy the contents into the cache with a single copy (memmove)
	// instead of a byte-by-byte loop
	copy(cache.blob[cache.offset:cache.offset+fileSize], data)
	// Move the offset to the end of the data (the next free location)
	cache.offset += fileSize
	return newDataBlockSpecified(data, cache.compress, cache.compressionSpeed), nil
}
// hasFile reports whether the given (normalized) file ID exists in the
// cache index. A direct map lookup replaces the former O(n) key loop.
func (cache *FileCache) hasFile(id string) bool {
	_, ok := cache.index[id]
	return ok
}
// nextData locates the start position of the data block that follows
// startpos (the stored position strictly greater than startpos with the
// smallest gap). Returns that position and true if one was found;
// otherwise returns startpos and false.
func (cache *FileCache) nextData(startpos uint64) (uint64, bool) {
	var (
		bestGap = cache.size - startpos // largest possible data size
		endpos  = startpos              // initial end of data
		found   bool
	)
	for _, candidate := range cache.index {
		if candidate <= startpos {
			continue
		}
		if gap := candidate - startpos; gap < bestGap {
			bestGap = gap
			endpos = candidate
			found = true
		}
	}
	// endpos is the next start of a data block
	return endpos, found
}
// dataSize reports the number of bytes occupied by the cached data
// block belonging to id. The id must exist in the index.
func (cache *FileCache) dataSize(id string) uint64 {
	start := cache.index[id]
	// The block ends where the next block begins
	if end, ok := cache.nextData(start); ok {
		return end - start
	}
	// No later block: the data runs until the end-of-data offset
	return cache.offset - start
}
// storeFile reads a file from disk and stores it in the cache.
// The bool result is true if the file contents were read successfully,
// regardless of whether caching succeeded; a caching error (e.g. the
// file being too large for the cache) is still returned alongside the
// uncompressed data.
func (cache *FileCache) storeFile(filename string) (*DataBlock, bool, error) {
	data, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return nil, false, readErr
	}
	// Try to cache the freshly read bytes; the caller decides how to
	// treat a cache failure.
	_, cacheErr := cache.storeData(filename, data)
	// Hand back the uncompressed data either way
	return NewDataBlock(data, cache.compressionSpeed), true, cacheErr
}
// fetchAndCache returns the data for filename: from the cache when
// present (counting a cache hit), otherwise from disk, populating the
// cache on the way. Cache errors during population are logged (in
// verbose mode) but not returned as long as the disk read succeeded.
func (cache *FileCache) fetchAndCache(filename string) (*DataBlock, error) {
	// RWMutex locks
	cache.rw.Lock()
	defer cache.rw.Unlock()
	// id is already a string; the former string(id) conversions were redundant
	id := cache.normalize(filename)
	// Check if the file needs to be read from disk
	if !cache.hasFile(id) {
		if cache.verbose {
			logrus.Info("Reading from disk: " + id)
			logrus.Info("Storing in cache: " + id)
		}
		block, gotTheData, err := cache.storeFile(id)
		// Cache errors are logged as warnings, and not being returned
		if err != nil && cache.verbose {
			// Could be that the file is too large for the cache
			logrus.Warn(err)
		}
		// Only return an error here if we were not able to read the file from disk
		if !gotTheData {
			return nil, err
		}
		// Return the data, with no errors to report
		return block, nil
	}
	if cache.verbose {
		logrus.Info("Retrieving from cache: " + id)
	}
	// Find the start and size of the cached data
	startpos := cache.index[id]
	size := cache.dataSize(id)
	// Copy the data out of the cache with a single copy (memmove)
	// instead of a byte-by-byte loop
	data := make([]byte, size)
	copy(data, cache.blob[startpos:startpos+size])
	// Mark a cache hit
	cache.hits[id]++
	// Return the data block
	return newDataBlockSpecified(data, cache.compress, cache.compressionSpeed), nil
}
// freeSpace returns the number of unused bytes left in the cache blob
// (total size minus the current end-of-data offset).
func (cache *FileCache) freeSpace() uint64 {
	return cache.size - cache.offset
}
// Stats returns formatted cache statistics: compression setting, total
// and free space, the end-of-data offset, and per-entry positions,
// sizes and hit counts.
func (cache *FileCache) Stats() string {
	// Stats only reads cache state, so a read lock is sufficient and
	// lets concurrent readers proceed
	cache.rw.RLock()
	defer cache.rw.RUnlock()
	var buf bytes.Buffer
	buf.WriteString("Cache information:\n")
	buf.WriteString(fmt.Sprintf("\tCompression:\t%s\n", map[bool]string{true: "enabled", false: "disabled"}[cache.compress]))
	buf.WriteString(fmt.Sprintf("\tTotal cache:\t%d bytes\n", cache.size))
	buf.WriteString(fmt.Sprintf("\tFree cache:\t%d bytes\n", cache.freeSpace()))
	buf.WriteString(fmt.Sprintf("\tEnd of data:\tat %d\n", cache.offset))
	if len(cache.index) > 0 {
		buf.WriteString("\tData in cache:\n")
		for id, pos := range cache.index {
			buf.WriteString(fmt.Sprintf("\t\tid=%v\tpos=%d\tsize=%d\n", id, pos, cache.dataSize(id)))
		}
	}
	var totalHits uint64
	if len(cache.hits) > 0 {
		buf.WriteString("\tCache hits:\n")
		for id, hits := range cache.hits {
			buf.WriteString(fmt.Sprintf("\t\tid=%v\thits=%d\n", id, hits))
			totalHits += hits
		}
		buf.WriteString(fmt.Sprintf("\tTotal cache hits:\t%d", totalHits))
	}
	return buf.String()
}
// Clear empties the entire cache, resetting the index, the hit counters
// and the end-of-data offset.
func (cache *FileCache) Clear() {
	cache.rw.Lock()
	defer cache.rw.Unlock()
	cache.index = make(map[string]uint64)
	cache.hits = make(map[string]uint64)
	cache.offset = 0
	// The raw bytes are left untouched: once the index is empty they are
	// unreachable and will simply be overwritten by future stores.
	// (If the caching algorithm ever changes, consider re-allocating:
	//  cache.blob = make([]byte, cache.size))
	// Re-arm the one-time "cache is full" warning
	cache.cacheWarningGiven = false
}
// Read reads a file, going through the cache when cached is true
// (populating it on a miss), or straight from disk otherwise.
// Returns the uncompressed data as a DataBlock.
func (cache *FileCache) Read(filename string, cached bool) (*DataBlock, error) {
	if cached {
		// Read the file from cache (or disk, if not cached)
		return cache.fetchAndCache(filename)
	}
	// Normalize the filename; normalize already returns a string, so the
	// former string(...) conversion was redundant
	filename = cache.normalize(filename)
	if cache.verbose {
		logrus.Info("Reading from disk: " + filename)
	}
	// Read the file
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}
	// Return the uncompressed data
	return NewDataBlock(data, cache.compressionSpeed), nil
}