Skip to content

Commit 4a91add

Browse files
authored
Merge pull request #468 from Shopify/revalidate-mtime
Revalidate cache based on source digest
2 parents c12002f + df6267f commit 4a91add

File tree

7 files changed

+197
-37
lines changed

7 files changed

+197
-37
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Unreleased
22

3+
* Revalidate stale cache entries by digesting the source content.
4+
This should significantly improve performance in environments where `mtime` isn't preserved (e.g. CI systems doing a git clone, etc).
5+
See #468.
36
* Open source files and cache entries with `O_NOATIME` when available to reduce disk accesses. See #469.
47
* `bootsnap precompile --gemfile` now look for `.rb` files in the whole gem and not just the `lib/` directory. See #466.
58

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ Bootsnap cache misses can be monitored though a callback:
9999
Bootsnap.instrumentation = ->(event, path) { puts "#{event} #{path}" }
100100
```
101101

102-
`event` is either `:miss` or `:stale`. You can also call `Bootsnap.log!` as a shortcut to
102+
`event` is either `:miss`, `:stale` or `:revalidated`. You can also call `Bootsnap.log!` as a shortcut to
103103
log all events to STDERR.
104104

105105
To turn instrumentation back off you can set it to nil:

ext/bootsnap/bootsnap.c

Lines changed: 142 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ struct bs_cache_key {
5858
uint32_t ruby_revision;
5959
uint64_t size;
6060
uint64_t mtime;
61-
uint64_t data_size; /* not used for equality */
62-
uint8_t pad[24];
61+
uint64_t data_size; //
62+
uint64_t digest;
63+
uint8_t digest_set;
64+
uint8_t pad[15];
6365
} __attribute__((packed));
6466

6567
/*
@@ -73,7 +75,7 @@ struct bs_cache_key {
7375
STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);
7476

7577
/* Effectively a schema version. Bumping invalidates all previous caches */
76-
static const uint32_t current_version = 4;
78+
static const uint32_t current_version = 5;
7779

7880
/* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
7981
* new OS ABI, etc. */
@@ -93,6 +95,7 @@ static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
9395
static ID instrumentation_method;
9496
static VALUE sym_miss;
9597
static VALUE sym_stale;
98+
static VALUE sym_revalidated;
9699
static bool instrumentation_enabled = false;
97100
static bool readonly = false;
98101

@@ -104,9 +107,18 @@ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handl
104107
static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);
105108

106109
/* Helpers */
110+
enum cache_status {
111+
miss,
112+
hit,
113+
stale,
114+
};
107115
static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
108116
static int bs_read_key(int fd, struct bs_cache_key * key);
109-
static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2);
117+
static enum cache_status cache_key_equal_fast_path(struct bs_cache_key * k1, struct bs_cache_key * k2);
118+
static int cache_key_equal_slow_path(struct bs_cache_key * current_key, struct bs_cache_key * cached_key, const VALUE input_data);
119+
static int update_cache_key(struct bs_cache_key *current_key, int cache_fd, const char ** errno_provenance);
120+
121+
static void bs_cache_key_digest(struct bs_cache_key * key, const VALUE input_data);
110122
static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args);
111123
static VALUE bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler);
112124
static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance);
@@ -171,6 +183,9 @@ Init_bootsnap(void)
171183
sym_stale = ID2SYM(rb_intern("stale"));
172184
rb_global_variable(&sym_stale);
173185

186+
sym_revalidated = ID2SYM(rb_intern("revalidated"));
187+
rb_global_variable(&sym_revalidated);
188+
174189
rb_define_module_function(rb_mBootsnap, "instrumentation_enabled=", bs_instrumentation_enabled_set, 1);
175190
rb_define_module_function(rb_mBootsnap_CompileCache_Native, "readonly=", bs_readonly_set, 1);
176191
rb_define_module_function(rb_mBootsnap_CompileCache_Native, "coverage_running?", bs_rb_coverage_running, 0);
@@ -189,6 +204,14 @@ bs_instrumentation_enabled_set(VALUE self, VALUE enabled)
189204
return enabled;
190205
}
191206

207+
static inline void
208+
bs_instrumentation(VALUE event, VALUE path)
209+
{
210+
if (RB_UNLIKELY(instrumentation_enabled)) {
211+
rb_funcall(rb_mBootsnap, instrumentation_method, 2, event, path);
212+
}
213+
}
214+
192215
static VALUE
193216
bs_readonly_set(VALUE self, VALUE enabled)
194217
{
@@ -294,17 +317,53 @@ bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_C
294317
* The data_size member is not compared, as it serves more of a "header"
295318
* function.
296319
*/
297-
static int
298-
cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2)
320+
static enum cache_status cache_key_equal_fast_path(struct bs_cache_key *k1,
321+
struct bs_cache_key *k2) {
322+
if (k1->version == k2->version &&
323+
k1->ruby_platform == k2->ruby_platform &&
324+
k1->compile_option == k2->compile_option &&
325+
k1->ruby_revision == k2->ruby_revision && k1->size == k2->size) {
326+
return (k1->mtime == k2->mtime) ? hit : stale;
327+
}
328+
return miss;
329+
}
330+
331+
static int cache_key_equal_slow_path(struct bs_cache_key *current_key,
332+
struct bs_cache_key *cached_key,
333+
const VALUE input_data)
299334
{
300-
return (
301-
k1->version == k2->version &&
302-
k1->ruby_platform == k2->ruby_platform &&
303-
k1->compile_option == k2->compile_option &&
304-
k1->ruby_revision == k2->ruby_revision &&
305-
k1->size == k2->size &&
306-
k1->mtime == k2->mtime
307-
);
335+
bs_cache_key_digest(current_key, input_data);
336+
return current_key->digest == cached_key->digest;
337+
}
338+
339+
static int update_cache_key(struct bs_cache_key *current_key, int cache_fd, const char ** errno_provenance)
340+
{
341+
lseek(cache_fd, 0, SEEK_SET);
342+
ssize_t nwrite = write(cache_fd, current_key, KEY_SIZE);
343+
if (nwrite < 0) {
344+
*errno_provenance = "update_cache_key:write";
345+
return -1;
346+
}
347+
348+
#ifdef HAVE_FDATASYNC
349+
if (fdatasync(cache_fd) < 0) {
350+
*errno_provenance = "update_cache_key:fdatasync";
351+
return -1;
352+
}
353+
#endif
354+
355+
return 0;
356+
}
357+
358+
/*
359+
* Fills the cache key digest.
360+
*/
361+
static void bs_cache_key_digest(struct bs_cache_key *key,
362+
const VALUE input_data) {
363+
if (key->digest_set)
364+
return;
365+
key->digest = fnv1a_64(input_data);
366+
key->digest_set = 1;
308367
}
309368

310369
/*
@@ -393,6 +452,7 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
393452
key->ruby_revision = current_ruby_revision;
394453
key->size = (uint64_t)statbuf.st_size;
395454
key->mtime = (uint64_t)statbuf.st_mtime;
455+
key->digest_set = false;
396456

397457
return fd;
398458
}
@@ -436,7 +496,12 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
436496
{
437497
int fd, res;
438498

439-
fd = open(path, O_RDONLY | O_NOATIME);
499+
if (readonly) {
500+
fd = open(path, O_RDONLY | O_NOATIME);
501+
} else {
502+
fd = open(path, O_RDWR | O_NOATIME);
503+
}
504+
440505
if (fd < 0) {
441506
*errno_provenance = "bs_fetch:open_cache_file:open";
442507
return CACHE_MISS;
@@ -681,7 +746,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
681746
int res, valid_cache = 0, exception_tag = 0;
682747
const char * errno_provenance = NULL;
683748

684-
VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
749+
VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
685750
VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
686751
VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
687752

@@ -699,20 +764,43 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
699764
cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
700765
if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
701766
/* This is ok: valid_cache remains false, we re-populate it. */
702-
if (RB_UNLIKELY(instrumentation_enabled)) {
703-
rb_funcall(rb_mBootsnap, instrumentation_method, 2, cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v);
704-
}
767+
bs_instrumentation(cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v);
705768
} else if (cache_fd < 0) {
706769
exception_message = rb_str_new_cstr(cache_path);
707770
goto fail_errno;
708771
} else {
709772
/* True if the cache existed and no invalidating changes have occurred since
710773
* it was generated. */
711-
valid_cache = cache_key_equal(&current_key, &cached_key);
712-
if (RB_UNLIKELY(instrumentation_enabled)) {
713-
if (!valid_cache) {
714-
rb_funcall(rb_mBootsnap, instrumentation_method, 2, sym_stale, path_v);
774+
775+
switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
776+
case hit:
777+
valid_cache = true;
778+
break;
779+
case miss:
780+
valid_cache = false;
781+
break;
782+
case stale:
783+
valid_cache = false;
784+
if ((input_data = bs_read_contents(current_fd, current_key.size,
785+
&errno_provenance)) == Qfalse) {
786+
exception_message = path_v;
787+
goto fail_errno;
788+
}
789+
valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
790+
if (valid_cache) {
791+
if (!readonly) {
792+
if (update_cache_key(&current_key, cache_fd, &errno_provenance)) {
793+
exception_message = path_v;
794+
goto fail_errno;
795+
}
796+
}
797+
bs_instrumentation(sym_revalidated, path_v);
715798
}
799+
break;
800+
};
801+
802+
if (!valid_cache) {
803+
bs_instrumentation(sym_stale, path_v);
716804
}
717805
}
718806

@@ -726,7 +814,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
726814
else if (res == CACHE_UNCOMPILABLE) {
727815
/* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
728816
This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
729-
if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse){
817+
if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
730818
exception_message = path_v;
731819
goto fail_errno;
732820
}
@@ -745,7 +833,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
745833
/* Cache is stale, invalid, or missing. Regenerate and write it out. */
746834

747835
/* Read the contents of the source file into a buffer */
748-
if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse){
836+
if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
749837
exception_message = path_v;
750838
goto fail_errno;
751839
}
@@ -767,6 +855,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
767855
* We do however ignore any failures to persist the cache, as it's better
768856
* to move along, than to interrupt the process.
769857
*/
858+
bs_cache_key_digest(&current_key, input_data);
770859
atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
771860

772861
/* Having written the cache, now convert storage_data to output_data */
@@ -822,12 +911,16 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
822911
static VALUE
823912
bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
824913
{
914+
if (readonly) {
915+
return Qfalse;
916+
}
917+
825918
struct bs_cache_key cached_key, current_key;
826919
int cache_fd = -1, current_fd = -1;
827920
int res, valid_cache = 0, exception_tag = 0;
828921
const char * errno_provenance = NULL;
829922

830-
VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
923+
VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
831924
VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
832925

833926
/* Open the source file and generate a cache key for it */
@@ -843,7 +936,28 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
843936
} else {
844937
/* True if the cache existed and no invalidating changes have occurred since
845938
* it was generated. */
846-
valid_cache = cache_key_equal(&current_key, &cached_key);
939+
switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
940+
case hit:
941+
valid_cache = true;
942+
break;
943+
case miss:
944+
valid_cache = false;
945+
break;
946+
case stale:
947+
valid_cache = false;
948+
if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
949+
goto fail;
950+
}
951+
valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
952+
if (valid_cache) {
953+
if (update_cache_key(&current_key, cache_fd, &errno_provenance)) {
954+
goto fail;
955+
}
956+
957+
bs_instrumentation(sym_revalidated, path_v);
958+
}
959+
break;
960+
};
847961
}
848962

849963
if (valid_cache) {
@@ -870,6 +984,7 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
870984
if (!RB_TYPE_P(storage_data, T_STRING)) goto fail;
871985

872986
/* Write the cache key and storage_data to the cache directory */
987+
bs_cache_key_digest(&current_key, input_data);
873988
res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
874989
if (res < 0) goto fail;
875990

ext/bootsnap/extconf.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
require "mkmf"
44

55
if %w[ruby truffleruby].include?(RUBY_ENGINE)
6+
have_func "fdatasync", "fcntl.h"
7+
68
unless RUBY_PLATFORM.match?(/mswin|mingw|cygwin/)
79
append_cppflags ["_GNU_SOURCE"] # Needed of O_NOATIME
810
end

test/compile_cache_key_format_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class CompileCacheKeyFormatTest < Minitest::Test
2222

2323
def test_key_version
2424
key = cache_key_for_file(FILE)
25-
exp = [4].pack("L")
25+
exp = [5].pack("L")
2626
assert_equal(exp, key[R[:version]])
2727
end
2828

0 commit comments

Comments
 (0)