@@ -58,8 +58,10 @@ struct bs_cache_key {
58
58
uint32_t ruby_revision ;
59
59
uint64_t size ;
60
60
uint64_t mtime ;
61
- uint64_t data_size ; /* not used for equality */
62
- uint8_t pad [24 ];
61
+ uint64_t data_size ; //
62
+ uint64_t digest ;
63
+ uint8_t digest_set ;
64
+ uint8_t pad [15 ];
63
65
} __attribute__((packed ));
64
66
65
67
/*
@@ -73,7 +75,7 @@ struct bs_cache_key {
73
75
STATIC_ASSERT (sizeof (struct bs_cache_key ) == KEY_SIZE );
74
76
75
77
/* Effectively a schema version. Bumping invalidates all previous caches */
76
- static const uint32_t current_version = 4 ;
78
+ static const uint32_t current_version = 5 ;
77
79
78
80
/* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
79
81
* new OS ABI, etc. */
@@ -93,6 +95,7 @@ static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
93
95
static ID instrumentation_method ;
94
96
static VALUE sym_miss ;
95
97
static VALUE sym_stale ;
98
+ static VALUE sym_revalidated ;
96
99
static bool instrumentation_enabled = false;
97
100
static bool readonly = false;
98
101
@@ -104,9 +107,18 @@ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handl
104
107
static VALUE bs_rb_precompile (VALUE self , VALUE cachedir_v , VALUE path_v , VALUE handler );
105
108
106
109
/* Helpers */
110
+ enum cache_status {
111
+ miss ,
112
+ hit ,
113
+ stale ,
114
+ };
107
115
static void bs_cache_path (const char * cachedir , const VALUE path , char (* cache_path )[MAX_CACHEPATH_SIZE ]);
108
116
static int bs_read_key (int fd , struct bs_cache_key * key );
109
- static int cache_key_equal (struct bs_cache_key * k1 , struct bs_cache_key * k2 );
117
+ static enum cache_status cache_key_equal_fast_path (struct bs_cache_key * k1 , struct bs_cache_key * k2 );
118
+ static int cache_key_equal_slow_path (struct bs_cache_key * current_key , struct bs_cache_key * cached_key , const VALUE input_data );
119
+ static int update_cache_key (struct bs_cache_key * current_key , int cache_fd , const char * * errno_provenance );
120
+
121
+ static void bs_cache_key_digest (struct bs_cache_key * key , const VALUE input_data );
110
122
static VALUE bs_fetch (char * path , VALUE path_v , char * cache_path , VALUE handler , VALUE args );
111
123
static VALUE bs_precompile (char * path , VALUE path_v , char * cache_path , VALUE handler );
112
124
static int open_current_file (char * path , struct bs_cache_key * key , const char * * errno_provenance );
@@ -171,6 +183,9 @@ Init_bootsnap(void)
171
183
sym_stale = ID2SYM (rb_intern ("stale" ));
172
184
rb_global_variable (& sym_stale );
173
185
186
+ sym_revalidated = ID2SYM (rb_intern ("revalidated" ));
187
+ rb_global_variable (& sym_revalidated );
188
+
174
189
rb_define_module_function (rb_mBootsnap , "instrumentation_enabled=" , bs_instrumentation_enabled_set , 1 );
175
190
rb_define_module_function (rb_mBootsnap_CompileCache_Native , "readonly=" , bs_readonly_set , 1 );
176
191
rb_define_module_function (rb_mBootsnap_CompileCache_Native , "coverage_running?" , bs_rb_coverage_running , 0 );
@@ -189,6 +204,14 @@ bs_instrumentation_enabled_set(VALUE self, VALUE enabled)
189
204
return enabled ;
190
205
}
191
206
207
+ static inline void
208
+ bs_instrumentation (VALUE event , VALUE path )
209
+ {
210
+ if (RB_UNLIKELY (instrumentation_enabled )) {
211
+ rb_funcall (rb_mBootsnap , instrumentation_method , 2 , event , path );
212
+ }
213
+ }
214
+
192
215
static VALUE
193
216
bs_readonly_set (VALUE self , VALUE enabled )
194
217
{
@@ -294,17 +317,53 @@ bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_C
294
317
* The data_size member is not compared, as it serves more of a "header"
295
318
* function.
296
319
*/
297
- static int
298
- cache_key_equal (struct bs_cache_key * k1 , struct bs_cache_key * k2 )
320
+ static enum cache_status cache_key_equal_fast_path (struct bs_cache_key * k1 ,
321
+ struct bs_cache_key * k2 ) {
322
+ if (k1 -> version == k2 -> version &&
323
+ k1 -> ruby_platform == k2 -> ruby_platform &&
324
+ k1 -> compile_option == k2 -> compile_option &&
325
+ k1 -> ruby_revision == k2 -> ruby_revision && k1 -> size == k2 -> size ) {
326
+ return (k1 -> mtime == k2 -> mtime ) ? hit : stale ;
327
+ }
328
+ return miss ;
329
+ }
330
+
331
+ static int cache_key_equal_slow_path (struct bs_cache_key * current_key ,
332
+ struct bs_cache_key * cached_key ,
333
+ const VALUE input_data )
299
334
{
300
- return (
301
- k1 -> version == k2 -> version &&
302
- k1 -> ruby_platform == k2 -> ruby_platform &&
303
- k1 -> compile_option == k2 -> compile_option &&
304
- k1 -> ruby_revision == k2 -> ruby_revision &&
305
- k1 -> size == k2 -> size &&
306
- k1 -> mtime == k2 -> mtime
307
- );
335
+ bs_cache_key_digest (current_key , input_data );
336
+ return current_key -> digest == cached_key -> digest ;
337
+ }
338
+
339
+ static int update_cache_key (struct bs_cache_key * current_key , int cache_fd , const char * * errno_provenance )
340
+ {
341
+ lseek (cache_fd , 0 , SEEK_SET );
342
+ ssize_t nwrite = write (cache_fd , current_key , KEY_SIZE );
343
+ if (nwrite < 0 ) {
344
+ * errno_provenance = "update_cache_key:write" ;
345
+ return -1 ;
346
+ }
347
+
348
+ #ifdef HAVE_FDATASYNC
349
+ if (fdatasync (cache_fd ) < 0 ) {
350
+ * errno_provenance = "update_cache_key:fdatasync" ;
351
+ return -1 ;
352
+ }
353
+ #endif
354
+
355
+ return 0 ;
356
+ }
357
+
358
+ /*
359
+ * Fills the cache key digest.
360
+ */
361
+ static void bs_cache_key_digest (struct bs_cache_key * key ,
362
+ const VALUE input_data ) {
363
+ if (key -> digest_set )
364
+ return ;
365
+ key -> digest = fnv1a_64 (input_data );
366
+ key -> digest_set = 1 ;
308
367
}
309
368
310
369
/*
@@ -393,6 +452,7 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
393
452
key -> ruby_revision = current_ruby_revision ;
394
453
key -> size = (uint64_t )statbuf .st_size ;
395
454
key -> mtime = (uint64_t )statbuf .st_mtime ;
455
+ key -> digest_set = false;
396
456
397
457
return fd ;
398
458
}
@@ -436,7 +496,12 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
436
496
{
437
497
int fd , res ;
438
498
439
- fd = open (path , O_RDONLY | O_NOATIME );
499
+ if (readonly ) {
500
+ fd = open (path , O_RDONLY | O_NOATIME );
501
+ } else {
502
+ fd = open (path , O_RDWR | O_NOATIME );
503
+ }
504
+
440
505
if (fd < 0 ) {
441
506
* errno_provenance = "bs_fetch:open_cache_file:open" ;
442
507
return CACHE_MISS ;
@@ -681,7 +746,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
681
746
int res , valid_cache = 0 , exception_tag = 0 ;
682
747
const char * errno_provenance = NULL ;
683
748
684
- VALUE input_data ; /* data read from source file, e.g. YAML or ruby source */
749
+ VALUE input_data = Qfalse ; /* data read from source file, e.g. YAML or ruby source */
685
750
VALUE storage_data ; /* compiled data, e.g. msgpack / binary iseq */
686
751
VALUE output_data ; /* return data, e.g. ruby hash or loaded iseq */
687
752
@@ -699,20 +764,43 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
699
764
cache_fd = open_cache_file (cache_path , & cached_key , & errno_provenance );
700
765
if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE ) {
701
766
/* This is ok: valid_cache remains false, we re-populate it. */
702
- if (RB_UNLIKELY (instrumentation_enabled )) {
703
- rb_funcall (rb_mBootsnap , instrumentation_method , 2 , cache_fd == CACHE_MISS ? sym_miss : sym_stale , path_v );
704
- }
767
+ bs_instrumentation (cache_fd == CACHE_MISS ? sym_miss : sym_stale , path_v );
705
768
} else if (cache_fd < 0 ) {
706
769
exception_message = rb_str_new_cstr (cache_path );
707
770
goto fail_errno ;
708
771
} else {
709
772
/* True if the cache existed and no invalidating changes have occurred since
710
773
* it was generated. */
711
- valid_cache = cache_key_equal (& current_key , & cached_key );
712
- if (RB_UNLIKELY (instrumentation_enabled )) {
713
- if (!valid_cache ) {
714
- rb_funcall (rb_mBootsnap , instrumentation_method , 2 , sym_stale , path_v );
774
+
775
+ switch (cache_key_equal_fast_path (& current_key , & cached_key )) {
776
+ case hit :
777
+ valid_cache = true;
778
+ break ;
779
+ case miss :
780
+ valid_cache = false;
781
+ break ;
782
+ case stale :
783
+ valid_cache = false;
784
+ if ((input_data = bs_read_contents (current_fd , current_key .size ,
785
+ & errno_provenance )) == Qfalse ) {
786
+ exception_message = path_v ;
787
+ goto fail_errno ;
788
+ }
789
+ valid_cache = cache_key_equal_slow_path (& current_key , & cached_key , input_data );
790
+ if (valid_cache ) {
791
+ if (!readonly ) {
792
+ if (update_cache_key (& current_key , cache_fd , & errno_provenance )) {
793
+ exception_message = path_v ;
794
+ goto fail_errno ;
795
+ }
796
+ }
797
+ bs_instrumentation (sym_revalidated , path_v );
715
798
}
799
+ break ;
800
+ };
801
+
802
+ if (!valid_cache ) {
803
+ bs_instrumentation (sym_stale , path_v );
716
804
}
717
805
}
718
806
@@ -726,7 +814,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
726
814
else if (res == CACHE_UNCOMPILABLE ) {
727
815
/* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
728
816
This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
729
- if ((input_data = bs_read_contents (current_fd , current_key .size , & errno_provenance )) == Qfalse ){
817
+ if (input_data == Qfalse && (input_data = bs_read_contents (current_fd , current_key .size , & errno_provenance )) == Qfalse ) {
730
818
exception_message = path_v ;
731
819
goto fail_errno ;
732
820
}
@@ -745,7 +833,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
745
833
/* Cache is stale, invalid, or missing. Regenerate and write it out. */
746
834
747
835
/* Read the contents of the source file into a buffer */
748
- if ((input_data = bs_read_contents (current_fd , current_key .size , & errno_provenance )) == Qfalse ){
836
+ if (input_data == Qfalse && (input_data = bs_read_contents (current_fd , current_key .size , & errno_provenance )) == Qfalse ) {
749
837
exception_message = path_v ;
750
838
goto fail_errno ;
751
839
}
@@ -767,6 +855,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
767
855
* We do however ignore any failures to persist the cache, as it's better
768
856
* to move along, than to interrupt the process.
769
857
*/
858
+ bs_cache_key_digest (& current_key , input_data );
770
859
atomic_write_cache_file (cache_path , & current_key , storage_data , & errno_provenance );
771
860
772
861
/* Having written the cache, now convert storage_data to output_data */
@@ -822,12 +911,16 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
822
911
static VALUE
823
912
bs_precompile (char * path , VALUE path_v , char * cache_path , VALUE handler )
824
913
{
914
+ if (readonly ) {
915
+ return Qfalse ;
916
+ }
917
+
825
918
struct bs_cache_key cached_key , current_key ;
826
919
int cache_fd = -1 , current_fd = -1 ;
827
920
int res , valid_cache = 0 , exception_tag = 0 ;
828
921
const char * errno_provenance = NULL ;
829
922
830
- VALUE input_data ; /* data read from source file, e.g. YAML or ruby source */
923
+ VALUE input_data = Qfalse ; /* data read from source file, e.g. YAML or ruby source */
831
924
VALUE storage_data ; /* compiled data, e.g. msgpack / binary iseq */
832
925
833
926
/* Open the source file and generate a cache key for it */
@@ -843,7 +936,28 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
843
936
} else {
844
937
/* True if the cache existed and no invalidating changes have occurred since
845
938
* it was generated. */
846
- valid_cache = cache_key_equal (& current_key , & cached_key );
939
+ switch (cache_key_equal_fast_path (& current_key , & cached_key )) {
940
+ case hit :
941
+ valid_cache = true;
942
+ break ;
943
+ case miss :
944
+ valid_cache = false;
945
+ break ;
946
+ case stale :
947
+ valid_cache = false;
948
+ if ((input_data = bs_read_contents (current_fd , current_key .size , & errno_provenance )) == Qfalse ) {
949
+ goto fail ;
950
+ }
951
+ valid_cache = cache_key_equal_slow_path (& current_key , & cached_key , input_data );
952
+ if (valid_cache ) {
953
+ if (update_cache_key (& current_key , cache_fd , & errno_provenance )) {
954
+ goto fail ;
955
+ }
956
+
957
+ bs_instrumentation (sym_revalidated , path_v );
958
+ }
959
+ break ;
960
+ };
847
961
}
848
962
849
963
if (valid_cache ) {
@@ -870,6 +984,7 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
870
984
if (!RB_TYPE_P (storage_data , T_STRING )) goto fail ;
871
985
872
986
/* Write the cache key and storage_data to the cache directory */
987
+ bs_cache_key_digest (& current_key , input_data );
873
988
res = atomic_write_cache_file (cache_path , & current_key , storage_data , & errno_provenance );
874
989
if (res < 0 ) goto fail ;
875
990
0 commit comments