diff --git a/build/wd_compile_cache.bzl b/build/wd_compile_cache.bzl new file mode 100644 index 00000000000..def35423218 --- /dev/null +++ b/build/wd_compile_cache.bzl @@ -0,0 +1,62 @@ +""" +Bazel rules for generating compilation caches from a list of input files. +""" + +def compile_cache_name(file_path): + return file_path + "_cache" + +def _gen_compile_cache_impl(ctx): + file_list = ctx.actions.declare_file("in") + + # Get the File objects from the labels + srcs = [] + for src in ctx.attr.srcs: + srcs.extend(src.files.to_list()) + + outs = [compile_cache_name(src.basename) for src in srcs] + outs = [ctx.actions.declare_file(compile_cache_name(src.basename)) for src in srcs] + + content = [] + for i in range(0, len(srcs)): + content.append("{} {}".format(srcs[i].path, outs[i].path)) + + ctx.actions.write( + output = file_list, + content = "\n".join(content) + "\n", + ) + + args = ctx.actions.args() + args.add(file_list) + + ctx.actions.run_shell( + outputs = outs, + inputs = [file_list] + srcs, + command = ctx.executable._tool.path + " $@", + arguments = [args], + use_default_shell_env = True, + tools = [ctx.executable._tool], + ) + + return [ + DefaultInfo(files = depset(direct = outs)), + ] + +gen_compile_cache = rule( + implementation = _gen_compile_cache_impl, + attrs = { + "srcs": attr.label_list(mandatory = True, allow_files = True), + "_tool": attr.label( + executable = True, + allow_single_file = True, + cfg = "target", + default = "//src/workerd/tools:create_compile_cache", + ), + }, +) + +def wd_compile_cache(name, srcs): + gen_compile_cache( + name = name, + srcs = srcs, + visibility = ["//visibility:public"], + ) diff --git a/build/wd_js_bundle.bzl b/build/wd_js_bundle.bzl index 77d1d862e60..c788508370e 100644 --- a/build/wd_js_bundle.bzl +++ b/build/wd_js_bundle.bzl @@ -1,5 +1,6 @@ load("@bazel_skylib//rules:copy_file.bzl", "copy_file") load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library") +load("@workerd//:build/wd_compile_cache.bzl", "compile_cache_name", "wd_compile_cache") CAPNP_TEMPLATE = """@{schema_id}; @@ -13,7 +14,7 @@ const {const_name} :Modules.Bundle = ( ]); """ -MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {ts_declaration})""" +MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {extras})""" def _to_name(file_name): return file_name.removesuffix(".js") @@ -26,10 +27,30 @@ def _relative_path(file_path, dir_path): fail("file_path need to start with dir_path: " + file_path + " vs " + dir_path) return file_path.removeprefix(dir_path) +def _get_compile_cache(compile_cache, m): + if not compile_cache: + return None + files = m.files.to_list() + + if len(files) != 1: + fail("only single file expected") + + return compile_cache.get(files[0].path) + def _gen_api_bundle_capnpn_impl(ctx): output_dir = ctx.outputs.out.dirname + "/" - def _render_module(name, label, src_type, type): + def _render_module(name, label, src_type, type, compile_cache = None): + ts_declaration = ( + "tsDeclaration = embed \"" + _relative_path( + ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data), + output_dir, + ) + "\", " + ) if name in ctx.attr.declarations else "" + compile_cache = ( + "compileCache = embed \"{}\", ".format(_relative_path(compile_cache, output_dir)) + ) if compile_cache else "" + return MODULE_TEMPLATE.format( name = name, # capnp doesn't allow ".." dir escape, make paths relative. @@ -40,20 +61,21 @@ def _gen_api_bundle_capnpn_impl(ctx): output_dir, ), type = type, - ts_declaration = ( - "tsDeclaration = embed \"" + _relative_path( - ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data), - output_dir, - ) + "\", " - ) if name in ctx.attr.declarations else "", + extras = ts_declaration + compile_cache, ) + compile_cache = {} + if ctx.attr.compile_cache: + locations = ctx.expand_location("$(locations {})".format(ctx.attr.compile_cache.label)).split(" ") + for loc in locations: + compile_cache[loc.removesuffix("_cache")] = loc + modules = [ - _render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin") + _render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin", _get_compile_cache(compile_cache, m)) for m in ctx.attr.builtin_modules ] modules += [ - _render_module(ctx.attr.internal_modules[m], m.label, "src", "internal") + _render_module(ctx.attr.internal_modules[m], m.label, "src", "internal", _get_compile_cache(compile_cache, m)) for m in ctx.attr.internal_modules ] modules += [ @@ -86,10 +108,11 @@ gen_api_bundle_capnpn = rule( "internal_wasm_modules": attr.label_keyed_string_dict(allow_files = True), "internal_data_modules": attr.label_keyed_string_dict(allow_files = True), "internal_json_modules": attr.label_keyed_string_dict(allow_files = True), + "compile_cache": attr.label(), "declarations": attr.string_dict(), "data": attr.label_list(allow_files = True), "const_name": attr.string(mandatory = True), - "deps": attr.label_list(), + "deps": attr.label_list(providers = [DefaultInfo]), }, ) @@ -124,7 +147,9 @@ def wd_js_bundle( internal_data_modules = [], internal_json_modules = [], declarations = [], - deps = []): + deps = [], + data = [], + gen_compile_cache = False): """Generate cc capnp library with js api bundle. NOTE: Due to capnpc embed limitation all modules must be in the same or sub directory of the @@ -190,7 +215,7 @@ def wd_js_bundle( declarations, ) - data = ( + data = data + ( list(builtin_modules_dict) + list(internal_modules_dict) + list(internal_wasm_modules_dict) + @@ -200,6 +225,17 @@ def wd_js_bundle( list(internal_declarations.values()) ) + compile_cache = None + if gen_compile_cache: + srcs = builtin_modules_dict.keys() + internal_modules_dict.keys() + wd_compile_cache( + name = name + "@compile_cache", + srcs = srcs, + ) + compile_cache = name + "@compile_cache" + deps = deps + [compile_cache] + data = data + [compile_cache] + gen_api_bundle_capnpn( name = name + "@gen", out = name + ".capnp", @@ -211,6 +247,7 @@ def wd_js_bundle( internal_data_modules = internal_data_modules_dict, internal_json_modules = internal_json_modules_dict, declarations = builtin_declarations | internal_declarations, + compile_cache = compile_cache, data = data, deps = deps, ) diff --git a/build/wd_ts_bundle.bzl b/build/wd_ts_bundle.bzl index 38fdc788395..ec2ece66a41 100644 --- a/build/wd_ts_bundle.bzl +++ b/build/wd_ts_bundle.bzl @@ -26,7 +26,9 @@ def wd_ts_bundle( internal_json_modules = [], lint = True, deps = [], - js_deps = []): + js_deps = [], + data = [], + gen_compile_cache = False): """Compiles typescript modules and generates api bundle with the result. Args: @@ -77,6 +79,8 @@ def wd_ts_bundle( declarations = declarations, schema_id = schema_id, deps = deps + js_deps, + data = data, + gen_compile_cache = gen_compile_cache, ) if lint: diff --git a/src/node/BUILD.bazel b/src/node/BUILD.bazel index 341c0db8f36..440c167a704 100644 --- a/src/node/BUILD.bazel +++ b/src/node/BUILD.bazel @@ -3,6 +3,7 @@ load("@workerd//:build/wd_ts_bundle.bzl", "wd_ts_bundle") wd_ts_bundle( name = "node", eslintrc_json = "eslint.config.mjs", + gen_compile_cache = True, import_name = "node", internal_modules = glob([ "internal/*.ts", diff --git a/src/workerd/jsg/BUILD.bazel b/src/workerd/jsg/BUILD.bazel index d713c28bd75..3574cb29c6f 100644 --- a/src/workerd/jsg/BUILD.bazel +++ b/src/workerd/jsg/BUILD.bazel @@ -61,6 +61,7 @@ wd_cc_library( ":modules_capnp", ":observer", ":url", + "//src/workerd/tools:compile_cache_capnp", "//src/workerd/util", "//src/workerd/util:sentry", "//src/workerd/util:thread-scopes", diff --git a/src/workerd/jsg/compile-cache.c++ b/src/workerd/jsg/compile-cache.c++ index b6b9be5c797..0bb073feefa 100644 --- a/src/workerd/jsg/compile-cache.c++ +++ b/src/workerd/jsg/compile-cache.c++ @@ -7,25 +7,51 @@ namespace workerd::jsg { // CompileCache::Data -std::unique_ptr CompileCache::Data::AsCachedData() { - return std::make_unique( - data, length, v8::ScriptCompiler::CachedData::BufferNotOwned); +kj::Own CompileCache::Data::AsCachedData() { + return kj::heap( + data.begin(), data.size(), v8::ScriptCompiler::CachedData::BufferNotOwned) + .attach(addRefToThis()); } // CompileCache -void CompileCache::add( - kj::StringPtr key, std::shared_ptr cached) const { - cache.lockExclusive()->upsert(kj::str(key), Data(kj::mv(cached)), [](auto&, auto&&) {}); +void CompileCache::add(kj::StringPtr key, v8::Local script) const { + auto cached = v8::ScriptCompiler::CreateCodeCache(script); + auto data = kj::heapArray(cached->data, cached->length); + cache.lockExclusive()->upsert(kj::str(key), kj::arc(kj::mv(data)), [](auto&, auto&&) {}); + delete cached; } -kj::Maybe CompileCache::find(kj::StringPtr key) const { +kj::Maybe> CompileCache::find(kj::StringPtr key) const { KJ_IF_SOME(value, cache.lockExclusive()->find(key)) { - if (value.data != nullptr) { - return value; - } + return value.addRef(); } return kj::none; } +void CompileCache::serialize(capnp::MessageBuilder& message) const { + auto builder = message.initRoot(); + auto lock = cache.lockShared(); + auto entries = builder.initEntries(lock->size()); + + size_t i = 0; + for (auto& current: *lock) { + auto entry = entries[i]; + entry.setPath(current.key); + entry.setData(current.value->data); + i++; + } +} + +void CompileCache::deserialize(capnp::PackedFdMessageReader& message) const { + auto input = message.getRoot(); + auto lock = cache.lockExclusive(); + for (auto entry: input.getEntries()) { + auto path = entry.getPath(); + auto data = entry.getData(); + auto compiled_cache = kj::heapArray(data.begin(), data.size()); + lock->insert(kj::heapString(path.cStr(), path.size()), kj::arc(kj::mv(compiled_cache))); + } +} + } // namespace workerd::jsg diff --git a/src/workerd/jsg/compile-cache.h b/src/workerd/jsg/compile-cache.h index c8e208d44d4..bc5b8ed8d82 100644 --- a/src/workerd/jsg/compile-cache.h +++ b/src/workerd/jsg/compile-cache.h @@ -6,8 +6,12 @@ #include "jsg.h" #include "setup.h" +#include + #include +#include +#include #include namespace workerd::jsg { @@ -22,28 +26,22 @@ namespace workerd::jsg { // we'd likely need to have find return an atomic refcount or something similar. class CompileCache { public: - class Data { + class Data: public kj::AtomicRefcounted, public kj::EnableAddRefToThis { public: - Data(): data(nullptr), length(0), owningPtr(nullptr) {}; - explicit Data(std::shared_ptr cached_data) - : data(cached_data->data), - length(cached_data->length), - owningPtr(cached_data) {}; + explicit Data(kj::Array cached_data): data(kj::mv(cached_data)) {} // Returns a v8::ScriptCompiler::CachedData corresponding to this // CompileCache::Data. The lifetime of the returned // v8::ScriptCompiler::CachedData must not outlive that of the data. - std::unique_ptr AsCachedData(); - - const uint8_t* data; - size_t length; + kj::Own AsCachedData(); - private: - std::shared_ptr owningPtr; + kj::Array data; }; - void add(kj::StringPtr key, std::shared_ptr cached) const; - kj::Maybe find(kj::StringPtr key) const; + void add(kj::StringPtr key, v8::Local script) const; + kj::Maybe> find(kj::StringPtr key) const; + void serialize(capnp::MessageBuilder& message) const; + void deserialize(capnp::PackedFdMessageReader& message) const; static const CompileCache& get() { static const CompileCache instance; @@ -52,7 +50,7 @@ class CompileCache { private: // The key is the address of the static global that was compiled to produce the CachedData. - kj::MutexGuarded> cache; + kj::MutexGuarded>> cache; }; } // namespace workerd::jsg diff --git a/src/workerd/jsg/modules.c++ b/src/workerd/jsg/modules.c++ index 4107313bb88..db5da5cfa86 100644 --- a/src/workerd/jsg/modules.c++ +++ b/src/workerd/jsg/modules.c++ @@ -393,7 +393,7 @@ v8::Local compileEsmModule(jsg::Lock& js, v8::ScriptOrigin origin(v8StrIntern(js.v8Isolate, name), resourceLineOffset, resourceColumnOffset, resourceIsSharedCrossOrigin, scriptId, {}, resourceIsOpaque, isWasm, isModule); v8::Local contentStr; - v8::ScriptCompiler::CachedData* existingCacheData = nullptr; + kj::Own existingCacheData; auto compileOptions = v8::ScriptCompiler::kNoCompileOptions; const auto& compileCache = CompileCache::get(); @@ -406,20 +406,18 @@ v8::Local compileEsmModule(jsg::Lock& js, // We only enable compile cache for built-in modules for now. KJ_IF_SOME(cached, compileCache.find(name)) { compileOptions = v8::ScriptCompiler::kConsumeCodeCache; - existingCacheData = cached.AsCachedData().release(); + existingCacheData = cached->AsCachedData(); } } else { contentStr = jsg::v8Str(js.v8Isolate, content); } - v8::ScriptCompiler::Source source(contentStr, origin, existingCacheData); + v8::ScriptCompiler::Source source(contentStr, origin, existingCacheData.get()); auto module = jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source, compileOptions)); - if (existingCacheData == nullptr) { - auto cachedData = std::shared_ptr( - v8::ScriptCompiler::CreateCodeCache(module->GetUnboundModuleScript())); - compileCache.add(name, kj::mv(cachedData)); + if (existingCacheData.get() == nullptr) { + compileCache.add(name, module->GetUnboundModuleScript()); } return module; diff --git a/src/workerd/jsg/modules.capnp b/src/workerd/jsg/modules.capnp index 24fd7b948ff..434dc3b232f 100644 --- a/src/workerd/jsg/modules.capnp +++ b/src/workerd/jsg/modules.capnp @@ -23,6 +23,9 @@ struct Module { tsDeclaration @3 :Text; type @2 :ModuleType; + + # Optional compile cache to be used to speed up module loading + compileCache @7 :Data; } diff --git a/src/workerd/server/BUILD.bazel b/src/workerd/server/BUILD.bazel index 1c3c46c7db4..9caa8431ba7 100644 --- a/src/workerd/server/BUILD.bazel +++ b/src/workerd/server/BUILD.bazel @@ -140,6 +140,7 @@ wd_cc_library( hdrs = [ "v8-platform-impl.h", ], + visibility = ["//visibility:public"], deps = [ "//src/workerd/jsg", "@capnp-cpp//src/kj", diff --git a/src/workerd/tools/BUILD.bazel b/src/workerd/tools/BUILD.bazel index 2331e0eeee9..e14f8019b6c 100644 --- a/src/workerd/tools/BUILD.bazel +++ b/src/workerd/tools/BUILD.bazel @@ -1,6 +1,8 @@ load("@bazel_skylib//rules:run_binary.bzl", "run_binary") load("@rules_rust//rust:defs.bzl", "rust_binary") load("//:build/cc_ast_dump.bzl", "cc_ast_dump") +load("//:build/wd_cc_binary.bzl", "wd_cc_binary") +load("//:build/wd_cc_capnp_library.bzl", "wd_cc_capnp_library") # ======================================================================================== # Parameter Name Extractor @@ -26,7 +28,10 @@ cc_ast_dump( "//src/workerd/io", "//src/workerd/jsg", "@capnp-cpp//src/capnp", - ], + ] + select({ + "@platforms//os:windows": [], + "//conditions:default": ["@workerd//src/workerd/util:symbolizer"], + }), ) rust_binary( @@ -71,3 +76,25 @@ run_binary( tool = "param_extractor_bin", visibility = ["//visibility:public"], ) + +wd_cc_capnp_library( + name = "compile_cache_capnp", + srcs = ["compile-cache.capnp"], + data = [], + visibility = ["//visibility:public"], + deps = [], +) + +# This binary is used to generate compile cache for workerd. +# This will later be used by workerd to access compile caches of JS internals. +wd_cc_binary( + name = "create_compile_cache", + srcs = ["create-compile-cache.c++"], + visibility = ["//visibility:public"], + deps = [ + ":compile_cache_capnp", + "//src/workerd/jsg", + "@capnp-cpp//src/kj", + "@workerd-v8//:v8", + ], +) diff --git a/src/workerd/tools/compile-cache.capnp b/src/workerd/tools/compile-cache.capnp new file mode 100644 index 00000000000..b29e9633792 --- /dev/null +++ b/src/workerd/tools/compile-cache.capnp @@ -0,0 +1,13 @@ +@0xd36f904ea8f67738; + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("workerd::tools"); + +struct CompileCache { + entries @0 :List(CompileCacheEntry); + + struct CompileCacheEntry { + path @0 :Text; + data @1 :Data; + } +} diff --git a/src/workerd/tools/create-compile-cache.c++ b/src/workerd/tools/create-compile-cache.c++ new file mode 100644 index 00000000000..0e61c58413e --- /dev/null +++ b/src/workerd/tools/create-compile-cache.c++ @@ -0,0 +1,136 @@ +#include +#include +#include + +#include +#include +#include + +namespace workerd::tools { +namespace { + +constexpr int resourceLineOffset = 0; +constexpr int resourceColumnOffset = 0; +constexpr bool resourceIsSharedCrossOrigin = false; +constexpr int scriptId = -1; +constexpr bool resourceIsOpaque = false; +constexpr bool isWasm = false; +constexpr bool isModule = true; + +struct CompilerCacheContext: public jsg::Object, public jsg::ContextGlobal { + JSG_RESOURCE_TYPE(CompilerCacheContext) {} +}; + +JSG_DECLARE_ISOLATE_TYPE(CompileCacheIsolate, CompilerCacheContext); + +// CompileCacheCreator receives an argument of a text file where each line +// represents the path of the file to create compile caches for. +class CompileCacheCreator { +public: + explicit CompileCacheCreator(kj::ProcessContext& context) + : context(context), + ccIsolate(system, kj::heap(), params) {}; + + kj::MainFunc getMain() { + return kj::MainBuilder( + context, "Process a file list", "This binary processes the specified file list.") + .expectArg("", KJ_BIND_METHOD(*this, setFilePath)) + .callAfterParsing(KJ_BIND_METHOD(*this, run)) + .build(); + } + + void readFiles() { + auto fs = kj::newDiskFilesystem(); + auto& dir = fs->getCurrent(); + auto fileList = dir.openFile(filePath); + auto fileListContent = fileList->mmap(0, fileList->stat().size); + + size_t start = 0; + size_t end = 0; + + while (end < fileListContent.size()) { + while (end < fileListContent.size() && fileListContent[end] != '\n') { + end++; + } + + auto line = kj::str(fileListContent.slice(start, end).asChars()); + if (line.size() > 0) { + auto space = KJ_REQUIRE_NONNULL(line.findFirst(' ')); + auto path = kj::str(line.first(space)); + auto out = kj::str(line.slice(space + 1)); + + auto file = dir.openFile(kj::Path::parse(path)); + auto content = file->mmap(0, file->stat().size); + + file_contents.add(Target{ + .sourcePath = kj::mv(path), + .sourceContent = kj::str(content), + .outputPath = kj::mv(out), + }); + } + + end++; + start = end; + } + } + + kj::MainBuilder::Validity run() { + readFiles(); + + auto options = v8::ScriptCompiler::kNoCompileOptions; + auto fs = kj::newDiskFilesystem(); + auto& dir = fs->getCurrent(); + + ccIsolate.runInLockScope([&](CompileCacheIsolate::Lock& isolateLock) { + JSG_WITHIN_CONTEXT_SCOPE(isolateLock, + isolateLock.newContext().getHandle(isolateLock), + [&](jsg::Lock& js) { + for (auto& target: file_contents) { + v8::ScriptOrigin origin(jsg::v8StrIntern(js.v8Isolate, target.sourcePath), + resourceLineOffset, resourceColumnOffset, resourceIsSharedCrossOrigin, scriptId, {}, + resourceIsOpaque, isWasm, isModule); + + auto contentStr = jsg::newExternalOneByteString(js, target.sourceContent); + auto source = v8::ScriptCompiler::Source(contentStr, origin, nullptr); + auto module = + jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source, options)); + + auto output = dir.openFile(kj::Path::parse(target.outputPath), + kj::WriteMode::CREATE | kj::WriteMode::CREATE_PARENT); + auto codeCache = v8::ScriptCompiler::CreateCodeCache(module->GetUnboundModuleScript()); + output->writeAll(kj::arrayPtr(codeCache->data, codeCache->length)); + delete codeCache; + } + }); + }); + + return true; + } + +private: + kj::ProcessContext& context; + kj::Path filePath{}; + + jsg::V8System system{}; + v8::Isolate::CreateParams params{}; + CompileCacheIsolate ccIsolate; + + kj::MainBuilder::Validity setFilePath(kj::StringPtr path) { + filePath = kj::Path::parse(path); + return true; + } + + struct Target { + kj::String sourcePath; + kj::String sourceContent; + kj::String outputPath; + }; + + // Key is the path of the file, and value is the content. + kj::Vector file_contents{}; +}; + +} // namespace +} // namespace workerd::tools + +KJ_MAIN(workerd::tools::CompileCacheCreator)