Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create compile caches on compile step #3004

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
62 changes: 62 additions & 0 deletions build/wd_compile_cache.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""
Bazel rules for generating compilation caches from a list of input files.
"""

def compile_cache_name(file_path):
    """Returns the name of the compile cache file that belongs to a source file.

    Args:
      file_path: name or path of the source file, as a string.

    Returns:
      `file_path` with "_cache" appended.
    """
    cache_suffix = "_cache"
    return file_path + cache_suffix

def _gen_compile_cache_impl(ctx):
    """Implementation of the gen_compile_cache rule.

    Declares one output per source file (named via compile_cache_name() from the
    source's basename), writes a manifest with one "<source path> <output path>"
    line per source, and runs the `_tool` executable with the manifest path as
    its only argument. The tool is expected to produce every declared output.

    Args:
      ctx: the rule context.

    Returns:
      A list with a DefaultInfo provider exposing the declared cache files.
    """

    # Name the manifest after the target so that several targets of this rule
    # can coexist in one package without declaring the same file.
    file_list = ctx.actions.declare_file(ctx.label.name + ".in")

    # Flatten the files behind every label in `srcs`.
    srcs = []
    for src in ctx.attr.srcs:
        srcs.extend(src.files.to_list())

    outs = [ctx.actions.declare_file(compile_cache_name(src.basename)) for src in srcs]

    # One "<source> <cache>" pair per line.
    content = ["{} {}".format(src.path, out.path) for src, out in zip(srcs, outs)]

    ctx.actions.write(
        output = file_list,
        content = "\n".join(content) + "\n",
    )

    args = ctx.actions.args()
    args.add(file_list)

    # Run the tool directly rather than through a shell command string, so paths
    # containing spaces or shell metacharacters are passed through unchanged.
    ctx.actions.run(
        outputs = outs,
        inputs = [file_list] + srcs,
        executable = ctx.executable._tool,
        arguments = [args],
        use_default_shell_env = True,
        tools = [ctx.executable._tool],
    )

    return [
        DefaultInfo(files = depset(direct = outs)),
    ]

# Rule that declares one compile cache output per file in `srcs` and runs the
# `_tool` executable to generate them.
gen_compile_cache = rule(
    implementation = _gen_compile_cache_impl,
    attrs = {
        # Source files to generate compile caches for.
        "srcs": attr.label_list(allow_files = True, mandatory = True),
        # Executable invoked by the action.
        # NOTE(review): built in the "target" configuration although it runs
        # during the build; presumably so the generated caches match the target
        # build of the runtime. Confirm this is intended rather than "exec".
        "_tool": attr.label(
            default = "//src/workerd/tools:create_compile_cache",
            cfg = "target",
            executable = True,
            allow_single_file = True,
        ),
    },
)

def wd_compile_cache(name, srcs):
    """Declares a publicly visible gen_compile_cache target.

    Args:
      name: name of the generated target.
      srcs: labels of the source files to generate compile caches for.
    """
    public_visibility = ["//visibility:public"]
    gen_compile_cache(
        name = name,
        srcs = srcs,
        visibility = public_visibility,
    )
63 changes: 50 additions & 13 deletions build/wd_js_bundle.bzl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@capnp-cpp//src/capnp:cc_capnp_library.bzl", "cc_capnp_library")
load("@workerd//:build/wd_compile_cache.bzl", "compile_cache_name", "wd_compile_cache")

CAPNP_TEMPLATE = """@{schema_id};

Expand All @@ -13,7 +14,7 @@ const {const_name} :Modules.Bundle = (
]);
"""

MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {ts_declaration})"""
MODULE_TEMPLATE = """ (name = "{name}", {src_type} = embed "{path}", type = {type}, {extras})"""

def _to_name(file_name):
return file_name.removesuffix(".js")
Expand All @@ -26,10 +27,30 @@ def _relative_path(file_path, dir_path):
fail("file_path need to start with dir_path: " + file_path + " vs " + dir_path)
return file_path.removeprefix(dir_path)

def _get_compile_cache(compile_cache, m):
    """Looks up the compile cache entry for a module target.

    Args:
      compile_cache: dict keyed by file path; may be empty or None.
      m: target whose `files` depset must contain exactly one file.

    Returns:
      The value stored under the path of the module's single file, or None when
      `compile_cache` is empty/None or holds no entry for that path. Fails the
      build if the target does not provide exactly one file.
    """
    if compile_cache:
        module_files = m.files.to_list()
        if len(module_files) == 1:
            return compile_cache.get(module_files[0].path)
        fail("only single file expected")
    return None

def _gen_api_bundle_capnpn_impl(ctx):
output_dir = ctx.outputs.out.dirname + "/"

def _render_module(name, label, src_type, type):
def _render_module(name, label, src_type, type, compile_cache = None):
ts_declaration = (
"tsDeclaration = embed \"" + _relative_path(
ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data),
output_dir,
) + "\", "
) if name in ctx.attr.declarations else ""
compile_cache = (
"compileCache = embed \"{}\", ".format(_relative_path(compile_cache, output_dir))
) if compile_cache else ""

return MODULE_TEMPLATE.format(
name = name,
# capnp doesn't allow ".." dir escape, make paths relative.
Expand All @@ -40,20 +61,21 @@ def _gen_api_bundle_capnpn_impl(ctx):
output_dir,
),
type = type,
ts_declaration = (
"tsDeclaration = embed \"" + _relative_path(
ctx.expand_location("$(location {})".format(ctx.attr.declarations[name]), ctx.attr.data),
output_dir,
) + "\", "
) if name in ctx.attr.declarations else "",
extras = ts_declaration + compile_cache,
)

compile_cache = {}
if ctx.attr.compile_cache:
locations = ctx.expand_location("$(locations {})".format(ctx.attr.compile_cache.label)).split(" ")
for loc in locations:
compile_cache[loc.removesuffix("_cache")] = loc

modules = [
_render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin")
_render_module(ctx.attr.builtin_modules[m], m.label, "src", "builtin", _get_compile_cache(compile_cache, m))
for m in ctx.attr.builtin_modules
]
modules += [
_render_module(ctx.attr.internal_modules[m], m.label, "src", "internal")
_render_module(ctx.attr.internal_modules[m], m.label, "src", "internal", _get_compile_cache(compile_cache, m))
for m in ctx.attr.internal_modules
]
modules += [
Expand Down Expand Up @@ -86,10 +108,11 @@ gen_api_bundle_capnpn = rule(
"internal_wasm_modules": attr.label_keyed_string_dict(allow_files = True),
"internal_data_modules": attr.label_keyed_string_dict(allow_files = True),
"internal_json_modules": attr.label_keyed_string_dict(allow_files = True),
"compile_cache": attr.label(),
"declarations": attr.string_dict(),
"data": attr.label_list(allow_files = True),
"const_name": attr.string(mandatory = True),
"deps": attr.label_list(),
"deps": attr.label_list(providers = [DefaultInfo]),
},
)

Expand Down Expand Up @@ -124,7 +147,9 @@ def wd_js_bundle(
internal_data_modules = [],
internal_json_modules = [],
declarations = [],
deps = []):
deps = [],
data = [],
gen_compile_cache = False):
"""Generate cc capnp library with js api bundle.

NOTE: Due to capnpc embed limitation all modules must be in the same or sub directory of the
Expand Down Expand Up @@ -190,7 +215,7 @@ def wd_js_bundle(
declarations,
)

data = (
data = data + (
list(builtin_modules_dict) +
list(internal_modules_dict) +
list(internal_wasm_modules_dict) +
Expand All @@ -200,6 +225,17 @@ def wd_js_bundle(
list(internal_declarations.values())
)

compile_cache = None
if gen_compile_cache:
srcs = builtin_modules_dict.keys() + internal_modules_dict.keys()
wd_compile_cache(
name = name + "@compile_cache",
srcs = srcs,
)
compile_cache = name + "@compile_cache"
deps = deps + [compile_cache]
data = data + [compile_cache]

gen_api_bundle_capnpn(
name = name + "@gen",
out = name + ".capnp",
Expand All @@ -211,6 +247,7 @@ def wd_js_bundle(
internal_data_modules = internal_data_modules_dict,
internal_json_modules = internal_json_modules_dict,
declarations = builtin_declarations | internal_declarations,
compile_cache = compile_cache,
data = data,
deps = deps,
)
Expand Down
6 changes: 5 additions & 1 deletion build/wd_ts_bundle.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def wd_ts_bundle(
internal_json_modules = [],
lint = True,
deps = [],
js_deps = []):
js_deps = [],
data = [],
gen_compile_cache = False):
"""Compiles typescript modules and generates api bundle with the result.

Args:
Expand Down Expand Up @@ -77,6 +79,8 @@ def wd_ts_bundle(
declarations = declarations,
schema_id = schema_id,
deps = deps + js_deps,
data = data,
gen_compile_cache = gen_compile_cache,
)

if lint:
Expand Down
1 change: 1 addition & 0 deletions src/node/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ load("@workerd//:build/wd_ts_bundle.bzl", "wd_ts_bundle")
wd_ts_bundle(
name = "node",
eslintrc_json = "eslint.config.mjs",
gen_compile_cache = True,
import_name = "node",
internal_modules = glob([
"internal/*.ts",
Expand Down
1 change: 1 addition & 0 deletions src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ wd_cc_library(
":modules_capnp",
":observer",
":url",
"//src/workerd/tools:compile_cache_capnp",
"//src/workerd/util",
"//src/workerd/util:sentry",
"//src/workerd/util:thread-scopes",
Expand Down
46 changes: 36 additions & 10 deletions src/workerd/jsg/compile-cache.c++
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,51 @@ namespace workerd::jsg {

// CompileCache::Data

std::unique_ptr<v8::ScriptCompiler::CachedData> CompileCache::Data::AsCachedData() {
return std::make_unique<v8::ScriptCompiler::CachedData>(
data, length, v8::ScriptCompiler::CachedData::BufferNotOwned);
kj::Own<v8::ScriptCompiler::CachedData> CompileCache::Data::AsCachedData() {
  // The CachedData only points at our buffer (BufferNotOwned), so a reference to this Data is
  // attached to the returned object to keep the bytes alive for as long as it exists.
  auto view = kj::heap<v8::ScriptCompiler::CachedData>(
      data.begin(), data.size(), v8::ScriptCompiler::CachedData::BufferNotOwned);
  return kj::mv(view).attach(addRefToThis());
}

// CompileCache

void CompileCache::add(
kj::StringPtr key, std::shared_ptr<v8::ScriptCompiler::CachedData> cached) const {
cache.lockExclusive()->upsert(kj::str(key), Data(kj::mv(cached)), [](auto&, auto&&) {});
// Generates the V8 code cache for `script` and stores a private copy of its bytes under `key`.
// An entry that already exists for `key` is left untouched. Nothing is stored if V8 cannot
// produce a code cache for the script.
void CompileCache::add(kj::StringPtr key, v8::Local<v8::UnboundModuleScript> script) const {
  // CreateCodeCache() transfers ownership of the result to the caller and returns nullptr when
  // the script cannot be serialized.
  auto cached = v8::ScriptCompiler::CreateCodeCache(script);
  if (cached == nullptr) return;
  // Release V8's buffer on every exit path, including when the copy or the upsert throws.
  KJ_DEFER(delete cached);

  auto data = kj::heapArray<kj::byte>(cached->data, cached->length);
  cache.lockExclusive()->upsert(kj::str(key), kj::arc<Data>(kj::mv(data)), [](auto&, auto&&) {});
}

kj::Maybe<CompileCache::Data&> CompileCache::find(kj::StringPtr key) const {
kj::Maybe<kj::Arc<CompileCache::Data>> CompileCache::find(kj::StringPtr key) const {
KJ_IF_SOME(value, cache.lockExclusive()->find(key)) {
if (value.data != nullptr) {
return value;
}
return value.addRef();
}
return kj::none;
}

// Writes every cache entry into `message` as a workerd::tools::CompileCache root, one
// (path, data) entry per cached item. The cache is held under a shared lock while copying.
void CompileCache::serialize(capnp::MessageBuilder& message) const {
  auto root = message.initRoot<workerd::tools::CompileCache>();
  auto lock = cache.lockShared();
  auto entries = root.initEntries(lock->size());

  size_t index = 0;
  for (auto& item: *lock) {
    auto out = entries[index++];
    out.setPath(item.key);
    out.setData(item.value->data);
  }
}

// Loads the entries of a serialized workerd::tools::CompileCache message into the cache,
// copying each entry's path and data so they do not depend on the lifetime of `message`.
// As in add(), an entry that already exists for a path is kept and the incoming one is dropped,
// so duplicate paths or repeated loads do not fail.
void CompileCache::deserialize(capnp::PackedFdMessageReader& message) const {
  auto input = message.getRoot<workerd::tools::CompileCache>();
  auto lock = cache.lockExclusive();
  for (auto entry: input.getEntries()) {
    auto data = entry.getData();
    auto compiled_cache = kj::heapArray<kj::byte>(data.begin(), data.size());
    lock->upsert(
        kj::str(entry.getPath()), kj::arc<Data>(kj::mv(compiled_cache)), [](auto&, auto&&) {});
  }
}

} // namespace workerd::jsg
28 changes: 13 additions & 15 deletions src/workerd/jsg/compile-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
#include "jsg.h"
#include "setup.h"

#include <workerd/tools/compile-cache.capnp.h>

#include <v8.h>

#include <capnp/message.h>
#include <capnp/serialize-packed.h>
#include <kj/string.h>

namespace workerd::jsg {
Expand All @@ -22,28 +26,22 @@ namespace workerd::jsg {
// we'd likely need to have find return an atomic refcount or something similar.
class CompileCache {
public:
class Data {
class Data: public kj::AtomicRefcounted, public kj::EnableAddRefToThis<Data> {
public:
Data(): data(nullptr), length(0), owningPtr(nullptr) {};
explicit Data(std::shared_ptr<v8::ScriptCompiler::CachedData> cached_data)
: data(cached_data->data),
length(cached_data->length),
owningPtr(cached_data) {};
explicit Data(kj::Array<kj::byte> cached_data): data(kj::mv(cached_data)) {}

// Returns a v8::ScriptCompiler::CachedData corresponding to this
// CompileCache::Data. The lifetime of the returned
// v8::ScriptCompiler::CachedData must not outlive that of the data.
std::unique_ptr<v8::ScriptCompiler::CachedData> AsCachedData();

const uint8_t* data;
size_t length;
kj::Own<v8::ScriptCompiler::CachedData> AsCachedData();

private:
std::shared_ptr<void> owningPtr;
kj::Array<kj::byte> data;
};

void add(kj::StringPtr key, std::shared_ptr<v8::ScriptCompiler::CachedData> cached) const;
kj::Maybe<Data&> find(kj::StringPtr key) const;
void add(kj::StringPtr key, v8::Local<v8::UnboundModuleScript> script) const;
kj::Maybe<kj::Arc<Data>> find(kj::StringPtr key) const;
void serialize(capnp::MessageBuilder& message) const;
void deserialize(capnp::PackedFdMessageReader& message) const;

static const CompileCache& get() {
static const CompileCache instance;
Expand All @@ -52,7 +50,7 @@ class CompileCache {

private:
// The key is the address of the static global that was compiled to produce the CachedData.
kj::MutexGuarded<kj::HashMap<kj::String, Data>> cache;
kj::MutexGuarded<kj::HashMap<kj::String, kj::Arc<Data>>> cache;
};

} // namespace workerd::jsg
12 changes: 5 additions & 7 deletions src/workerd/jsg/modules.c++
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ v8::Local<v8::Module> compileEsmModule(jsg::Lock& js,
v8::ScriptOrigin origin(v8StrIntern(js.v8Isolate, name), resourceLineOffset, resourceColumnOffset,
resourceIsSharedCrossOrigin, scriptId, {}, resourceIsOpaque, isWasm, isModule);
v8::Local<v8::String> contentStr;
v8::ScriptCompiler::CachedData* existingCacheData = nullptr;
kj::Own<v8::ScriptCompiler::CachedData> existingCacheData;
auto compileOptions = v8::ScriptCompiler::kNoCompileOptions;
const auto& compileCache = CompileCache::get();

Expand All @@ -406,20 +406,18 @@ v8::Local<v8::Module> compileEsmModule(jsg::Lock& js,
// We only enable compile cache for built-in modules for now.
KJ_IF_SOME(cached, compileCache.find(name)) {
compileOptions = v8::ScriptCompiler::kConsumeCodeCache;
existingCacheData = cached.AsCachedData().release();
existingCacheData = cached->AsCachedData();
}
} else {
contentStr = jsg::v8Str(js.v8Isolate, content);
}

v8::ScriptCompiler::Source source(contentStr, origin, existingCacheData);
v8::ScriptCompiler::Source source(contentStr, origin, existingCacheData.get());
auto module =
jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source, compileOptions));

if (existingCacheData == nullptr) {
auto cachedData = std::shared_ptr<v8::ScriptCompiler::CachedData>(
v8::ScriptCompiler::CreateCodeCache(module->GetUnboundModuleScript()));
compileCache.add(name, kj::mv(cachedData));
if (existingCacheData.get() == nullptr) {
compileCache.add(name, module->GetUnboundModuleScript());
}

return module;
Expand Down
Loading
Loading