From 05e75681981adc4dcdbce7fb2df84fbee30e84fd Mon Sep 17 00:00:00 2001 From: Dan Lapid Date: Thu, 17 Oct 2024 15:58:58 +0000 Subject: [PATCH] Instantiate Emscripten Runtime for python workers earlier. Move ownership of metrics and limitEnforcer to the api type. Currently ownership is shared even though the Isolate class encapsulates the api class. Moving complete ownership to the underlying api class allows the isolate class to be constructed in a different scope to the api class. This is useful for preinitialization of the api class before a request has come in. Add updateConfiguration function to jsg Isolates This can be used to update the given configuration at runtime. Note that while some jsg structs are lazily using the configuration, others can use it at construction and will have the original configuration value. --- src/pyodide/BUILD.bazel | 76 +++++- src/pyodide/emscripten_setup.capnp | 13 ++ src/pyodide/internal/pool/emscriptenSetup.ts | 10 +- src/pyodide/internal/python.ts | 39 +--- src/pyodide/types/emscripten.d.ts | 4 + src/pyodide/types/setup-emscripten.d.ts | 5 + src/workerd/api/BUILD.bazel | 5 + src/workerd/api/modules.h | 3 + src/workerd/api/pyodide/setup-emscripten.c++ | 229 +++++++++++++++++++ src/workerd/api/pyodide/setup-emscripten.h | 55 +++++ src/workerd/api/rtti.c++ | 1 + src/workerd/io/worker.h | 9 + src/workerd/jsg/promise.h | 7 +- src/workerd/jsg/resource.h | 5 + src/workerd/jsg/setup.h | 5 + src/workerd/jsg/type-wrapper.h | 15 ++ src/workerd/jsg/value.h | 6 +- src/workerd/server/workerd-api.c++ | 19 +- src/workerd/server/workerd-api.h | 5 +- 19 files changed, 458 insertions(+), 53 deletions(-) create mode 100644 src/pyodide/emscripten_setup.capnp create mode 100644 src/pyodide/types/setup-emscripten.d.ts create mode 100644 src/workerd/api/pyodide/setup-emscripten.c++ create mode 100644 src/workerd/api/pyodide/setup-emscripten.h diff --git a/src/pyodide/BUILD.bazel b/src/pyodide/BUILD.bazel index 0eabada074b..6b6cebe1cce 100644 --- 
a/src/pyodide/BUILD.bazel +++ b/src/pyodide/BUILD.bazel @@ -197,8 +197,6 @@ INTERNAL_MODULES = glob( [ "internal/*.ts", "internal/topLevelEntropy/*.ts", - # The pool directory is only needed by typescript, it shouldn't be used at runtime. - "internal/pool/*.ts", "types/*.ts", "types/*/*.ts", ], @@ -217,21 +215,14 @@ wd_ts_bundle( name = "pyodide", eslintrc_json = "eslint.config.mjs", import_name = "pyodide", - internal_data_modules = ["generated/python_stdlib.zip"] + INTERNAL_DATA_MODULES, + internal_data_modules = INTERNAL_DATA_MODULES, internal_json_modules = [ "generated/pyodide-lock.json", "generated/pyodide-bucket.json", ], - internal_modules = [ - "generated/emscriptenSetup.js", - ] + INTERNAL_MODULES, - internal_wasm_modules = ["generated/pyodide.asm.wasm"], + internal_modules = INTERNAL_MODULES, js_deps = [ - "generated/emscriptenSetup", - "pyodide.asm.js@rule", - "pyodide.asm.wasm@rule", "pyodide-lock.js@rule", - "python_stdlib.zip@rule", "pyodide-bucket.json@rule", ], lint = False, @@ -286,3 +277,66 @@ genrule( tools = ["@capnp-cpp//src/capnp:capnp_tool"], visibility = ["//visibility:public"], ) + +copy_file( + name = "emscripten_setup_capnp_file", + src = "emscripten_setup.capnp", + out = "generated/emscripten_setup.capnp", +) + +genrule( + name = "emscripten_setup.capnp.bin@rule", + srcs = [ + ":generated/emscripten_setup.capnp", + ":generated/emscriptenSetup.js", + ":generated/python_stdlib.zip", + ":generated/pyodide.asm.wasm", + ], + outs = ["emscripten_setup.capnp.bin"], + cmd = " ".join([ + "$(execpath @capnp-cpp//src/capnp:capnp_tool)", + "eval", + "$(location generated/emscripten_setup.capnp)", + "emscriptenSetup", + "-o binary", + "> $@", + ]), + tools = ["@capnp-cpp//src/capnp:capnp_tool"], + visibility = ["//visibility:public"], +) + +capnp_embed( + name = "emscripten_setup_capnp_file_embed", + src = "generated/emscripten_setup.capnp", + deps = ["emscripten_setup_capnp_file"], +) + +capnp_embed( + name = "emscripten_setup_js_file_embed", + src 
= "generated/emscriptenSetup.js", + deps = ["generated/emscriptenSetup"], +) + +capnp_embed( + name = "python_stdlib_zip_file_embed", + src = "generated/python_stdlib.zip", + deps = ["python_stdlib.zip@rule"], +) + +capnp_embed( + name = "pyodide_asm_wasm_file_embed", + src = "generated/pyodide.asm.wasm", + deps = ["pyodide.asm.wasm@rule"], +) + +cc_capnp_library( + name = "emscripten_setup_capnp", + srcs = ["generated/emscripten_setup.capnp"], + visibility = ["//visibility:public"], + deps = [ + ":emscripten_setup_capnp_file_embed", + ":emscripten_setup_js_file_embed", + ":pyodide_asm_wasm_file_embed", + ":python_stdlib_zip_file_embed", + ], +) diff --git a/src/pyodide/emscripten_setup.capnp b/src/pyodide/emscripten_setup.capnp new file mode 100644 index 00000000000..a47d99256c6 --- /dev/null +++ b/src/pyodide/emscripten_setup.capnp @@ -0,0 +1,13 @@ +@0xc00ad00cc650fb45; + +struct EmscriptenSetup { + code @0 :Text; + pyodideAsmWasm @1 :Data; + pythonStdlibZip @2 :Data; +} + +const emscriptenSetup :EmscriptenSetup = ( + code = embed "emscriptenSetup.js", + pyodideAsmWasm = embed "pyodide.asm.wasm", + pythonStdlibZip = embed "python_stdlib.zip", +); diff --git a/src/pyodide/internal/pool/emscriptenSetup.ts b/src/pyodide/internal/pool/emscriptenSetup.ts index b9b37c0ee99..d72946898c6 100644 --- a/src/pyodide/internal/pool/emscriptenSetup.ts +++ b/src/pyodide/internal/pool/emscriptenSetup.ts @@ -13,7 +13,7 @@ import { reportError } from 'pyodide-internal:util'; */ import { _createPyodideModule } from 'pyodide-internal:generated/pyodide.asm'; -export { +import { setUnsafeEval, setGetRandomValues, } from 'pyodide-internal:pool/builtin_wrappers'; @@ -56,7 +56,7 @@ function getWaitForDynlibs(resolveReadyPromise: PreRunHook): PreRunHook { * This is a simplified version of the `prepareFileSystem` function here: * https://github.com/pyodide/pyodide/blob/main/src/js/module.ts */ -function getPrepareFileSystem(pythonStdlib: Uint8Array): PreRunHook { +function 
getPrepareFileSystem(pythonStdlib: ArrayBuffer): PreRunHook { return function prepareFileSystem(Module: Module): void { try { const pymajor = Module._py_version_major(); @@ -118,7 +118,7 @@ function getInstantiateWasm( */ function getEmscriptenSettings( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, pyodideWasmModule: WebAssembly.Module ): EmscriptenSettings { const config: PyodideConfig = { @@ -193,7 +193,7 @@ function* featureDetectionMonkeyPatchesContextManager() { */ export async function instantiateEmscriptenModule( isWorkerd: boolean, - pythonStdlib: Uint8Array, + pythonStdlib: ArrayBuffer, wasmModule: WebAssembly.Module ): Promise { const emscriptenSettings = getEmscriptenSettings( @@ -210,6 +210,8 @@ export async function instantiateEmscriptenModule( // Wait until we've executed all the preRun hooks before proceeding const emscriptenModule = await emscriptenSettings.readyPromise; + emscriptenModule.setUnsafeEval = setUnsafeEval; + emscriptenModule.setGetRandomValues = setGetRandomValues; return emscriptenModule; } catch (e) { console.warn('Error in instantiateEmscriptenModule'); diff --git a/src/pyodide/internal/python.ts b/src/pyodide/internal/python.ts index 3fd959690a9..67018b87aec 100644 --- a/src/pyodide/internal/python.ts +++ b/src/pyodide/internal/python.ts @@ -18,39 +18,11 @@ import { entropyBeforeTopLevel, getRandomValues, } from 'pyodide-internal:topLevelEntropy/lib'; +import { default as SetupEmscripten } from 'internal:setup-emscripten'; + import { default as UnsafeEval } from 'internal:unsafe-eval'; import { simpleRunPython } from 'pyodide-internal:util'; -/** - * This file is a simplified version of the Pyodide loader: - * https://github.com/pyodide/pyodide/blob/main/src/js/pyodide.ts - * - * In particular, it drops the package lock, which disables - * `pyodide.loadPackage`. In trade we add memory snapshots here. 
- */ - -/** - * _createPyodideModule and pyodideWasmModule together are produced by the - * Emscripten linker - */ -import pyodideWasmModule from 'pyodide-internal:generated/pyodide.asm.wasm'; - -/** - * The Python and Pyodide stdlib zipped together. The zip format is convenient - * because Python has a "ziploader" that allows one to import directly from a - * zip file. - * - * The ziploader solves bootstrapping problems around unpacking: Python comes - * with a bunch of C libs to unpack various archive formats, but they need stuff - * in this zip file to initialize their runtime state. - */ -import pythonStdlib from 'pyodide-internal:generated/python_stdlib.zip'; -import { - instantiateEmscriptenModule, - setUnsafeEval, - setGetRandomValues, -} from 'pyodide-internal:generated/emscriptenSetup'; - /** * After running `instantiateEmscriptenModule` but before calling into any C * APIs, we call this function. If `MEMORY` is defined, then we will have passed @@ -90,14 +62,15 @@ export async function loadPyodide( indexURL: string ): Promise { const Module = await enterJaegerSpan('instantiate_emscripten', () => - instantiateEmscriptenModule(isWorkerd, pythonStdlib, pyodideWasmModule) + SetupEmscripten.getModule() ); + Module.API.config.jsglobals = globalThis; if (isWorkerd) { Module.API.config.indexURL = indexURL; Module.API.config.resolveLockFilePromise!(lockfile); } - setUnsafeEval(UnsafeEval); - setGetRandomValues(getRandomValues); + Module.setUnsafeEval(UnsafeEval); + Module.setGetRandomValues(getRandomValues); await enterJaegerSpan('prepare_wasm_linear_memory', () => prepareWasmLinearMemory(Module) ); diff --git a/src/pyodide/types/emscripten.d.ts b/src/pyodide/types/emscripten.d.ts index e465da6c377..5b2ccba9a6f 100644 --- a/src/pyodide/types/emscripten.d.ts +++ b/src/pyodide/types/emscripten.d.ts @@ -68,4 +68,8 @@ interface Module { addRunDependency(x: string): void; removeRunDependency(x: string): void; noInitialRun: boolean; + setUnsafeEval(mod: typeof 
import('internal:unsafe-eval').default): void; + setGetRandomValues( + func: typeof import('pyodide-internal:topLevelEntropy/lib').getRandomValues + ): void; } diff --git a/src/pyodide/types/setup-emscripten.d.ts b/src/pyodide/types/setup-emscripten.d.ts new file mode 100644 index 00000000000..191e2ed3c5d --- /dev/null +++ b/src/pyodide/types/setup-emscripten.d.ts @@ -0,0 +1,5 @@ +declare namespace SetupEmscripten { + const getModule: () => Module; +} + +export default SetupEmscripten; diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index 719018cabbf..9b3ffeff4ce 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -16,6 +16,7 @@ filegroup( "pyodide/pyodide.c++", "memory-cache.c++", "r2*.c++", + "pyodide/setup-emscripten.c++", "rtti.c++", "url.c++", "util.c++", @@ -126,15 +127,19 @@ wd_cc_library( name = "pyodide", srcs = [ "pyodide/pyodide.c++", + "pyodide/setup-emscripten.c++", ], hdrs = [ "pyodide/pyodide.h", + "pyodide/setup-emscripten.h", + "//src/pyodide:generated/emscripten_setup.capnp.h", "//src/pyodide:generated/pyodide_extra.capnp.h", ], implementation_deps = ["//src/workerd/util:string-buffer"], visibility = ["//visibility:public"], deps = [ "//src/pyodide", + "//src/pyodide:emscripten_setup_capnp", "//src/pyodide:pyodide_extra_capnp", "//src/workerd/io", "//src/workerd/jsg", diff --git a/src/workerd/api/modules.h b/src/workerd/api/modules.h index 4587c403266..8376039bfba 100644 --- a/src/workerd/api/modules.h +++ b/src/workerd/api/modules.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -21,6 +22,7 @@ template void registerModules(Registry& registry, auto featureFlags) { node::registerNodeJsCompatModules(registry, featureFlags); if (featureFlags.getPythonWorkers()) { + pyodide::registerSetupEmscriptenModule(registry, featureFlags); pyodide::registerPyodideModules(registry, featureFlags); } registerUnsafeModules(registry, featureFlags); @@ -48,6 +50,7 @@ void 
registerBuiltinModules(jsg::modules::ModuleRegistry::Builder& builder, auto } if (featureFlags.getPythonWorkers()) { + builder.add(pyodide::getInternalSetupEmscriptenModuleBundle(featureFlags)); builder.add(pyodide::getExternalPyodideModuleBundle(featureFlags)); builder.add(pyodide::getInternalPyodideModuleBundle(featureFlags)); } diff --git a/src/workerd/api/pyodide/setup-emscripten.c++ b/src/workerd/api/pyodide/setup-emscripten.c++ new file mode 100644 index 00000000000..613dfdd13bb --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.c++ @@ -0,0 +1,229 @@ +#include "setup-emscripten.h" + +#include + +namespace workerd::api::pyodide { + +jsg::JsValue SetupEmscripten::getModule(jsg::Lock& js) { + KJ_IF_SOME(module, emscriptenModule) { + return module.getHandle(js); + } else { + auto& runtime = KJ_ASSERT_NONNULL(Worker::Api::current().getEmscriptenRuntime()); + js.v8Context()->SetSecurityToken(runtime.contextToken.getHandle(js)); + emscriptenModule = runtime.emscriptenRuntime; + return KJ_ASSERT_NONNULL(emscriptenModule).getHandle(js); + } +} + +void SetupEmscripten::visitForGc(jsg::GcVisitor& visitor) { + // const_cast is ok because the GcVisitor doesn't actually change the underlying value of the object. 
+ KJ_IF_SOME(module, emscriptenModule) { + visitor.visit(const_cast&>(module)); + } +} + +v8::Local loadEmscriptenSetupModule(jsg::Lock& js) { + v8::Local contentStr = + jsg::v8Str(js.v8Isolate, EMSCRIPTEN_SETUP->getCode().asArray()); + v8::ScriptOrigin origin( + jsg::v8StrIntern(js.v8Isolate, "pyodide-internal:generated/emscriptenSetup"), 0, 0, false, -1, + {}, false, false, true); + v8::ScriptCompiler::Source source(contentStr, origin); + return jsg::check(v8::ScriptCompiler::CompileModule(js.v8Isolate, &source)); +} + +jsg::JsValue resolvePromise(jsg::Lock& js, jsg::JsValue prom) { + auto promise = KJ_ASSERT_NONNULL(prom.tryCast()); + if (promise.state() == jsg::PromiseState::PENDING) { + js.runMicrotasks(); + } + KJ_ASSERT(promise.state() == jsg::PromiseState::FULFILLED); + return promise.result(); +} + +void instantiateEmscriptenSetupModule(jsg::Lock& js, v8::Local& module) { + jsg::instantiateModule(js, module); + auto evalPromise = KJ_ASSERT_NONNULL( + jsg::JsValue(jsg::check(module->Evaluate(js.v8Context()))).tryCast()); + resolvePromise(js, evalPromise); + KJ_ASSERT(module->GetStatus() == v8::Module::kEvaluated); +} + +v8::Local getInstantiateEmscriptenModule( + jsg::Lock& js, v8::Local& module) { + auto instantiateEmscriptenModule = + js.v8Get(module->GetModuleNamespace().As(), "instantiateEmscriptenModule"_kj); + KJ_ASSERT(instantiateEmscriptenModule->IsFunction()); + return instantiateEmscriptenModule.As(); +} + +template +jsg::JsValue callFunction(jsg::Lock& js, v8::Local& func, Args... 
args) { + v8::LocalVector argv( + js.v8Isolate, std::initializer_list>{args...}); + return jsg::JsValue( + jsg::check(func->Call(js.v8Context(), js.v8Null(), argv.size(), argv.data()))); +} + +jsg::JsValue callInstantiateEmscriptenModule(jsg::Lock& js, + v8::Local& func, + bool isWorkerd, + capnp::Data::Reader pythonStdlibZipReader, + capnp::Data::Reader pyodideAsmWasmReader) { + AllowV8BackgroundThreadsScope scope; + js.setAllowEval(true); + KJ_DEFER(js.setAllowEval(false)); + + auto pythonStdlibZip = v8::ArrayBuffer::New(js.v8Isolate, pythonStdlibZipReader.size(), + v8::BackingStoreInitializationMode::kUninitialized); + memcpy(pythonStdlibZip->Data(), pythonStdlibZipReader.begin(), pythonStdlibZipReader.size()); + auto pyodideAsmWasm = jsg::check(v8::WasmModuleObject::Compile(js.v8Isolate, + v8::MemorySpan(pyodideAsmWasmReader.begin(), pyodideAsmWasmReader.size()))); + return resolvePromise(js, + callFunction( + js, func, js.boolean(isWorkerd), kj::mv(pythonStdlibZip), kj::mv(pyodideAsmWasm))); +} + +void handleLog(jsg::Lock& js, + LogLevel level, + const v8::Global& original, + const v8::FunctionCallbackInfo& info) { + // Call original V8 implementation so messages sent to connected inspector if any + auto context = js.v8Context(); + int length = info.Length(); + v8::LocalVector args(js.v8Isolate, length + 1); + for (auto i: kj::zeroTo(length)) args[i] = info[i]; + jsg::check(original.Get(js.v8Isolate)->Call(context, info.This(), length, args.data())); + + // The TryCatch is initialised here to catch cases where the v8 isolate's execution is + // terminating, usually as a result of an infinite loop. We need to perform the initialisation + // here because `message` is called multiple times. + v8::TryCatch tryCatch(js.v8Isolate); + auto message = [&]() { + int length = info.Length(); + kj::Vector stringified(length); + for (auto i: kj::zeroTo(length)) { + auto arg = info[i]; + // serializeJson and v8::Value::ToString can throw JS exceptions + // (e.g. 
for recursive objects) so we eat them here, to ensure logging and non-logging code + // have the same exception behavior. + if (!tryCatch.CanContinue()) { + stringified.add(kj::str("{}")); + break; + } + // The following code checks the `arg` to see if it should be serialised to JSON. + // + // We use the following criteria: if arg is null, a number, a boolean, an array, a string, an + // object or it defines a `toJSON` property that is a function, then the arg gets serialised + // to JSON. + // + // Otherwise we stringify the argument. + js.withinHandleScope([&] { + auto context = js.v8Context(); + bool shouldSerialiseToJson = false; + if (arg->IsNull() || arg->IsNumber() || arg->IsArray() || arg->IsBoolean() || + arg->IsString() || + arg->IsUndefined()) { // This is special cased for backwards compatibility. + shouldSerialiseToJson = true; + } + if (arg->IsObject()) { + v8::Local obj = arg.As(); + v8::Local freshObj = v8::Object::New(js.v8Isolate); + + // Determine whether `obj` is constructed using `{}` or `new Object()`. This ensures + // we don't serialise values like Promises to JSON. + if (obj->GetPrototypeV2()->SameValue(freshObj->GetPrototypeV2()) || + obj->GetPrototypeV2()->IsNull()) { + shouldSerialiseToJson = true; + } + + // Check if arg has a `toJSON` property which is a function. + auto toJSONStr = jsg::v8StrIntern(js.v8Isolate, "toJSON"_kj); + v8::MaybeLocal toJSON = obj->GetRealNamedProperty(context, toJSONStr); + if (!toJSON.IsEmpty()) { + if (jsg::check(toJSON)->IsFunction()) { + shouldSerialiseToJson = true; + } + } + } + + if (kj::runCatchingExceptions([&]() { + // On the off chance the arg is the request.cf object, let's make + // sure we do not log proxied fields here. + if (shouldSerialiseToJson) { + auto s = js.serializeJson(arg); + // serializeJson returns the string "undefined" for some values (undefined, + // Symbols, functions). We remap these values to null to ensure valid JSON output. 
+ if (s == "undefined"_kj) { + stringified.add(kj::str("null")); + } else { + stringified.add(kj::mv(s)); + } + } else { + stringified.add(js.serializeJson(jsg::check(arg->ToString(context)))); + } + }) != kj::none) { + stringified.add(kj::str("{}")); + }; + }); + } + return kj::str("[", kj::delimited(stringified, ", "_kj), "]"); + }; + + KJ_LOG(INFO, "console.log()", message()); +} + +void setupConsole(jsg::Lock& lock, v8::Local& global, v8::Local& context) { + auto consoleStr = jsg::v8StrIntern(lock.v8Isolate, "console"); + auto console = jsg::check(global->Get(context, consoleStr)).As(); + auto setHandler = [&](const char* method, LogLevel level) { + auto methodStr = jsg::v8StrIntern(lock.v8Isolate, method); + v8::Global original( + lock.v8Isolate, jsg::check(console->Get(context, methodStr)).As()); + + auto f = lock.wrapSimpleFunction(context, + [level, original = kj::mv(original)]( + jsg::Lock& js, const v8::FunctionCallbackInfo& info) { + handleLog(js, level, original, info); + }); + jsg::check(console->Set(context, methodStr, f)); + }; + + setHandler("debug", LogLevel::DEBUG_); + setHandler("error", LogLevel::ERROR); + setHandler("info", LogLevel::INFO); + setHandler("log", LogLevel::LOG); + setHandler("warn", LogLevel::WARN); +} +void setWebAssemblyModuleHasInstance(jsg::Lock& lock, v8::Local context) { + auto instanceof = [](const v8::FunctionCallbackInfo& info) { + jsg::Lock::from(info.GetIsolate()).withinHandleScope([&] { + info.GetReturnValue().Set(info[0]->IsWasmModuleObject()); + }); + }; + v8::Local function = jsg::check(v8::Function::New(context, instanceof)); + + v8::Object* webAssembly = v8::Object::Cast(*jsg::check( + context->Global()->Get(context, jsg::v8StrIntern(lock.v8Isolate, "WebAssembly")))); + v8::Object* module = v8::Object::Cast( + *jsg::check(webAssembly->Get(context, jsg::v8StrIntern(lock.v8Isolate, "Module")))); + + jsg::check( + module->DefineOwnProperty(context, v8::Symbol::GetHasInstance(lock.v8Isolate), function)); +} + 
+EmscriptenRuntime initializeEmscriptenRuntime(jsg::Lock& js, bool isWorkerd) { + // TODO: add tracing span + auto context = js.v8Context(); + auto global = context->Global(); + setWebAssemblyModuleHasInstance(js, context); + setupConsole(js, global, context); + auto module = loadEmscriptenSetupModule(js); + instantiateEmscriptenSetupModule(js, module); + auto instantiateEmscriptenModule = getInstantiateEmscriptenModule(js, module); + auto emscriptenModule = callInstantiateEmscriptenModule(js, instantiateEmscriptenModule, + isWorkerd, EMSCRIPTEN_SETUP->getPythonStdlibZip(), EMSCRIPTEN_SETUP->getPyodideAsmWasm()); + auto contextToken = jsg::JsValue(context->GetSecurityToken()); + return EmscriptenRuntime{contextToken.addRef(js), emscriptenModule.addRef(js)}; +} +} // namespace workerd::api::pyodide diff --git a/src/workerd/api/pyodide/setup-emscripten.h b/src/workerd/api/pyodide/setup-emscripten.h new file mode 100644 index 00000000000..0e06527b6af --- /dev/null +++ b/src/workerd/api/pyodide/setup-emscripten.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include +#include + +namespace workerd::api::pyodide { + +using instantiateEmscriptenModuleFunction = jsg::Function>( + jsg::JsBoolean, jsg::JsString, jsg::JsString)>; + +struct EmscriptenRuntime { + jsg::JsRef contextToken; + jsg::JsRef emscriptenRuntime; +}; + +class SetupEmscripten: public jsg::Object { +public: + SetupEmscripten() {}; + SetupEmscripten(jsg::Lock& js, const jsg::Url&) {} + + jsg::JsValue getModule(jsg::Lock& js); + + JSG_RESOURCE_TYPE(SetupEmscripten) { + JSG_METHOD(getModule); + } + +private: + // Reference to the api value of the emscripten module. + // Used for visitForGc when no js is currently running. 
+ kj::Maybe&> emscriptenModule; + void visitForGc(jsg::GcVisitor& visitor); +}; + +#define EW_SETUP_EMSCRIPTEN_ISOLATE_TYPES api::pyodide::SetupEmscripten + +template +void registerSetupEmscriptenModule(Registry& registry, auto featureFlags) { + registry.template addBuiltinModule( + "internal:setup-emscripten", workerd::jsg::ModuleRegistry::Type::INTERNAL); +} + +template +kj::Own getInternalSetupEmscriptenModuleBundle(auto featureFlags) { + jsg::modules::ModuleBundle::BuiltinBuilder builder( + jsg::modules::ModuleBundle::BuiltinBuilder::Type::BUILTIN_ONLY); + static const auto kSpecifier = "internal:setup-emscripten"_url; + builder.addObject(kSpecifier); + return builder.finish(); +} + +EmscriptenRuntime initializeEmscriptenRuntime(jsg::Lock& js, bool isWorkerd); + +} // namespace workerd::api::pyodide diff --git a/src/workerd/api/rtti.c++ b/src/workerd/api/rtti.c++ index f1410b33418..a438b1f2e4d 100644 --- a/src/workerd/api/rtti.c++ +++ b/src/workerd/api/rtti.c++ @@ -65,6 +65,7 @@ F("unsafe", EW_UNSAFE_ISOLATE_TYPES) \ F("memory-cache", EW_MEMORY_CACHE_ISOLATE_TYPES) \ F("pyodide", EW_PYODIDE_ISOLATE_TYPES) \ + F("emscripten", EW_SETUP_EMSCRIPTEN_ISOLATE_TYPES) \ F("kv", EW_KV_ISOLATE_TYPES) \ F("queue", EW_QUEUE_ISOLATE_TYPES) \ F("r2-admin", EW_R2_PUBLIC_BETA_ADMIN_ISOLATE_TYPES) \ diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index da909a8edd9..f4a527b2a3c 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -41,6 +41,9 @@ class Socket; class WebSocket; class WebSocketRequestResponsePair; class ExecutionContext; +namespace pyodide { +struct EmscriptenRuntime; +} } // namespace api class ThreadContext; @@ -294,6 +297,10 @@ class Worker::Isolate: public kj::AtomicRefcounted { return id; } + inline void setId(kj::String newId) { + id = kj::mv(newId); + } + // Parses the given code to create a new script object and returns it. 
kj::Own newScript(kj::StringPtr id, Script::Source source, @@ -532,6 +539,8 @@ class Worker::Api { virtual IsolateObserver& getMetrics() = 0; virtual const IsolateObserver& getMetrics() const = 0; + virtual const kj::Maybe& getEmscriptenRuntime() const = 0; + // Set the module fallback service callback, if any. using ModuleFallbackCallback = kj::Maybe>( jsg::Lock& js, diff --git a/src/workerd/jsg/promise.h b/src/workerd/jsg/promise.h index 4304dc1266b..6982c7c56a4 100644 --- a/src/workerd/jsg/promise.h +++ b/src/workerd/jsg/promise.h @@ -572,6 +572,11 @@ class PromiseWrapper { // std::nullptr_t). The getConfig allows us to handle any case using reasonable defaults. PromiseWrapper(const auto& config): config(getConfig(config)) {} + template + void updateConfiguration(MetaConfiguration&& configuration) { + config = getConfig(kj::fwd(configuration)); + } + template static constexpr const char* getName(Promise*) { return "Promise"; @@ -668,7 +673,7 @@ class PromiseWrapper { } private: - const JsgConfig config; + JsgConfig config; static bool isThenable(v8::Local context, v8::Local handle) { if (handle->IsObject()) { diff --git a/src/workerd/jsg/resource.h b/src/workerd/jsg/resource.h index 9896e0e949c..760607745f3 100644 --- a/src/workerd/jsg/resource.h +++ b/src/workerd/jsg/resource.h @@ -1349,6 +1349,11 @@ class ResourceWrapper { ResourceWrapper(MetaConfiguration&& configuration) : configuration(kj::fwd(configuration)) {} + template + void updateConfiguration(MetaConfiguration&& config) { + configuration = kj::fwd(config); + } + inline void initTypeWrapper() { TypeWrapper& wrapper = static_cast(*this); wrapper.resourceTypeMap.insert(typeid(T), diff --git a/src/workerd/jsg/setup.h b/src/workerd/jsg/setup.h index bdcde5bfc47..eb3094af183 100644 --- a/src/workerd/jsg/setup.h +++ b/src/workerd/jsg/setup.h @@ -409,6 +409,11 @@ class Isolate: public IsolateBase { dropWrappers(kj::mv(wrapper)); } + template + void updateConfiguration(MetaConfiguration&& configuration) { + 
wrapper->updateConfiguration(kj::fwd(configuration)); + } + kj::Exception unwrapException( v8::Local context, v8::Local exception) override { return wrapper->template unwrap( diff --git a/src/workerd/jsg/type-wrapper.h b/src/workerd/jsg/type-wrapper.h index a1c261b489e..c6299f17a4d 100644 --- a/src/workerd/jsg/type-wrapper.h +++ b/src/workerd/jsg/type-wrapper.h @@ -245,6 +245,8 @@ class TypeWrapperBase TypeWrapperBase(MetaConfiguration& config) {} inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& configuration) {} void unwrap() = delete; // StructWrapper only implements tryUnwrap(), not unwrap() }; @@ -274,6 +276,8 @@ class TypeWrapperBase, JsgKind::EXTENSION> void unwrap() = delete; // extensions only implement tryUnwrap(), not unwrap() inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& configuration) {} }; // Specialization of TypeWrapperBase for InjectConfiguration. @@ -297,6 +301,10 @@ class TypeWrapperBase, JsgKind::EXTENSI void getTemplate() = delete; inline void initTypeWrapper() {} + template + void updateConfiguration(MetaConfiguration&& config) { + configuration = kj::fwd(config); + } private: Configuration configuration; @@ -411,6 +419,13 @@ class TypeWrapper: public DynamicResourceTypeMap, (TypeWrapperBase::initTypeWrapper(), ...); } + template + void updateConfiguration(MetaConfiguration&& configuration) { + (TypeWrapperBase::updateConfiguration(kj::fwd(configuration)), ...); + MaybeWrapper::updateConfiguration(kj::fwd(configuration)); + PromiseWrapper::updateConfiguration(kj::fwd(configuration)); + } + static TypeWrapper& from(v8::Isolate* isolate) { return *reinterpret_cast(isolate->GetData(1)); } diff --git a/src/workerd/jsg/value.h b/src/workerd/jsg/value.h index 54c7fca2d63..70c6dbf0097 100644 --- a/src/workerd/jsg/value.h +++ b/src/workerd/jsg/value.h @@ -587,6 +587,10 @@ class MaybeWrapper { // The getConfig allows us to handle any case using reasonable defaults. 
MaybeWrapper(const auto& config): config(getConfig(config)) {} + template + void updateConfiguration(MetaConfiguration&& configuration) { + config = getConfig(kj::fwd(configuration)); + } template static constexpr decltype(auto) getName(kj::Maybe*) { return TypeWrapper::getName((kj::Decay*)nullptr); @@ -623,7 +627,7 @@ class MaybeWrapper { } private: - const JsgConfig config; + JsgConfig config; }; // ======================================================================================= diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index a0dcf2df17a..0948306aaaf 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,7 @@ JSG_DECLARE_ISOLATE_TYPE(JsgWorkerdIsolate, #ifdef WORKERD_EXPERIMENTAL_ENABLE_WEBGPU EW_WEBGPU_ISOLATE_TYPES, #endif + EW_SETUP_EMSCRIPTEN_ISOLATE_TYPES, jsg::TypeWrapperExtension, jsg::InjectConfiguration, @@ -136,6 +138,7 @@ struct WorkerdApi::Impl final { JsgWorkerdIsolate jsgIsolate; api::MemoryCacheProvider& memoryCacheProvider; const PythonConfig& pythonConfig; + kj::Maybe maybeEmscriptenRuntime; class Configuration { public: @@ -174,8 +177,16 @@ struct WorkerdApi::Impl final { limitEnforcer->getCreateParams()), memoryCacheProvider(memoryCacheProvider), pythonConfig(pythonConfig) { - jsgIsolate.runInLockScope( - [&](JsgWorkerdIsolate::Lock& lock) { limitEnforcer->customizeIsolate(lock.v8Isolate); }); + jsgIsolate.runInLockScope([&](JsgWorkerdIsolate::Lock& lock) { + limitEnforcer->customizeIsolate(lock.v8Isolate); + if (featuresParam.getPythonWorkers()) { + auto context = lock.newContext({}, lock.v8Isolate); + v8::Context::Scope scope(context.getHandle(lock)); + // Init emscripten synchronously; the python script will import setup-emscripten and + // call getModule() + maybeEmscriptenRuntime = api::pyodide::initializeEmscriptenRuntime(lock, true); + } + }); } static 
v8::Local compileTextGlobal( @@ -291,6 +302,10 @@ const IsolateObserver& WorkerdApi::getMetrics() const { return *impl->observer; } +const kj::Maybe& WorkerdApi::getEmscriptenRuntime() const { + return impl->maybeEmscriptenRuntime; +} + Worker::Script::Source WorkerdApi::extractSource(kj::StringPtr name, config::Worker::Reader conf, Worker::ValidationErrorReporter& errorReporter, diff --git a/src/workerd/server/workerd-api.h b/src/workerd/server/workerd-api.h index 3f09658d220..73ebd60e1c4 100644 --- a/src/workerd/server/workerd-api.h +++ b/src/workerd/server/workerd-api.h @@ -12,7 +12,8 @@ namespace workerd { namespace api { namespace pyodide { struct PythonConfig; -} +struct EmscriptenRuntime; +} // namespace pyodide } // namespace api } // namespace workerd namespace workerd { @@ -60,6 +61,8 @@ class WorkerdApi final: public Worker::Api { IsolateObserver& getMetrics() override; const IsolateObserver& getMetrics() const override; + const kj::Maybe& getEmscriptenRuntime() const override; + static Worker::Script::Source extractSource(kj::StringPtr name, config::Worker::Reader conf, Worker::ValidationErrorReporter& errorReporter,