diff --git a/accepted/2023/wasm-browser-threads.md b/accepted/2023/wasm-browser-threads.md
index c4a15398c..4d899add5 100644
--- a/accepted/2023/wasm-browser-threads.md
+++ b/accepted/2023/wasm-browser-threads.md
@@ -1,6 +1,14 @@
# Multi-threading on a browser
-## Goals
+## Table of contents
+- [Goals](#goals)
+- [Key ideas](#key-ideas)
+- [State April 2024](#state-2024-april)
+- [Design details](#design-details)
+- [State September 2023](#state-2023-september)
+- [Alternatives](#alternatives---as-considered-2023-sep)
+
+# Goals
- CPU intensive workloads on dotnet thread pool.
- Allow user to start new managed threads using `new Thread` and join it.
- Add new C# API for creating web workers with JS interop. Allow JS async/promises via external event loop.
@@ -31,7 +39,7 @@
† Note: all the text below discusses MT build only, unless explicit about ST build.
-## Key idea in this proposal
+# Key ideas
Move all managed user code out of UI/DOM thread, so that it becomes consistent with all other threads.
@@ -54,10 +62,6 @@ Move all managed user code out of UI/DOM thread, so that it becomes consistent w
- It eats your battery
- Browser will kill your tab at random point (Aw, snap).
- It's not deterministic and you can't really test your app to prove it harmless.
-- Firefox (still) has synchronous `XMLHttpRequest` which could be captured by async code in service worker
- - it's [deprecated legacy API](https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/Synchronous_and_Asynchronous_Requests#synchronous_request)
- - [but other browsers don't](https://wpt.fyi/results/service-workers/service-worker/fetch-request-xhr-sync.https.html?label=experimental&label=master&aligned) and it's unlikely they will implement it
- - there are deployment and security challenges with it
- all the other threads/workers could synchronously block
- `Atomics.wait()` works as expected
- if we will have managed thread on the UI thread, any `lock` or Mono GC barrier could cause spin-wait
@@ -85,38 +89,54 @@ Move all managed user code out of UI/DOM thread, so that it becomes consistent w
**7)** There could be pending HTTP promise (which needs browser event loop to resolve) and blocking `.Wait` on the same thread and same task/chain. Leading to deadlock.
-# Summary
+# State 2024 April
-## (14) Deputy + emscripten dispatch to UI + JSWebWorker + without sync JSExport
+## What was implemented in Net9 - Deputy thread design
-This is Pavel's preferred design based on experiments and tests so far.
-For other possible design options [see below](#Interesting-combinations).
+For other possible design options we considered [see below](#alternatives---as-considered-2023-sep).
-- Emscripten startup on UI thread
- - C functions of emscripten
-- MonoVM startup on UI thread
+- Introduce dedicated web worker called "deputy thread"
+ - managed `Main()` is dispatched onto deputy thread
+- MonoVM startup on deputy thread
- non-GC C functions of mono are still available
- - there is risk that UI will be suspended by pending GC
- - it keeps `renderBatch` working as is
- - it could be later optimized for purity to **(16)**. Pavel would like this.
- - the mono startup is CPU heavy and it blocks rendering even for server side rendered UI.
- - but it's difficult to get rid of many mono [C functions we currently use](#Move-Mono-startup-to-deputy)
-- managed `Main()` would be dispatched onto dedicated web worker called "deputy thread"
- - because the UI thread would be mostly idling, it could
+- Emscripten startup stays on UI thread
+ - C functions of emscripten
+ - download of assets and loading them into WASM memory
+- UI/DOM thread
+ - because the UI thread would be mostly idling, it could:
- render UI, keep debugger working
- dynamically create pthreads
-- sync JSExports would not be supported on UI thread
- - later sync calls could opt-in and we implement **(13)** via spin-wait
-- JS interop only on dedicated `JSWebWorker`
-
-## Sidecar options
-
-There are few downsides to them
-- if we keep main managed thread and emscripten thread the same, pthreads can't be created dynamically
- - we could upgrade it to design **(15)** and have extra thread for running managed `Main()`
-- we will have to implement extra layer of dispatch from UI to sidecar
- - this could be pure JS via `postMessage`, which is slow and can't do spin-wait.
- - we could have `SharedArrayBuffer` for the messages, but we would have to implement (another?) marshaling.
+ - UI thread stays attached to Mono VM for Blazor's reasons (for Net9)
+ - it keeps `renderBatch` working as is, but it's far from ideal
+ - there is risk that UI could be suspended by pending GC
+ - It would be ideal to change Blazor so that it doesn't touch managed objects via naked pointers during render.
+ - we strive to detach the UI thread from Mono
+- I/O thread
+ - is helper thread which allows `Task` to be resolved by UI's `Promise` even when deputy thread is blocked in `.Wait`
+- JS interop from any thread is marshaled to UI thread's JavaScript
+- HTTP and WS clients are implemented in JS of UI thread
+- There is draft of `JSWebWorker` API
+ - it allows C# users to create dedicated JS thread
+ - the `JSImport` calls are dispatched to it if you are on that thread
+ - or if you pass `JSObject` proxy with affinity to that thread as `JSImport` parameter.
+ - The API was not made public in Net9 yet
+- calling synchronous `JSExports` is not supported on UI thread
+ - this could be changed by configuration option but it's dangerous.
+- calling asynchronous `JSExports` is supported
+- calling asynchronous `JSImport` is supported
+- calling synchronous `JSImport` is supported without synchronous callback to C#
+- Strings are marshaled by value
+ - as opposed to by reference optimization we have in single-threaded build
+- Emscripten VFS and other syscalls
+ - file system operations are single-threaded and always marshaled to UI thread
+- Emscripten pool of pthreads
+ - browser threads are expensive (as compared to normal OS)
+ - creation of `WebWorker` requires UI thread to do it
+ - there is quite complex and slow setup for `WebWorker` to become pthread and then to attach as Mono thread.
+ - that's why Emscripten pre-allocates pthreads
+ - this allows `pthread_create` to be synchronous and faster
+
+# Design details
## Define terms
- UI thread
@@ -148,23 +168,8 @@ There are few downsides to them
- we already have prototype of the similar functionality
- which can spin-wait
-# Details
-
-## JSImport and marshaled JS functions
-- both sync and async could be called on all `JSWebWorker` threads
-- both sync and async could be called on main managed thread (even when running on UI)
- - unless there is loop back to blocking `JSExport`, it could not deadlock
-
-## JSExport & C# delegates
-- async could be called on all `JSWebWorker` threads
-- sync could be called on `JSWebWorker`
-- sync could be called on from UI thread is problematic
- - with spin-wait in UI in JS it has **2)** problems
- - with spin-wait in UI when emscripten is there could also deadlock the rest of the app
- - this means that combination of sync JSExport and deputy design is dangerous
-
## Proxies - thread affinity
-- all of them have thread affinity
+- all proxies of JS objects have thread affinity
- all of them need to be used and disposed on correct thread
- how to dispatch to correct thread is one of the questions here
- all of them are registered to 2 GCs
@@ -204,6 +209,8 @@ There are few downsides to them
- there is `JSSynchronizationContext`` installed on it
- so that user code could dispatch back to it, in case that it needs to call `JSObject` proxy (with thread affinity)
- this thread needs to throw on any `.Wait` because of the problem **7**
+- alternatively we could disable C# code on this thread and treat it similar to UI thread
+- alternatively we could have I/O threads
## HTTP and WS clients
- are implemented in terms of `JSObject` and `Promise` proxies
@@ -219,24 +226,251 @@ There are few downsides to them
- other unknowing users are `XmlUrlResolver`, `XmlDownloadManager`, `X509ResourceClient`, ...
- because we could have blocking wait now, we could also implement synchronous APIs of HTTP/WS
- so that existing user code bases would just work without change
- - at the moment they throw PNSE
- this would also require separate thread, doing the async job
+ - we could use I/O thread for it
## JSImport calls on threads without JSWebWorker
- those are
- thread-pool threads
- main managed thread in deputy design
-- what should happen when it calls JSImport directly ?
-- what should happen when it calls HTTP/WS clients ?
-- we could dispatch it to UI thread
+- we dispatch it to UI thread
- easy to understand default behavior
- - downside is blocking the UI and emscripten loops with CPU intensive activity
- - in sidecar design, also extra copy of buffers
-- we could instead create dedicated `JSWebWorker` managed thread
- - more difficult to reason about
- - this extra worker could also serve all the sync-to-async jobs
-# Dispatching call, who is responsible
+## Performance
+As compared to ST build for dotnet wasm:
+- the dispatch between threads (caused by JS object thread affinity) will have negative performance impact on the JS interop
+- in case of HTTP/WS clients used via Streams, it could be surprising
+- browser performance is lower when working with SharedArrayBuffer
+- Mono performance is lower because there are GC safe-points and locks in the VM code
+- startup is slower because creation of WebWorker instances is slow
+- VFS access is slow because it's dispatched to UI thread
+- console output is slow because its POSIX stream is dispatched to UI thread, call per line
+
+# State 2023 September
+ - we already ship MT version of the runtime in the wasm-tools workload.
+ - It's enabled by `<WasmEnableThreads>true</WasmEnableThreads>` and it requires COOP HTTP headers.
+ - It will serve extra file `dotnet.native.worker.js`.
+ - This will also start in Blazor project, but UI rendering would not work.
+ - we have pre-allocated pool of browser Web Workers which are mapped to pthread dynamically.
+ - we can configure pthread to keep running after synchronous thread_main finished. That's necessary to run any async tasks involving JavaScript interop.
+ - legacy interop has problems with GC boundaries.
+ - JSImport & JSExport work
+ - There is private JSSynchronizationContext implementation which is too synchronous
+ - There is draft of public C# API for creating JSWebWorker with JS interop. It must be dedicated un-managed resource, because we could not cleanup JS state created by user code.
+ - There is MT version of HTTP & WS clients, which could be called from any thread but it's also too synchronous implementation.
+ - Many unit tests fail on MT https://github.com/dotnet/runtime/pull/91536
+ - there are MT C# ref assemblies, which don't throw PNSE for MT build of the runtime for blocking APIs.
+
+# Alternatives - as considered 2023 Sep
+- how to deal with blocking C# code on UI thread
+ - **A)** pretend it's not a problem (this we already have)
+ - **B)** move user C# code to web worker
+ - **C)** move all Mono to web worker
+ - **D)** like **A)** just move call of the C# `Main()` to `JSWebWorker`
+- how to deal with blocking in synchronous JS calls from UI thread (like `onClick` callback)
+ - **D)** pretend it's not a problem (this we already have)
+ - **E)** throw PNSE when synchronous JSExport is called on UI thread
+ - **F)** dispatch calls to synchronous JSExport to web worker and spin-wait on JS side of UI thread.
+- how to implement JS interop between managed main thread and UI thread (DOM)
+ - **G)** put it out of scope for MT, manually implement what Blazor needs
+ - **H)** pure JS dispatch between threads, [comlink](https://github.com/GoogleChromeLabs/comlink) style
+ - **I)** C/emscripten dispatch of infrastructure to marshal individual parameters
+ - **J)** C/emscripten dispatch of method binding and invoke, but marshal parameters on UI thread
+ - **K)** pure C# dispatch between threads
+- how to implement JS interop on non-main web worker
+ - **L)** disable it for all non-main threads
+ - **M)** disable it for managed thread pool threads
+ - **N)** allow it only for threads created as dedicated resource `WebWorker` via new API
+ - **O)** enables it on all workers (let user deal with JS state)
+- how to dispatch calls to the right JS thread context
+ - **P)** via `SynchronizationContext` before `JSImport` stub, synchronously, stack frames
+ - **Q)** via `SynchronizationContext` inside `JSImport` C# stub
+ - **R)** via `emscripten_dispatch_to_thread_async` inside C code of ``
+- how to implement GC/dispose of `JSObject` proxies
+ - **S)** per instance: synchronous dispatch the call to correct thread via `SynchronizationContext`
+ - **T)** per instance: async schedule the cleanup
+ - at the detach of the thread. We already have `forceDisposeProxies`
+ - could target managed thread be paused during GC ?
+- where to instantiate initial user JS modules (like Blazor's)
+ - **U)** in the UI thread
+ - **V)** in the deputy/sidecar thread
+- where to instantiate `JSHost.ImportAsync` modules
+ - **W)** in the UI thread
+ - **X)** in the deputy/sidecar thread
+ - **Y)** allow it only for dedicated `JSWebWorker` threads
+ - **Z)** disable it
+ - same for `JSHost.GlobalThis`, `JSHost.DotnetInstance`
+- how to implement Blazor's `renderBatch`
+ - **a)** keep as is, wrap it with GC pause, use legacy JS interop on UI thread
+ - **b)** extract some of the legacy JS interop into Blazor codebase
+ - **c)** switch to Blazor server mode. Web worker creates the batch of bytes and UI thread applies it to DOM
+- where to create HTTP+WS JS objects
+ - **d)** in the UI thread
+ - **e)** in the managed main thread
+ - **f)** in first calling `JSWebWorker` managed thread
+- how to dispatch calls to HTTP+WS JS objects
+ - **g)** try to stick to the same thread via `ConfigureAwait(false)`.
+ - doesn't really work. `Task` migrates too freely
+ - **h)** via C# `SynchronizationContext`
+ - **i)** via `emscripten_dispatch_to_thread_async`
+ - **j)** via `postMessage`
+ - **k)** same whatever we choose for `JSImport`
+ - note there are some synchronous calls on WS
+- where to create the emscripten instance
+ - **l)** could be on the UI thread
+ - **m)** could be on the "sidecar" thread
+- where to start the Mono VM
+ - **n)** could be on the UI thread
+ - **o)** could be on the "sidecar" thread
+- where to run the C# main entrypoint
+ - **p)** could be on the UI thread
+ - **q)** could be on the "deputy" or "sidecar" thread
+- where to implement sync-to-async: crypto/DLL download/HTTP APIs/
+ - **r)** out of scope
+ - **s)** in the UI thread
+ - **t)** in a dedicated web worker
+ - **z)** in the sidecar or deputy
+- where to marshal JSImport/JSExport parameters/return/exception
+ - **u)** could be only values types, proxies out of scope
+ - **v)** could be on UI thread (with deputy design and Mono there)
+ - **w)** could be on sidecar (with double proxies of parameters via comlink)
+ - **x)** could be on sidecar (with comlink calls per parameter)
+
+## Interesting combinations
+
+### (8) Minimal support
+- **A,D,G,L,P,S,U,Y,a,f,h,l,n,p,v**
+- this is what we [already have today](#state-2023-september)
+- it could deadlock or die,
+- JS interop on threads requires lot of user code attention
+- Keeps problems **1,2,3,4**
+
+### (9) Sidecar + no JS interop + narrow Blazor support
+- **C,E,G,L,P,S,U,Z,c,d,h,m,o,q,u**
+- minimal effort, low risk, low capabilities
+- move both emscripten and Mono VM to sidecar thread
+- no user code JS interop on any thread
+- internal solutions for Blazor needs
+- Ignores problems **1,2,3,4,5**
+
+### (10) Sidecar + only async just JS proxies UI + JSWebWorker + Blazor WASM server
+- **C,E,H,N,P,S,U,W+Y,c,e+f,h+k,m,o,q,w**
+- no C or managed code on UI thread
+ - this architectural clarity is major selling point for sidecar design
+- no support for blocking sync JSExport calls from UI thread (callbacks)
+ - it will throw PNSE
+- this will create double proxy for `Task`, `JSObject`, `Func<>` etc
+ - difficult to GC, difficult to debug
+- double marshaling of parameters
+- Solves **1,2** for managed code.
+- Avoids **1,2** for JS callback
+ - emscripten main loop stays responsive only when main managed thread is idle
+- Solves **3,4,5**
+
+### (11) Sidecar + async & sync just JS proxies UI + JSWebWorker + Blazor WASM server
+- **C,F,H,N,P,S,U,W+Y,c,e+f,h+k,m,o,q,w**
+- no C or managed code on UI thread
+- support for blocking sync JSExport calls from UI thread (callbacks)
+ - blocking of the UI is at least well isolated from runtime code
+ - it makes responsibility for sync call clear
+- this will create double proxy for `Task`, `JSObject`, `Func<>` etc
+ - difficult to GC, difficult to debug
+- double marshaling of parameters
+- Solves **1,2** for managed code
+ - unless there is sync `JSImport`->`JSExport` call
+- Ignores **1,2** for JS callback
+ - emscripten main loop stays responsive only when main managed thread is idle
+- Solves **3,4,5**
+
+### (12) Deputy + managed dispatch to UI + JSWebWorker + with sync JSExport
+- **B,F,K,N,Q,S/T,U,W,a/b/c,d+f,h,l,n,s/z,v**
+- this uses `JSSynchronizationContext` to dispatch calls to UI thread
+ - this is "dirty" as compared to sidecar because some managed code is actually running on UI thread
+ - it needs to also use `SynchronizationContext` for `JSExport` and callbacks, to dispatch to deputy.
+- blazor render could be both legacy render or Blazor server style
+ - because we have both memory and mono on the UI thread
+- Solves **1,2** for managed code
+ - unless there is sync `JSImport`->`JSExport` call
+- Ignores **1,2** for JS callback
+ - emscripten main loop could deadlock on sync JSExport
+- Solves **3,4,5**
+
+### (13) Deputy + emscripten dispatch to UI + JSWebWorker + with sync JSExport
+- **B,F,J,N,R,T,U,W,a/b/c,d+f,i,l,n,s,v**
+- is variation of **(12)**
+ - with emscripten dispatch and marshaling in UI thread
+- this uses `emscripten_dispatch_to_thread_async` for `call_entry_point`, `complete_task`, `cwraps.mono_wasm_invoke_method_bound`, `mono_wasm_invoke_bound_function`, `mono_wasm_invoke_import`, `call_delegate_method` to get to the UI thread.
+- it uses other `cwraps` locally on UI thread, like `mono_wasm_new_root`, `stringToMonoStringRoot`, `malloc`, `free`, `create_task_callback_method`
+ - it means that interop related managed runtime code is running on the UI thread, but not the user code.
+ - it means that parameter marshalling is fast (compared to sidecar)
+ - this deputy design is major selling point #2
+ - it still needs to enter GC barrier and so it could block UI for GC run shortly
+- blazor render could be both legacy render or Blazor server style
+ - because we have both memory and mono on the UI thread
+- Solves **1,2** for managed code
+ - unless there is sync `JSImport`->`JSExport` call
+- Ignores **1,2** for JS callback
+ - emscripten main loop could deadlock on sync JSExport
+- Solves **3,4,5**
+
+### (14) Deputy + emscripten dispatch to UI + JSWebWorker + without sync JSExport
+- **B,F,J,N,R,T,U,W,a/b/c,d+f,i,l,n,s,v**
+- is variation of **(13)**
+ - without support for synchronous JSExport
+- Solves **1,2** for managed code
+ - emscripten main loop stays responsive
+ - unless there is sync `JSImport`->`JSExport` call
+- Avoids **2** for JS callback
+ - by throwing PNSE
+- Solves **3,4,5**
+
+### (15) Deputy + Sidecar + UI thread
+- 2 levels of indirection.
+- benefit: blocking JSExport from UI thread doesn't block emscripten loop
+- downside: complex and more resource intensive
+
+### (16) Deputy without Mono, no GC barrier breach for interop
+- variation on **(13)** or **(14)** where we get rid of per-parameter calls to Mono
+- benefit: get closer to purity of sidecar design without losing perf
+ - this could be done later as purity optimization
+- in this design the mono could be started on deputy thread
+ - this will keep UI responsive during startup
+- UI would not be mono attached thread.
+- See [details](#Get-rid-of-Mono-GC-boundary-breach)
+
+Related Net8 tracking https://github.com/dotnet/runtime/issues/85592
+
+### (17) Emscripten em_queue in deputy, managed UI thread
+- is interesting because it avoids cross-thread dispatch to UI
+ - including double dispatch in Blazor's `RendererSynchronizationContext`
+- avoids solving **1,2**
+- low level hacking of emscripten design assumptions
+
+### (18) Soft deputy
+- keep both Mono and emscripten in the UI thread
+- use `SynchronizationContext` to do the dispatch
+- make it easy and default to run any user code in deputy thread
+ - all Blazor events and callbacks like `onClick` to deputy
+ - move SignalR to deputy
+ - move Blazor entry point to deputy
+- hope that UI thread is mostly idle
+ - enable dynamic thread allocation
+ - throw exceptions in dev loop when UI thread does `lock` or `.Wait` in user code
+
+### (19) Single threaded build in a WebWorker
+- this already works well in Net8
+- when the developer is able to start dotnet in the worker himself and also handle all the messaging.
+- there are known existing examples in the community
+
+## Sidecar options
+There are a few downsides to them
+- if we keep main managed thread and emscripten thread the same, pthreads can't be created dynamically
+ - we could upgrade it to design **(15)** and have extra thread for running managed `Main()`
+- we will have to implement extra layer of dispatch from UI to sidecar
+ - this could be pure JS via `postMessage`, which is slow and can't do spin-wait.
+ - we could have `SharedArrayBuffer` for the messages, but we would have to implement (another?) marshaling.
+
+## Dispatching call, who is responsible
- User code
- this is difficult and complex task which many will fail to do right
- it can't be user code for HTTP/WS clients because there is no direct call via Streams
@@ -254,9 +488,8 @@ There are few downsides to them
- this is just the UI -> deputy dispatch, which is not C# code
- Mono/C/JS internal layer
- see `emscripten_dispatch_to_thread_async` below
-- TODO: API SynCContext as parameter of `JSImport`
-# Dispatching JSImport - what should happen
+## Dispatching JSImport - what should happen
- when there is no extra code-gen flag
- for backward compatibility, dispatch handled by user
- assert that we are on `JSWebWorker` or main thread
@@ -272,7 +505,7 @@ There are few downsides to them
- assert all parameters have same affinity
- could be called from any thread, including thread pool
-# Dispatching JSImport in deputy design - how to do it
+## Dispatching JSImport in deputy design - how to do it
- how to dispatch to UI in deputy design ?
- A) double dispatch, C# -> main, emscripten -> UI
- B) make whole dispatch emscripten only, implement blocking wait in C for emscripten sync calls.
@@ -299,8 +532,7 @@ There are few downsides to them
- store the `JSHandle` on JS side (thread static) associated with method ID
- TODO: double dispatch in Blazor
-
-# Dispatching JSExport - what should happen
+## Dispatching JSExport - alternatives
- when caller is UI, we need to dispatch back to managed thread
- preferably deputy or sidecar thread
- when caller is `JSWebWorker`,
@@ -308,7 +540,7 @@ There are few downsides to them
- when caller is callback from HTTP/WS we could dispatch to any managed thread
- callers are not from managed thread pool, by design. Because we don't want any JS code running there.
-# Dispatching call - options
+## Dispatching call - alternatives
- `JSSynchronizationContext` - in deputy design
- this would not work for dispatch to UI thread as it doesn't have sync context
- is implementation of `SynchronizationContext` installed to
@@ -331,8 +563,6 @@ There are few downsides to them
- `emscripten_dispatch_to_thread_async` - in deputy design
- can dispatch async call to C function on the timer loop of target pthread
- doesn't block and doesn't propagate results and exceptions
- - this would not work in sidecar design
- - because UI is not pthread there
- from JS (UI) to C# managed main
- only necessary for deputy/sidecar, not for HTTP
- async
@@ -396,47 +626,6 @@ There are few downsides to them
- doesn't block and doesn't propagate exceptions
- this is slow
-## Move Mono startup to deputy
-- related to design **(16)**
-- Get rid of Mono GC boundary breach
-- `Task`/`Promise`
- - improved in https://github.com/dotnet/runtime/pull/93010
-- `MonoString`
- - `monoStringToString`, `stringToMonoStringRoot`
- - `mono_wasm_string_get_data_ref`
- - `mono_wasm_string_from_utf16_ref`
- - `get_string_root` -> `mono_wasm_new_external_root`
- - we could start passing just a buffer instead of `MonoString`
- - we will lose the optimization for interned strings
-- managed instances in `MonoArray`, like `MonoString`, `JSObject` or `System.Object`
- - `mono_wasm_register_root`, `mono_wasm_deregister_root`
- - `Interop.Runtime.DeregisterGCRoot`, `Interop.Runtime.RegisterGCRoot`
-- this is about GC and Dispose(): `ManagedObject`, `ErrorObject`
- - `release_js_owned_object_by_gc_handle`, `setup_managed_proxy`, `teardown_managed_proxy`
- - `JavaScriptExports.ReleaseJSOwnedObjectByGCHandle`, `CreateTaskCallback`
-- this is about GC and Dispose(): `JSObject`, `JSException`
- - `Interop.Runtime.ReleaseCSOwnedObject`
-- `mono_wasm_get_assembly_exports` -> `__Register_`
- - `mono_wasm_assembly_load`, `mono_wasm_assembly_find_class`, `mono_wasm_assembly_find_method`
- - this logic could be moved to deputy or sidecar thread
-- `mono_wasm_bind_js_function`, `mono_wasm_bind_cs_function`
- - `mono_wasm_new_external_root`
-- `invoke_method_and_handle_exception`
- - `mono_wasm_new_root`
-- not problem for deputy design: `Module.stackAlloc`, `Module.stackSave`, `Module.stackRestore`
-- what's overall perf impact for Blazor's `renderBatch` ?
-
-## Performance
-As compared to ST build for dotnet wasm:
-- the dispatch between threads (caused by JS object thread affinity) will have negative performance impact on the JS interop
-- in case of HTTP/WS clients used via Streams, it could be surprizing
-- browser performance is lower when working with SharedArrayBuffer
-- Mono performance is lower because there are GC safe-points and locks in the VM code
-- startup is slower because creation of WebWorker instances is slow
-- VFS access is slow because it's dispatched to UI thread
-- console output is slow because it's POSIX stream is dispatched to UI thread, call per `put_char`
-- any VFS access is slow because it dispatched to UI thread
-
## Spin-waiting in JS
- if we want to keep synchronous JS APIs to work on UI thread, we have to spin-wait
- we probably should have opt-in configuration flag for this
@@ -457,11 +646,6 @@ As compared to ST build for dotnet wasm:
- it could still deadlock if there is synchronous JSImport call to UI thread while UI thread is spin-waiting on it.
- this would be clearly user code mistake
-## Debugging
-- VS debugger would work as usual
-- Chrome dev tools would only see the events coming from `postMessage` or `Atomics.waitAsync`
-- Chrome dev tools debugging C# could be bit different, it possibly works already. The C# code would be in different node of the "source" tree view
-
## Blazor
- as compared to single threaded runtime, the major difference would be no synchronous callbacks.
- for example from DOM `onClick`. This is one of the reasons people prefer ST WASM over Blazor Server.
@@ -482,10 +666,6 @@ As compared to ST build for dotnet wasm:
- `JSImport` used for logging: `globalThis.console.debug`, `globalThis.console.error`, `globalThis.console.info`, `globalThis.console.warn`, `Blazor._internal.dotNetCriticalError`
- probably could be any JS context
-## Virtual filesystem
-- we use emscripten's VFS, which is JavaScript implementation in the UI thread.
-- the POSIX operations are synchronously dispatched to UI thread.
-
## WebPack, Rollup friendly
- it's not clear how to make this single-file
- because web workers need to start separate script(s) via `new Worker('./dotnet.js', {type: 'module'})`
@@ -506,293 +686,6 @@ As compared to ST build for dotnet wasm:
- we could synchronously wait for another thread to do async operations
- to fetch another DLL which was not pre-downloaded
-## New pthreads
-- with deputy design we could set `PTHREAD_POOL_SIZE_STRICT=0` and enable threads to be created dynamically
-
-# Current state 2023 Sep
- - we already ship MT version of the runtime in the wasm-tools workload.
- - It's enabled by `<WasmEnableThreads>true</WasmEnableThreads>` and it requires COOP HTTP headers.
- - It will serve extra file `dotnet.native.worker.js`.
- - This will also start in Blazor project, but UI rendering would not work.
- - we have pre-allocated pool of browser Web Workers which are mapped to pthread dynamically.
- - we can configure pthread to keep running after synchronous thread_main finished. That's necessary to run any async tasks involving JavaScript interop.
- - legacy interop has problems with GC boundaries.
- - JSImport & JSExport work
- - There is private JSSynchronizationContext implementation which is too synchronous
- - There is draft of public C# API for creating JSWebWorker with JS interop. It must be dedicated un-managed resource, because we could not cleanup JS state created by user code.
- - There is MT version of HTTP & WS clients, which could be called from any thread but it's also too synchronous implementation.
- - Many unit tests fail on MT https://github.com/dotnet/runtime/pull/91536
- - there are MT C# ref assemblies, which don't throw PNSE for MT build of the runtime for blocking APIs.
-
-## Implementation options (only some combinations are possible)
-- how to deal with blocking C# code on UI thread
- - **A)** pretend it's not a problem (this we already have)
- - **B)** move user C# code to web worker
- - **C)** move all Mono to web worker
- - **D)** like **A)** just move call of the C# `Main()` to `JSWebWorker`
-- how to deal with blocking in synchronous JS calls from UI thread (like `onClick` callback)
- - **D)** pretend it's not a problem (this we already have)
- - **E)** throw PNSE when synchronous JSExport is called on UI thread
- - **F)** dispatch calls to synchronous JSExport to web worker and spin-wait on JS side of UI thread.
-- how to implement JS interop between managed main thread and UI thread (DOM)
- - **G)** put it out of scope for MT, manually implement what Blazor needs
- - **H)** pure JS dispatch between threads, [comlink](https://github.com/GoogleChromeLabs/comlink) style
- - **I)** C/emscripten dispatch of infrastructure to marshal individual parameters
- - **J)** C/emscripten dispatch of method binding and invoke, but marshal parameters on UI thread
- - **K)** pure C# dispatch between threads
-- how to implement JS interop on non-main web worker
- - **L)** disable it for all non-main threads
- - **M)** disable it for managed thread pool threads
- - **N)** allow it only for threads created as dedicated resource `WebWorker` via new API
- - **O)** enables it on all workers (let user deal with JS state)
-- how to dispatch calls to the right JS thread context
- - **P)** via `SynchronizationContext` before `JSImport` stub, synchronously, stack frames
- - **Q)** via `SynchronizationContext` inside `JSImport` C# stub
- - **R)** via `emscripten_dispatch_to_thread_async` inside C code of ``
-- how to implement GC/dispose of `JSObject` proxies
- - **S)** per instance: synchronous dispatch the call to correct thread via `SynchronizationContext`
- - **T)** per instance: async schedule the cleanup
- - at the detach of the thread. We already have `forceDisposeProxies`
- - could target managed thread be paused during GC ?
-- where to instantiate initial user JS modules (like Blazor's)
- - **U)** in the UI thread
- - **V)** in the deputy/sidecar thread
-- where to instantiate `JSHost.ImportAsync` modules
- - **W)** in the UI thread
- - **X)** in the deputy/sidecar thread
- - **Y)** allow it only for dedicated `JSWebWorker` threads
- - **Z)** disable it
- - same for `JSHost.GlobalThis`, `JSHost.DotnetInstance`
-- how to implement Blazor's `renderBatch`
- - **a)** keep as is, wrap it with GC pause, use legacy JS interop on UI thread
- - **b)** extract some of the legacy JS interop into Blazor codebase
- - **c)** switch to Blazor server mode. Web worker creates the batch of bytes and UI thread applies it to DOM
-- where to create HTTP+WS JS objects
- - **d)** in the UI thread
- - **e)** in the managed main thread
- - **f)** in first calling `JSWebWorker` managed thread
-- how to dispatch calls to HTTP+WS JS objects
- - **g)** try to stick to the same thread via `ConfigureAwait(false)`.
- - doesn't really work. `Task` migrate too freely
- - **h)** via C# `SynchronizationContext`
- - **i)** via `emscripten_dispatch_to_thread_async`
- - **j)** via `postMessage`
- - **k)** same whatever we choose for `JSImport`
- - note there are some synchronous calls on WS
-- where to create the emscripten instance
- - **l)** could be on the UI thread
- - **m)** could be on the "sidecar" thread
-- where to start the Mono VM
- - **n)** could be on the UI thread
- - **o)** could be on the "sidecar" thread
-- where to run the C# main entrypoint
- - **p)** could be on the UI thread
- - **q)** could be on the "deputy" or "sidecar" thread
-- where to implement sync-to-async: crypto/DLL download/HTTP APIs/
- - **r)** out of scope
- - **s)** in the UI thread
- - **t)** in a dedicated web worker
- - **z)** in the sidecar or deputy
-- where to marshal JSImport/JSExport parameters/return/exception
- - **u)** could be only value types, proxies out of scope
- - **v)** could be on UI thread (with deputy design and Mono there)
- - **w)** could be on sidecar (with double proxies of parameters via comlink)
- - **x)** could be on sidecar (with comlink calls per parameter)
-
-# Interesting combinations
-
-## (8) Minimal support
-- **A,D,G,L,P,S,U,Y,a,f,h,l,n,p,v**
-- this is what we [already have today](#Current-state-2023-Sep)
-- it could deadlock or die,
-- JS interop on threads requires a lot of user code attention
-- Keeps problems **1,2,3,4**
-
-## (9) Sidecar + no JS interop + narrow Blazor support
-- **C,E,G,L,P,S,U,Z,c,d,h,m,o,q,u**
-- minimal effort, low risk, low capabilities
-- move both emscripten and Mono VM to sidecar thread
-- no user code JS interop on any thread
-- internal solutions for Blazor needs
-- Ignores problems **1,2,3,4,5**
-
-## (10) Sidecar + only async just JS proxies UI + JSWebWorker + Blazor WASM server
-- **C,E,H,N,P,S,U,W+Y,c,e+f,h+k,m,o,q,w**
-- no C or managed code on UI thread
- - this architectural clarity is major selling point for sidecar design
-- no support for blocking sync JSExport calls from UI thread (callbacks)
- - it will throw PNSE
-- this will create double proxy for `Task`, `JSObject`, `Func<>` etc
- - difficult to GC, difficult to debug
-- double marshaling of parameters
-- Solves **1,2** for managed code.
-- Avoids **1,2** for JS callback
- - emscripten main loop stays responsive only when main managed thread is idle
-- Solves **3,4,5**
-
-## (11) Sidecar + async & sync just JS proxies UI + JSWebWorker + Blazor WASM server
-- **C,F,H,N,P,S,U,W+Y,c,e+f,h+k,m,o,q,w**
-- no C or managed code on UI thread
-- support for blocking sync JSExport calls from UI thread (callbacks)
- - blocking of the UI is at least well isolated from runtime code
- - it makes responsibility for sync call clear
-- this will create double proxy for `Task`, `JSObject`, `Func<>` etc
- - difficult to GC, difficult to debug
-- double marshaling of parameters
-- Solves **1,2** for managed code
- - unless there is sync `JSImport`->`JSExport` call
-- Ignores **1,2** for JS callback
- - emscripten main loop stays responsive only when main managed thread is idle
-- Solves **3,4,5**
-
-## (12) Deputy + managed dispatch to UI + JSWebWorker + with sync JSExport
-- **B,F,K,N,Q,S/T,U,W,a/b/c,d+f,h,l,n,s/z,v**
-- this uses `JSSynchronizationContext` to dispatch calls to UI thread
- - this is "dirty" as compared to sidecar because some managed code is actually running on UI thread
- - it needs to also use `SynchronizationContext` for `JSExport` and callbacks, to dispatch to deputy.
-- blazor render could be both legacy render or Blazor server style
- - because we have both memory and mono on the UI thread
-- Solves **1,2** for managed code
- - unless there is sync `JSImport`->`JSExport` call
-- Ignores **1,2** for JS callback
- - emscripten main loop could deadlock on sync JSExport
-- Solves **3,4,5**
-
-## (13) Deputy + emscripten dispatch to UI + JSWebWorker + with sync JSExport
-- **B,F,J,N,R,T,U,W,a/b/c,d+f,i,l,n,s,v**
-- is variation of **(12)**
- - with emscripten dispatch and marshaling in UI thread
-- this uses `emscripten_dispatch_to_thread_async` for `call_entry_point`, `complete_task`, `cwraps.mono_wasm_invoke_method_bound`, `mono_wasm_invoke_bound_function`, `mono_wasm_invoke_import`, `call_delegate_method` to get to the UI thread.
-- it uses other `cwraps` locally on UI thread, like `mono_wasm_new_root`, `stringToMonoStringRoot`, `malloc`, `free`, `create_task_callback_method`
- - it means that interop related managed runtime code is running on the UI thread, but not the user code.
- - it means that parameter marshalling is fast (compared to sidecar)
- - this deputy design is major selling point #2
- - it still needs to enter GC barrier and so it could block UI for GC run shortly
-- blazor render could be both legacy render or Blazor server style
- - because we have both memory and mono on the UI thread
-- Solves **1,2** for managed code
- - unless there is sync `JSImport`->`JSExport` call
-- Ignores **1,2** for JS callback
- - emscripten main loop could deadlock on sync JSExport
-- Solves **3,4,5**
-
-## (14) Deputy + emscripten dispatch to UI + JSWebWorker + without sync JSExport
-- **B,F,J,N,R,T,U,W,a/b/c,d+f,i,l,n,s,v**
-- is variation of **(13)**
- - without support for synchronous JSExport
-- Solves **1,2** for managed code
- - emscripten main loop stays responsive
- - unless there is sync `JSImport`->`JSExport` call
-- Avoids **2** for JS callback
- - by throwing PNSE
-- Solves **3,4,5**
-
-## (15) Deputy + Sidecar + UI thread
-- 2 levels of indirection.
-- benefit: blocking JSExport from UI thread doesn't block emscripten loop
-- downside: complex and more resource intensive
-
-## (16) Deputy without Mono, no GC barrier breach for interop
-- variation on **(13)** or **(14)** where we get rid of per-parameter calls to Mono
-- benefit: get closer to purity of sidecar design without losing perf
- - this could be done later as purity optimization
-- in this design the mono could be started on deputy thread
- - this will keep UI responsive during startup
-- UI would not be mono attached thread.
-- See [details](#Get-rid-of-Mono-GC-boundary-breach)
-
-Related Net8 tracking https://github.com/dotnet/runtime/issues/85592
-
-## (17) Emscripten em_queue in deputy, managed UI thread
-- is interesting because it avoids cross-thread dispatch to UI
- - including double dispatch in Blazor's `RendererSynchronizationContext`
-- avoids solving **1,2**
-- low level hacking of emscripten design assumptions
-
-## (18) Soft deputy
-- keep both Mono and emscripten in the UI thread
-- use `SynchronizationContext` to do the dispatch
-- make it easy and default to run any user code in deputy thread
- - all Blazor events and callbacks like `onClick` to deputy
- - move SignalR to deputy
- - move Blazor entry point to deputy
-- hope that UI thread is mostly idle
- - enable dynamic thread allocation
- - throw exceptions in dev loop when UI thread does `lock` or `.Wait` in user code
-
-## Scratch pad
-
-current generated `JSImport` in Net7, Net8
-
-```cs
-
-[JSImport(Dispatch.UI)]
-public static partial Task WebSocketReceive(JSObject webSocket, nint bufferPtr, int bufferLength);
-
-[JSImport(Dispatch.Params)]
-public static partial Task WebSocketReceive(JSObject webSocket, nint bufferPtr, int bufferLength);
-
-[DebuggerNonUserCode]
-public static partial Task WebSocketReceive(JSObject webSocket, nint bufferPtr, int bufferLength)
-{
- if (__signature_WebSocketReceive_1144640460 == null)
- {
- __signature_WebSocketReceive_1144640460 = JSFunctionBinding.BindJSFunction("INTERNAL.ws_wasm_receive", null, new JSMarshalerType[] {
- JSMarshalerType.Task(),
- JSMarshalerType.JSObject,
- JSMarshalerType.IntPtr,
- JSMarshalerType.Int32
- });
- }
-
- Span __arguments_buffer = stackalloc JSMarshalerArgument[5];
- ref JSMarshalerArgument __arg_exception = ref __arguments_buffer[0];
- __arg_exception.Initialize();
- ref JSMarshalerArgument __arg_return = ref __arguments_buffer[1];
- __arg_return.Initialize();
- Task __retVal;
-
- ref JSMarshalerArgument __bufferLength_native__js_arg = ref __arguments_buffer[4];
- ref JSMarshalerArgument __bufferPtr_native__js_arg = ref __arguments_buffer[3];
- ref JSMarshalerArgument __webSocket_native__js_arg = ref __arguments_buffer[2];
-
- __bufferLength_native__js_arg.ToJS(bufferLength);
- __bufferPtr_native__js_arg.ToJS(bufferPtr);
- __webSocket_native__js_arg.ToJS(webSocket);
-
- JSFunctionBinding.InvokeJS(__signature_WebSocketReceive_1144640460, __arguments_buffer);
-
- __arg_return.ToManaged(out __retVal);
-
- return __retVal;
-}
-
-[ThreadStaticAttribute]
-static JSFunctionBinding __signature_WebSocketReceive_1144640460;
-
-[DebuggerNonUserCode]
-internal static unsafe void __Wrapper_Dummy_1616792047(JSMarshalerArgument* __arguments_buffer)
-{
- Task meaningPromise;
- ref JSMarshalerArgument __arg_exception = ref __arguments_buffer[0];
- ref JSMarshalerArgument __arg_return = ref __arguments_buffer[1];
-
- ref JSMarshalerArgument __meaningPromise_native__js_arg = ref __arguments_buffer[2];
- try
- {
-
- __meaningPromise_native__js_arg.ToManaged(out meaningPromise,
- static (ref JSMarshalerArgument __task_result_arg, out int __task_result) =>
- {
- __task_result_arg.ToManaged(out __task_result);
- });
- Sample.Test.Dummy(meaningPromise);
- }
- catch (global::System.Exception ex)
- {
- __arg_exception.ToJS(ex);
- }
-}
-```
-
-
+## Remove Mono from UI thread
+- Get rid of Mono GC boundary breach
+- see https://github.com/dotnet/runtime/issues/100411