smp is back again
spawn one worker per kernel task, but only one task per virtual cpu can be running at a time
tombl committed Mar 20, 2024
1 parent 9b3889e commit e0a97fe
Showing 22 changed files with 375 additions and 395 deletions.
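
The model this commit implements: every kernel task gets its own worker, and a task only runs while it holds a virtual-CPU token in thread_info.running_cpu (negative means unscheduled). A minimal sketch of the token handoff, using the same clang wasm atomics builtins as the diff below (the helper names are illustrative, not part of the commit):

    #include <linux/atomic.h>

    /* Give our vCPU to the next task's worker (cf. __switch_to below). */
    static void vcpu_hand_over(atomic_t *from, atomic_t *to)
    {
            int cpu = atomic_xchg_release(from, -1); /* we must hold a cpu (>= 0) */
            atomic_xchg_acquire(to, cpu);            /* next task must not (-1) */
            /* wake the worker parked on `to`; zero waiters just means
             * that worker has not started waiting yet */
            __builtin_wasm_memory_atomic_notify(&to->counter, 1);
    }

    /* Park this worker until another task hands us a vCPU
     * (cf. _start_task below). */
    static int vcpu_wait(atomic_t *me)
    {
            __builtin_wasm_memory_atomic_wait32(&me->counter,
                                                /* block if the value is: */ -1,
                                                /* timeout: */ -1);
            return atomic_read(me);
    }
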
3 changes: 3 additions & 0 deletions README.md
@@ -7,7 +7,10 @@
- [x] builds
- [ ] boots
- [ ] port musl
- [ ] port busybox
- [ ] smp
- [ ] futex
- [ ] mmu
- [ ] highmem
- [ ] virtio
8 changes: 4 additions & 4 deletions arch/wasm/Kconfig
@@ -1,6 +1,6 @@
config WASM
def_bool y
depends on !SMP && !BINFMT_ELF && !MMU && !MODULES && !COREDUMP && !SECCOMP && !UPROBES && !COMPAT
depends on !BINFMT_ELF && !MMU && !MODULES && !COREDUMP && !SECCOMP && !UPROBES && !COMPAT
select ARCH_HAS_BINFMT_WASM
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_NO_PREEMPT
@@ -9,14 +9,14 @@ config WASM
select ARCH_USE_QUEUED_SPINLOCKS
select BUG
select FLATMEM
select FORCE_NR_CPUS if SMP
select GENERIC_ATOMIC64
select GENERIC_CSUM
select GENERIC_HWEIGHT
select GENERIC_SMP_IDLE_THREAD
select OF
select OF_EARLY_FLATTREE
select PAGE_SIZE_64KB
# select SMP
select SMP
select THREAD_INFO_IN_TASK
select UACCESS_MEMCPY if !MMU
select USE_PER_CPU_TLS
@@ -32,7 +32,7 @@ config EARLY_PRINTK
def_bool y

config SMP
def_bool n
def_bool y

config NR_CPUS
int
20 changes: 20 additions & 0 deletions arch/wasm/include/asm/globals.h
@@ -0,0 +1,20 @@
#ifndef _WASM_GLOBALS
#define _WASM_GLOBALS

#include <linux/compiler_attributes.h>

__asm__(".globaltype __stack_pointer, i32\n");
static void __always_inline set_stack_pointer(void *ptr)
{
__asm__ volatile("local.get %0\n"
"global.set __stack_pointer" ::"r"(ptr));
}

__asm__(".globaltype __tls_base, i32\n");
static void __always_inline set_tls_base(void *ptr)
{
__asm__ volatile("local.get %0\n"
"global.set __tls_base" ::"r"(ptr));
}

#endif
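
globals.h only adds setters; reading a Wasm global back uses the mirrored global.get/local.set sequence, as in the get_stack_pointer helper this commit deletes from process.c further down:

    static void *__always_inline get_stack_pointer(void)
    {
            void *ptr;
            __asm__ volatile("global.get __stack_pointer\n"
                             "local.set %0"
                             : "=r"(ptr));
            return ptr;
    }
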
1 change: 0 additions & 1 deletion arch/wasm/include/asm/sigcontext.h
@@ -2,7 +2,6 @@
#define _WASM_SIGCONTEXT_H

struct pt_regs {
void* current_stack;
int (*fn)(void*);
void* fn_arg;
};
4 changes: 3 additions & 1 deletion arch/wasm/include/asm/sysmem.h
@@ -1,9 +1,11 @@
#ifndef _WASM_SYSMEM_H
#define _WASM_SYSMEM_H

#include <linux/types.h>

void zones_init(void);
void early_tls_init(void);
void smp_tls_prepare(void);
void smp_tls_init(int cpu);
void smp_tls_init(int cpu, bool init);

#endif
9 changes: 5 additions & 4 deletions arch/wasm/include/asm/thread_info.h
@@ -1,6 +1,8 @@
#ifndef _WASM_THREAD_INFO_H
#define _WASM_THREAD_INFO_H

#include <linux/types.h>

/* THREAD_SIZE is the size of the task_struct + kernel stack
* This is asserted in setup, but the stack should be 1 page,
* and a task_struct should be *way* less than a page big. */
@@ -11,15 +13,14 @@
struct thread_info {
unsigned long flags;
int preempt_count;
struct task_struct *from_sched;
// unsigned int cpu;
unsigned int instance_id;
void* jmpbuf;
int cpu; // this is for the kernel
atomic_t running_cpu; // negative means unscheduled
};

#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, .preempt_count = INIT_PREEMPT_COUNT, \
.running_cpu = ATOMIC_INIT(0), \
}

#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
8 changes: 5 additions & 3 deletions arch/wasm/include/asm/wasm_imports.h
Expand Up @@ -25,9 +25,11 @@ import("get_now_nsec") unsigned long long wasm_get_now_nsec(void);

import("get_stacktrace") void wasm_get_stacktrace(char *buf, size_t size);

// import("new_kernel_instance") unsigned int wasm_new_kernel_instance(void);
// import("poll_kernel_instance") void wasm_poll_kernel_instance(unsigned int id);
// import("poll_yield") void wasm_poll_yield(void);
struct task_struct;
import("new_worker") void wasm_new_worker(struct task_struct *task, char *comm,
size_t comm_len);

import("bringup_secondary") void wasm_bringup_secondary(int cpu, struct task_struct *idle);

#undef import

1 change: 0 additions & 1 deletion arch/wasm/kernel/Makefile
@@ -6,7 +6,6 @@ obj-y += \
irq.o \
process.o \
ptrace.o \
setjmp.o \
setup.o \
stacktrace.o \
time.o
154 changes: 80 additions & 74 deletions arch/wasm/kernel/process.c
@@ -1,87 +1,56 @@
#include <asm/delay.h>
#include <asm/globals.h>
#include <asm/sysmem.h>
#include <asm/wasm_imports.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/task.h>

__asm__(".globaltype __stack_pointer, i32\n");

int setjmp(void *buf) __attribute__((returns_twice));
void longjmp(void *buf, int val) __attribute__((noreturn));

static void *get_stack_pointer(void)
{
void *ptr;
__asm__ volatile("global.get __stack_pointer\n"
"local.set %0"
: "=r"(ptr));
return ptr;
}

static void __always_inline set_stack_pointer(void *ptr)
{
__asm__ volatile("local.get %0\n"
"global.set __stack_pointer" ::"r"(ptr));
}

static struct task_struct *prev = &init_task;

inline static struct task_struct *__switch_to_inner(struct task_struct *from,
struct task_struct *to)
{
struct pt_regs *from_regs = task_pt_regs(from);
struct pt_regs *to_regs = task_pt_regs(to);
struct thread_info *from_info = task_thread_info(from);
struct thread_info *to_info = task_thread_info(to);

if (setjmp(from_info->jmpbuf) == 0) {
set_stack_pointer(to_regs->current_stack);

if (to_info->from_sched)
schedule_tail(to_info->from_sched);
to_info->from_sched = NULL;

if (to_regs->fn) {
int (*fn)(void *) = to_regs->fn;
int result;

to_regs->fn = NULL;
pr_info("call %p(%p)\n", fn, to_regs->fn_arg);

// callback returns if the kernel thread execs a process?
result = fn(to_regs->fn_arg);
pr_info("call %p(%p) = %u\n", fn, to_regs->fn_arg,
result);
} else {
pr_info("longjmp %p to %u\n", to_info->jmpbuf, to->pid);
longjmp(to_info->jmpbuf, 1);
}
} else {
pr_info("free %p\n", from_info->jmpbuf);
kfree(from_info->jmpbuf);
}

return prev;
}
// TODO(wasm): replace __builtin_wasm_memory_atomic with completion?

struct task_struct *__switch_to(struct task_struct *from,
struct task_struct *to)
{
struct pt_regs *from_regs = task_pt_regs(from);
struct pt_regs *to_regs = task_pt_regs(to);
struct thread_info *from_info = task_thread_info(from);
struct thread_info *to_info = task_thread_info(to);

from_regs->current_stack = get_stack_pointer();
from_info->jmpbuf = kmalloc(16, 0);

pr_info("alloc %p for %u\n", from_info->jmpbuf, from->pid);

current = to;
to_info->from_sched = prev;
prev = from;

return __switch_to_inner(from, to);
int cpu, other_cpu;

cpu = atomic_xchg_release(&from_info->running_cpu, -1);
BUG_ON(cpu < 0); // current process must be scheduled to a cpu

// give the current cpu to the new worker
other_cpu = atomic_xchg_acquire(&to_info->running_cpu, cpu);
BUG_ON(other_cpu >= 0); // new process should not have had a cpu

// wake the other worker:
// pr_info("wake cpu=%i task=%p\n", cpu, to);
// memory.atomic.notify returns how many waiters were notified
// 0 is fine, because it means the worker isn't running yet
// 1 is great, because it means someone is waiting for this number
// 2+ means there's an issue, because I asked for only 1
BUG_ON(__builtin_wasm_memory_atomic_notify(
&to_info->running_cpu.counter, 1) > 1);

// pr_info("waiting cpu=%i task=%p in switch\n", cpu, from);

// sleep this worker:
/* memory.atomic.wait32 returns:
* 0 -> the thread blocked and was woken
= we slept and were woken
* 1 -> the value at the pointer didn't match the passed value
= somebody gave us their cpu straight away
* 2 -> the thread blocked but timed out
= not possible because we pass an infinite timeout
*/
__builtin_wasm_memory_atomic_wait32(&from_info->running_cpu.counter,
/* block if the value is: */ -1,
/* timeout: */ -1);

// pr_info("woke up cpu=%i task=%p in switch\n", cpu, from);

BUG_ON(cpu < 0); // we should be given a new cpu

return from;
}

int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -90,13 +59,50 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)

memset(childregs, 0, sizeof(struct pt_regs));

childregs->current_stack = childregs - 1;
atomic_set(&task_thread_info(p)->running_cpu, -1);

if (!args->fn)
panic("can't copy userspace thread"); // yet

childregs->fn = args->fn;
childregs->fn_arg = args->fn_arg;

pr_info("spawning task=%p\n", p);
wasm_new_worker(p, p->comm, strnlen(p->comm, TASK_COMM_LEN));

return 0;
}

__attribute__((export_name("task"))) void _start_task(struct task_struct *task)
{
struct thread_info *info = task_thread_info(task);
struct pt_regs *regs = task_pt_regs(task);
int cpu;

set_stack_pointer(task_pt_regs(task) - 1);

early_printk(" waiting cpu=%i task=%p in entry\n",
atomic_read(&info->running_cpu), task);

// if we don't currently have a cpu, wait for one
__builtin_wasm_memory_atomic_wait32(&info->running_cpu.counter,
/* block if the value is: */ -1,
/* timeout: */ -1);

cpu = atomic_read(&info->running_cpu);

early_printk(" woke up cpu=%i task=%p in entry\n",
cpu, task);

smp_tls_init(cpu, false);

schedule_tail(current);

current = task;

// callback returns only if the kernel thread execs a process
regs->fn(regs->fn_arg);

// call into userspace?
panic("can't call userspace\n");
}
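
Putting process.c together, a plausible end-to-end trace for a new kernel thread B spawned while task A holds vCPU 0 (task names and CPU numbers are illustrative):

    copy_thread(B)        running_cpu(B) = -1; wasm_new_worker(B, ...)
    worker B              _start_task: wait32(running_cpu(B), -1, -1)  -> parks
    A: __switch_to(A, B)  running_cpu(A): 0 -> -1   (xchg_release)
                          running_cpu(B): -1 -> 0   (xchg_acquire)
                          notify(running_cpu(B), 1)                    -> B wakes
                          wait32(running_cpu(A), -1, -1)               -> A parks
    worker B              smp_tls_init(0, false); schedule_tail(A); regs->fn(fn_arg)
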