From c14a09bcd18330c8307034ff8a0934ebd6d4b4f6 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Tue, 7 May 2024 09:33:13 +0200 Subject: [PATCH] Support v8.2.3 and v9.0.0 Signed-off-by: David Gageot --- docker-bake.hcl | 4 +- patches/aports.config | 4 +- ...execve-call-qemu-via-proc-self-exe-t.patch | 92 ++++++++ ...nux-user-lookup-user-program-in-PATH.patch | 76 ++++++ ...in-execve-should-be-relative-to-work.patch | 103 ++++++++ ...support-loading-scripts-with-shebang.patch | 221 ++++++++++++++++++ ...ipt-path-as-argv0-in-shebang-handler.patch | 26 +++ ...se-GLib-to-remember-the-program-name.patch | 26 +++ .../0007-fix-execvp-PATH-handling.patch | 59 +++++ ...execve-call-qemu-via-proc-self-exe-t.patch | 92 ++++++++ ...nux-user-lookup-user-program-in-PATH.patch | 76 ++++++ ...in-execve-should-be-relative-to-work.patch | 103 ++++++++ ...support-loading-scripts-with-shebang.patch | 221 ++++++++++++++++++ ...ipt-path-as-argv0-in-shebang-handler.patch | 26 +++ ...se-GLib-to-remember-the-program-name.patch | 26 +++ .../0007-fix-execvp-PATH-handling.patch | 59 +++++ 16 files changed, 1211 insertions(+), 3 deletions(-) create mode 100644 patches/buildkit-direct-execve-v8.2/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0002-linux-user-lookup-user-program-in-PATH.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0003-linux-user-path-in-execve-should-be-relative-to-work.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0004-linux-user-support-loading-scripts-with-shebang.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0005-set-script-path-as-argv0-in-shebang-handler.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0006-linux-user-use-GLib-to-remember-the-program-name.patch create mode 100644 patches/buildkit-direct-execve-v8.2/0007-fix-execvp-PATH-handling.patch create mode 100644 
patches/buildkit-direct-execve-v9.0/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0002-linux-user-lookup-user-program-in-PATH.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0004-linux-user-support-loading-scripts-with-shebang.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0005-set-script-path-as-argv0-in-shebang-handler.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch create mode 100644 patches/buildkit-direct-execve-v9.0/0007-fix-execvp-PATH-handling.patch diff --git a/docker-bake.hcl b/docker-bake.hcl index f5da15a8..0f145ef2 100644 --- a/docker-bake.hcl +++ b/docker-bake.hcl @@ -5,7 +5,7 @@ variable "QEMU_REPO" { default = "https://github.com/qemu/qemu" } variable "QEMU_VERSION" { - default = "v8.1.5" + default = "v9.0.0" } variable "QEMU_PATCHES" { default = "cpu-max-arm" @@ -59,7 +59,7 @@ target "buildkit" { inherits = ["mainline"] args = { BINARY_PREFIX = "buildkit-" - QEMU_PATCHES = "${QEMU_PATCHES},buildkit-direct-execve-v8.1" + QEMU_PATCHES = "${QEMU_PATCHES},buildkit-direct-execve-v8.2" QEMU_PRESERVE_ARGV0 = "" } cache-from = ["${REPO}:buildkit-master"] diff --git a/patches/aports.config b/patches/aports.config index 020eb2cc..06eb689d 100644 --- a/patches/aports.config +++ b/patches/aports.config @@ -1,3 +1,5 @@ +9.0.0,382eed598cc014306bb8bae4f5520946664614cf +8.2.3,214985d4bad8ce1064ce9f5bd3afc207c7166fad 8.1.50,e9d411e67e815ab0fcf1d00885cb55dd0f99e810 8.0.50,6225632b267a3d2bf6700a8fce41df60a68c187b 7.2.50,ed7a3122a32f53094f51e55abe68d416910e01ad @@ -9,4 +11,4 @@ 5.2.90,75a54675dc421cadfb9c2fbb567dc2b335e0a50e 5.1.90,8ffc0fe905f21e472724f58b101d61271a6571ff 5.0.90,87ee9a5a8a925d4d9e566a9829231781f80ebcc5 -0.0.0,f238bdae4d755f6e7ab6ce0b9a2a71dc833eb106 \ No newline at 
end of file +0.0.0,f238bdae4d755f6e7ab6ce0b9a2a71dc833eb106 diff --git a/patches/buildkit-direct-execve-v8.2/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch b/patches/buildkit-direct-execve-v8.2/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch new file mode 100644 index 00000000..a311b526 --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch @@ -0,0 +1,92 @@ +From 43aea3054fcbae1bfbfbb90edf1e3b56f439066f Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Fri, 8 Sep 2023 10:47:29 +0200 +Subject: [PATCH] linux-user: have execve call qemu via /proc/self/exe to not + rely on binfmt_misc + +It is assumed that when a guest program calls execve syscall it wants to +execute a program on the same guest architecture and not the host architecture. + +Previously, such a guest program would have execve syscall error out with: +"exec format error". + +A common solution is to register the qemu binary in binfmt_misc but that is not a +userland-friendly solution, requiring to modify kernel state. + +This patch injects /proc/self/exe as the first parameter and the qemu program name +as argv[0] to execve. 
+ +Signed-off-by: Tibor Vass +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 44 +++++++++++++++++++++++++++++++------------- + 1 file changed, 31 insertions(+), 13 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 9ee124c583..6ed502eb6c 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8442,10 +8442,37 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, + envc++; + } + +- argp = g_new0(char *, argc + 1); ++ argp = g_new0(char *, argc + 4); + envp = g_new0(char *, envc + 1); + +- for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { ++ if (!(p = lock_user_string(pathname))) ++ goto execve_efault; ++ ++ /* if pathname is /proc/self/exe then retrieve the path passed to qemu via command line */ ++ if (is_proc_myself(p, "exe")) { ++ CPUState *cpu = env_cpu((CPUArchState *)cpu_env); ++ TaskState *ts = cpu->opaque; ++ p = ts->bprm->filename; ++ } ++ ++ /* retrieve guest argv0 */ ++ if (get_user_ual(addr, guest_argp)) ++ goto execve_efault; ++ ++ /* ++ * From the guest, the call ++ * execve(pathname, [argv0, argv1], envp) ++ * on the host, becomes: ++ * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) ++ * where qemu_progname is the error message prefix for qemu ++ */ ++ argp[0] = (char*)error_get_progname(); ++ argp[1] = (char*)"-0"; ++ argp[2] = (char*)lock_user_string(addr); ++ argp[3] = p; ++ ++ /* copy guest argv1 onwards to host argv4 onwards */ ++ for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) { + goto execve_efault; + } +@@ -8484,18 +8511,9 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, + * before the execve completes and makes it the other + * program's problem. + */ +- p = lock_user_string(pathname); +- if (!p) { +- goto execve_efault; +- } +- +- const char *exe = p; +- if (is_proc_myself(p, "exe")) { +- exe = exec_path; +- } + ret = is_execveat +- ? 
safe_execveat(dirfd, exe, argp, envp, flags) +- : safe_execve(exe, argp, envp); ++ ? safe_execveat(dirfd, "/proc/self/exe", argp, envp, flags) ++ : safe_execve("/proc/self/exe", argp, envp); + ret = get_errno(ret); + + unlock_user(p, pathname, 0); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0002-linux-user-lookup-user-program-in-PATH.patch b/patches/buildkit-direct-execve-v8.2/0002-linux-user-lookup-user-program-in-PATH.patch new file mode 100644 index 00000000..30e103c5 --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0002-linux-user-lookup-user-program-in-PATH.patch @@ -0,0 +1,76 @@ +From d83023eb7a0574cad224c7d88ac8dcf9d745afa3 Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Tue, 2 Jun 2020 10:39:48 +0000 +Subject: [PATCH] linux-user: lookup user program in PATH + +Signed-off-by: Tibor Vass +--- + linux-user/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/linux-user/main.c b/linux-user/main.c +index fbc9bcfd5f..30f163de81 100644 +--- a/linux-user/main.c ++++ b/linux-user/main.c +@@ -558,6 +558,45 @@ static void usage(int exitcode) + exit(exitcode); + } + ++/* ++ * path_lookup searches for an executable filename in the directories named by the PATH environment variable. ++ * Returns a copy of filename if it is an absolute path or could not find a match. ++ * Caller is responsible to free returned string. ++ * Adapted from musl's execvp implementation. 
++ */ ++static char *path_lookup(char *filename) { ++ const char *p, *z, *path = getenv("PATH"); ++ size_t l, k; ++ struct stat buf; ++ ++ /* if PATH is not set or filename is absolute path return filename */ ++ if (!path || !filename || filename[0] == '/') ++ return filename ? strdup(filename) : NULL; ++ ++ k = strnlen(filename, NAME_MAX+1); ++ if (k > NAME_MAX) { ++ errno = ENAMETOOLONG; ++ return NULL; ++ } ++ l = strnlen(path, PATH_MAX-1)+1; ++ ++ for (p = path; ; p = z) { ++ char *b = calloc(l+k+1, sizeof(char)); ++ z = strchrnul(p, ':'); ++ if (z-p >= l) { ++ if (!*z++) break; ++ continue; ++ } ++ memcpy(b, p, z-p); ++ b[z-p] = '/'; ++ memcpy(b+(z-p)+(z>p), filename, k+1); ++ if (!stat(b, &buf) && !(buf.st_mode & S_IFDIR) && (buf.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) ++ return b; ++ if (!*z++) break; ++ } ++ return strndup(filename, NAME_MAX+1); ++} ++ + static int parse_args(int argc, char **argv) + { + const char *r; +@@ -623,7 +662,11 @@ static int parse_args(int argc, char **argv) + exit(EXIT_FAILURE); + } + +- exec_path = argv[optind]; ++ /* not freeing exec_path as it is needed for the lifetime of the process */ ++ if (!(exec_path = path_lookup(argv[optind]))) { ++ (void) fprintf(stderr, "qemu: could not find user program %s: %s\n", argv[optind], strerror(errno)); ++ exit(EXIT_FAILURE); ++ } + + return optind; + } +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0003-linux-user-path-in-execve-should-be-relative-to-work.patch b/patches/buildkit-direct-execve-v8.2/0003-linux-user-path-in-execve-should-be-relative-to-work.patch new file mode 100644 index 00000000..b07415eb --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0003-linux-user-path-in-execve-should-be-relative-to-work.patch @@ -0,0 +1,103 @@ +From 8fd15aa673a7241f8aeeb64fff5633b973913ae3 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 20:54:37 +0200 +Subject: [PATCH] linux-user: path in execve should be relative to working dir + +Fixes regression introduced in parent 
commit where PATH handling was introduced. + +When guest calls execve(filename, argp, envp) filename can be relative in which +case Linux makes it relative to the working directory. + +However, since execve is now handled by exec-ing qemu process again, filename +would first get looked up in PATH in main() before calling host's execve. + +With this change, if filename is relative and exists in working directory as +well as in PATH, working directory will get precedence over PATH if guest is +doing an execve syscall, but not if relative filename comes from qemu's argv. + +Signed-off-by: Tibor Vass +Signed-off-by: CrazyMax +--- + include/qemu/path.h | 1 + + linux-user/syscall.c | 9 +++++++-- + util/path.c | 32 ++++++++++++++++++++++++++++++++ + 3 files changed, 40 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/path.h b/include/qemu/path.h +index c6292a9709..a81fb51e1f 100644 +--- a/include/qemu/path.h ++++ b/include/qemu/path.h +@@ -3,5 +3,6 @@ + + void init_paths(const char *prefix); + const char *path(const char *pathname); ++const char *prepend_workdir_if_relative(const char *path); + + #endif +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 947af70611..0ce9f207be 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8444,12 +8444,17 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + * execve(pathname, [argv0, argv1], envp) + * on the host, becomes: + * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) +- * where qemu_progname is the error message prefix for qemu ++ * where qemu_progname is the error message prefix for qemu. ++ * Note: if pathname is relative, it will be prepended with the current working directory. 
+ */ + argp[0] = (char*)error_get_progname(); + argp[1] = (char*)"-0"; + argp[2] = (char*)lock_user_string(addr); +- argp[3] = p; ++ argp[3] = (char*)prepend_workdir_if_relative(p); ++ if (!argp[3]) { ++ ret = -host_to_target_errno(errno); ++ goto execve_end; ++ } + + /* copy guest argv1 onwards to host argv4 onwards */ + for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { +diff --git a/util/path.c b/util/path.c +index 8e174eb436..06fe2663b8 100644 +--- a/util/path.c ++++ b/util/path.c +@@ -68,3 +68,35 @@ const char *path(const char *name) + qemu_mutex_unlock(&lock); + return ret; + } ++ ++/* Prepends working directory if path is relative. ++ * If path is absolute, it is returned as-is without any allocation. ++ * Otherwise, caller is responsible to free returned path. ++ * Returns NULL and sets errno upon error. ++ * Note: realpath is not called to let the kernel do the rest of the resolution. ++ */ ++const char *prepend_workdir_if_relative(const char *path) ++{ ++ char buf[PATH_MAX]; ++ char *p; ++ int i, j, k; ++ ++ if (!path || path[0] == '/') return path; ++ ++ if (!getcwd(buf, PATH_MAX)) return NULL; ++ i = strlen(buf); ++ j = strlen(path); ++ k = i + 1 + j + 1; /* workdir + '/' + path + '\0' */ ++ if (i + j > PATH_MAX) { ++ errno = ERANGE; ++ return NULL; ++ } ++ if (!(p = malloc(k))) return NULL; ++ ++ p[0] = '\0'; ++ ++ if (!strncat(p, buf, i)) return NULL; ++ if (!strncat(p, "/", 1)) return NULL; ++ if (!strncat(p, path, j)) return NULL; ++ return p; ++} +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0004-linux-user-support-loading-scripts-with-shebang.patch b/patches/buildkit-direct-execve-v8.2/0004-linux-user-support-loading-scripts-with-shebang.patch new file mode 100644 index 00000000..6288333e --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0004-linux-user-support-loading-scripts-with-shebang.patch @@ -0,0 +1,221 @@ +From 14efa42c9bc061ffcad00bfa4a643e73f9a056ee Mon Sep 
17 00:00:00 2001 +From: Tibor Vass +Date: Thu, 18 Jun 2020 20:57:22 +0000 +Subject: [PATCH] linux-user: support loading scripts with shebang (#!) + +The interpreter is assumed to be compatible with the target architecture. + +The script loading logic is taken from Linux source code to match logic as closely as possible. + +An interpreter can itself be a script (#!/other.script), and thus load another interpreter. +This happens in a loop therefore the loading chain of interpreter-scripts is limited to 5 like in Linux. + +Warning: there might be issues with m68k, mips, and mips64 architectures +since the cpu_model returned by those architectures (see linux-user/$arch/target_elf.h) +is dependent on the ELF header of the payload, but in this case the payload +is a script and not a binary. + This could be fixed either by moving the loading logic or +parts of it to before the cpu_model is set, so that the final ELF binary is available. +An alternative fix is to avoid the loop altogether and call qemu binary again with different arguments. +The downside is that it would require one extra exec syscall per interpreter. 
+ +Signed-off-by: Tibor Vass +Signed-off-by: Tonis Tiigi +--- + linux-user/elfload.c | 2 +- + linux-user/linuxload.c | 137 ++++++++++++++++++++++++++++++++++++----- + linux-user/loader.h | 2 + + 3 files changed, 124 insertions(+), 17 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index c45da4d633..3c27aef5d4 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3219,10 +3219,10 @@ uint32_t get_elf_eflags(int fd) + return 0; + } + ret = read(fd, &ehdr, sizeof(ehdr)); ++ offset = lseek(fd, offset, SEEK_SET); /* reset seek regardless of error */ + if (ret < sizeof(ehdr)) { + return 0; + } +- offset = lseek(fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + return 0; + } +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 2ed5fc45ed..354650ef90 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -128,7 +128,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct linux_binprm *bprm) + { +- int retval; ++ int retval, depth; + + bprm->fd = fdexec; + bprm->filename = (char *)filename; +@@ -137,24 +137,33 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + bprm->envc = count(envp); + bprm->envp = envp; + +- retval = prepare_binprm(bprm); +- +- if (retval >= 0) { +- if (bprm->buf[0] == 0x7f +- && bprm->buf[1] == 'E' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'F') { +- retval = load_elf_binary(bprm, infop); ++ for (depth = 0; ; depth++) { ++ if (depth > 5) { ++ return -ELOOP; ++ } ++ retval = prepare_binprm(bprm); ++ if (retval >= 0) { ++ if (bprm->buf[0] == 0x7f ++ && bprm->buf[1] == 'E' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'F') { ++ retval = load_elf_binary(bprm, infop); + #if defined(TARGET_HAS_BFLT) +- } else if (bprm->buf[0] == 'b' +- && bprm->buf[1] == 'F' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'T') { +- retval = load_flt_binary(bprm, infop); ++ } 
else if (bprm->buf[0] == 'b' ++ && bprm->buf[1] == 'F' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'T') { ++ retval = load_flt_binary(bprm, infop); + #endif +- } else { +- return -ENOEXEC; ++ } else if (bprm->buf[0] == '#' ++ && bprm->buf[1] == '!') { ++ retval = load_script(bprm); ++ if (retval >= 0) continue; ++ } else { ++ return -ENOEXEC; ++ } + } ++ break; + } + + if (retval >= 0) { +@@ -165,3 +174,99 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + + return retval; + } ++ ++static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } ++static inline const char *next_non_spacetab(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (!spacetab(*first)) ++ return first; ++ return NULL; ++} ++static inline const char *next_terminator(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (spacetab(*first) || !*first) ++ return first; ++ return NULL; ++} ++ ++/* ++ * Reads the interpreter (shebang #!) line and modifies bprm object accordingly ++ * This is a modified version of Linux's load_script function. ++*/ ++int load_script(struct linux_binprm *bprm) ++{ ++ const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; ++ int execfd, i, argc_delta; ++ ++ buf_end = bprm->buf + sizeof(bprm->buf) - 1; ++ i_end = (const char*)memchr(bprm->buf, '\n', sizeof(bprm->buf)); ++ if (!i_end) { ++ i_end = next_non_spacetab(bprm->buf + 2, buf_end); ++ if (!i_end) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* Entire buf is spaces/tabs */ ++ } ++ /* ++ * If there is no later space/tab/NUL we must assume the ++ * interpreter path is truncated. 
++ */ ++ if (!next_terminator(i_end, buf_end)) { ++ perror("script_prepare_binprm: truncated interpreter path"); ++ return -ENOEXEC; ++ } ++ i_end = buf_end; ++ } ++ /* Trim any trailing spaces/tabs from i_end */ ++ while (spacetab(i_end[-1])) ++ i_end--; ++ *((char *)i_end) = '\0'; ++ /* Skip over leading spaces/tabs */ ++ i_name = next_non_spacetab(bprm->buf+2, i_end); ++ if (!i_name || (i_name == i_end)) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* No interpreter name found */ ++ } ++ ++ /* Is there an optional argument? */ ++ i_arg = NULL; ++ i_sep = next_terminator(i_name, i_end); ++ if (i_sep && (*i_sep != '\0')) { ++ i_arg = next_non_spacetab(i_sep, i_end); ++ *((char *)i_sep) = '\0'; ++ } ++ ++ /* ++ * OK, we've parsed out the interpreter name and ++ * (optional) argument. ++ * Splice in (1) the interpreter's name for argv[0] ++ * (2) (optional) argument to interpreter ++ * (3) filename of shell script (replace argv[0]) ++ * (4) user arguments (argv[1:]) ++ */ ++ ++ execfd = open(i_name, O_RDONLY); ++ if (execfd < 0) { ++ perror("script_prepare_binprm: could not open script"); ++ return -ENOEXEC; /* Could not open interpreter */ ++ } ++ ++ argc_delta = 1 /* extra filename */ + (i_arg ? 
1 : 0); ++ bprm->argc += argc_delta; ++ bprm->argv = realloc(bprm->argv, sizeof(char*) * (bprm->argc + 1)); ++ ++ /* shift argv by argc_delta */ ++ for (i = bprm->argc; i >= argc_delta; i--) ++ bprm->argv[i] = bprm->argv[i-argc_delta]; ++ ++ bprm->argv[0] = (char *)strdup(i_name); ++ if (i_arg) ++ bprm->argv[1] = (char *)strdup(i_arg); ++ ++ bprm->fd = execfd; /* not closing fd as it is needed for the duration of the program */ ++ bprm->filename = (char *)strdup(i_name); /* replace filename with script interpreter */ ++ /* envc and envp are kept unchanged */ ++ ++ return 0; ++} +diff --git a/linux-user/loader.h b/linux-user/loader.h +index f375ee0679..f3f3b9ce1b 100644 +--- a/linux-user/loader.h ++++ b/linux-user/loader.h +@@ -56,4 +56,6 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src, + + extern unsigned long guest_stack_size; + ++int load_script(struct linux_binprm *bprm); ++ + #endif /* LINUX_USER_LOADER_H */ +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0005-set-script-path-as-argv0-in-shebang-handler.patch b/patches/buildkit-direct-execve-v8.2/0005-set-script-path-as-argv0-in-shebang-handler.patch new file mode 100644 index 00000000..6ba6f6b8 --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0005-set-script-path-as-argv0-in-shebang-handler.patch @@ -0,0 +1,26 @@ +From baadf95fbcc53dc609480c8432569b01c2ab60a9 Mon Sep 17 00:00:00 2001 +From: Tonis Tiigi +Date: Thu, 26 Aug 2021 01:18:32 +0200 +Subject: [PATCH] set script path as argv0 in shebang handler + +Signed-off-by: Tonis Tiigi +--- + linux-user/linuxload.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 354650ef90..161a0b05bf 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -246,6 +246,8 @@ int load_script(struct linux_binprm *bprm) + * (4) user arguments (argv[1:]) + */ + ++ bprm->argv[0] = bprm->filename; ++ + execfd = open(i_name, O_RDONLY); + if (execfd < 0) { + 
perror("script_prepare_binprm: could not open script"); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0006-linux-user-use-GLib-to-remember-the-program-name.patch b/patches/buildkit-direct-execve-v8.2/0006-linux-user-use-GLib-to-remember-the-program-name.patch new file mode 100644 index 00000000..23ad119a --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0006-linux-user-use-GLib-to-remember-the-program-name.patch @@ -0,0 +1,26 @@ +From 840dd52102da8ba46a8d4d55ceae099a98fdd137 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 20:57:04 +0200 +Subject: [PATCH] linux-user: use GLib to remember the program name + +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 0ce9f207be..173d56ba5e 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8447,7 +8447,7 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + * where qemu_progname is the error message prefix for qemu. + * Note: if pathname is relative, it will be prepended with the current working directory. 
+ */ +- argp[0] = (char*)error_get_progname(); ++ argp[0] = (char*)g_get_prgname(); + argp[1] = (char*)"-0"; + argp[2] = (char*)lock_user_string(addr); + argp[3] = (char*)prepend_workdir_if_relative(p); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v8.2/0007-fix-execvp-PATH-handling.patch b/patches/buildkit-direct-execve-v8.2/0007-fix-execvp-PATH-handling.patch new file mode 100644 index 00000000..e342b5d2 --- /dev/null +++ b/patches/buildkit-direct-execve-v8.2/0007-fix-execvp-PATH-handling.patch @@ -0,0 +1,59 @@ +From ff69bb120a7c59ccd1778560b46f63c751a8e701 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 21:04:19 +0200 +Subject: [PATCH] fix execvp PATH handling + +When the execvp syscall is invoked, the system PATH should be searched +for the executable to invoke, with the first match in the PATH being +invoked. However, the call being modified to inject qemu breaks this +behaviour, as it's not till _after_ qemu is invoked that the presence or +executability is checked, which is too late. ENOENT and EACCES aren't +returned from the execve call, which stops execvp looping over the PATH +and aborts the entire process. + +This is resolved by testing the target command prior to executing it via +qemu, returning the appropriate error codes. 
+ +Signed-off-by: David Ackroyd +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 173d56ba5e..3fb6ff1c58 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8399,6 +8399,7 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + abi_ulong addr; + char **q; + void *p; ++ struct stat st; + + argc = 0; + +@@ -8456,6 +8457,21 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + goto execve_end; + } + ++ /* ++ * Check whether executable up front, as running once the qemu process is started these failures ++ * will happen internally there, and only exposed as a non-zero exit code for qemu. ++ */ ++ ret = get_errno(stat(argp[3], &st)); ++ if (is_error(ret)) { ++ ret = -host_to_target_errno(errno); ++ goto execve_end; ++ } ++ ++ if ((st.st_mode & S_IFDIR) || !(st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { ++ ret = -TARGET_EACCES; ++ goto execve_end; ++ } ++ + /* copy guest argv1 onwards to host argv4 onwards */ + for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) { +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch b/patches/buildkit-direct-execve-v9.0/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch new file mode 100644 index 00000000..a311b526 --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch @@ -0,0 +1,92 @@ +From 43aea3054fcbae1bfbfbb90edf1e3b56f439066f Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Fri, 8 Sep 2023 10:47:29 +0200 +Subject: [PATCH] linux-user: have execve call qemu via /proc/self/exe to not + rely on binfmt_misc + +It is assumed that when a guest program calls execve syscall it wants to +execute a program on the same guest architecture and not the host architecture. 
+ +Previously, such a guest program would have execve syscall error out with: +"exec format error". + +A common solution is to register the qemu binary in binfmt_misc but that is not a +userland-friendly solution, requiring to modify kernel state. + +This patch injects /proc/self/exe as the first parameter and the qemu program name +as argv[0] to execve. + +Signed-off-by: Tibor Vass +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 44 +++++++++++++++++++++++++++++++------------- + 1 file changed, 31 insertions(+), 13 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 9ee124c583..6ed502eb6c 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8442,10 +8442,37 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, + envc++; + } + +- argp = g_new0(char *, argc + 1); ++ argp = g_new0(char *, argc + 4); + envp = g_new0(char *, envc + 1); + +- for (gp = guest_argp, q = argp; gp; gp += sizeof(abi_ulong), q++) { ++ if (!(p = lock_user_string(pathname))) ++ goto execve_efault; ++ ++ /* if pathname is /proc/self/exe then retrieve the path passed to qemu via command line */ ++ if (is_proc_myself(p, "exe")) { ++ CPUState *cpu = env_cpu((CPUArchState *)cpu_env); ++ TaskState *ts = cpu->opaque; ++ p = ts->bprm->filename; ++ } ++ ++ /* retrieve guest argv0 */ ++ if (get_user_ual(addr, guest_argp)) ++ goto execve_efault; ++ ++ /* ++ * From the guest, the call ++ * execve(pathname, [argv0, argv1], envp) ++ * on the host, becomes: ++ * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) ++ * where qemu_progname is the error message prefix for qemu ++ */ ++ argp[0] = (char*)error_get_progname(); ++ argp[1] = (char*)"-0"; ++ argp[2] = (char*)lock_user_string(addr); ++ argp[3] = p; ++ ++ /* copy guest argv1 onwards to host argv4 onwards */ ++ for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) { + goto execve_efault; + } +@@ -8484,18 
+8511,9 @@ static int do_execv(CPUArchState *cpu_env, int dirfd, + * before the execve completes and makes it the other + * program's problem. + */ +- p = lock_user_string(pathname); +- if (!p) { +- goto execve_efault; +- } +- +- const char *exe = p; +- if (is_proc_myself(p, "exe")) { +- exe = exec_path; +- } + ret = is_execveat +- ? safe_execveat(dirfd, exe, argp, envp, flags) +- : safe_execve(exe, argp, envp); ++ ? safe_execveat(dirfd, "/proc/self/exe", argp, envp, flags) ++ : safe_execve("/proc/self/exe", argp, envp); + ret = get_errno(ret); + + unlock_user(p, pathname, 0); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0002-linux-user-lookup-user-program-in-PATH.patch b/patches/buildkit-direct-execve-v9.0/0002-linux-user-lookup-user-program-in-PATH.patch new file mode 100644 index 00000000..30e103c5 --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0002-linux-user-lookup-user-program-in-PATH.patch @@ -0,0 +1,76 @@ +From d83023eb7a0574cad224c7d88ac8dcf9d745afa3 Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Tue, 2 Jun 2020 10:39:48 +0000 +Subject: [PATCH] linux-user: lookup user program in PATH + +Signed-off-by: Tibor Vass +--- + linux-user/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/linux-user/main.c b/linux-user/main.c +index fbc9bcfd5f..30f163de81 100644 +--- a/linux-user/main.c ++++ b/linux-user/main.c +@@ -558,6 +558,45 @@ static void usage(int exitcode) + exit(exitcode); + } + ++/* ++ * path_lookup searches for an executable filename in the directories named by the PATH environment variable. ++ * Returns a copy of filename if it is an absolute path or could not find a match. ++ * Caller is responsible to free returned string. ++ * Adapted from musl's execvp implementation. 
++ */ ++static char *path_lookup(char *filename) { ++ const char *p, *z, *path = getenv("PATH"); ++ size_t l, k; ++ struct stat buf; ++ ++ /* if PATH is not set or filename is absolute path return filename */ ++ if (!path || !filename || filename[0] == '/') ++ return filename ? strdup(filename) : NULL; ++ ++ k = strnlen(filename, NAME_MAX+1); ++ if (k > NAME_MAX) { ++ errno = ENAMETOOLONG; ++ return NULL; ++ } ++ l = strnlen(path, PATH_MAX-1)+1; ++ ++ for (p = path; ; p = z) { ++ char *b = calloc(l+k+1, sizeof(char)); ++ z = strchrnul(p, ':'); ++ if (z-p >= l) { ++ if (!*z++) break; ++ continue; ++ } ++ memcpy(b, p, z-p); ++ b[z-p] = '/'; ++ memcpy(b+(z-p)+(z>p), filename, k+1); ++ if (!stat(b, &buf) && !(buf.st_mode & S_IFDIR) && (buf.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) ++ return b; ++ if (!*z++) break; ++ } ++ return strndup(filename, NAME_MAX+1); ++} ++ + static int parse_args(int argc, char **argv) + { + const char *r; +@@ -623,7 +662,11 @@ static int parse_args(int argc, char **argv) + exit(EXIT_FAILURE); + } + +- exec_path = argv[optind]; ++ /* not freeing exec_path as it is needed for the lifetime of the process */ ++ if (!(exec_path = path_lookup(argv[optind]))) { ++ (void) fprintf(stderr, "qemu: could not find user program %s: %s\n", argv[optind], strerror(errno)); ++ exit(EXIT_FAILURE); ++ } + + return optind; + } +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch b/patches/buildkit-direct-execve-v9.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch new file mode 100644 index 00000000..b07415eb --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0003-linux-user-path-in-execve-should-be-relative-to-work.patch @@ -0,0 +1,103 @@ +From 8fd15aa673a7241f8aeeb64fff5633b973913ae3 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 20:54:37 +0200 +Subject: [PATCH] linux-user: path in execve should be relative to working dir + +Fixes regression introduced in parent 
commit where PATH handling was introduced. + +When guest calls execve(filename, argp, envp) filename can be relative in which +case Linux makes it relative to the working directory. + +However, since execve is now handled by exec-ing qemu process again, filename +would first get looked up in PATH in main() before calling host's execve. + +With this change, if filename is relative and exists in working directory as +well as in PATH, working directory will get precedence over PATH if guest is +doing an execve syscall, but not if relative filename comes from qemu's argv. + +Signed-off-by: Tibor Vass +Signed-off-by: CrazyMax +--- + include/qemu/path.h | 1 + + linux-user/syscall.c | 9 +++++++-- + util/path.c | 32 ++++++++++++++++++++++++++++++++ + 3 files changed, 40 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/path.h b/include/qemu/path.h +index c6292a9709..a81fb51e1f 100644 +--- a/include/qemu/path.h ++++ b/include/qemu/path.h +@@ -3,5 +3,6 @@ + + void init_paths(const char *prefix); + const char *path(const char *pathname); ++const char *prepend_workdir_if_relative(const char *path); + + #endif +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 947af70611..0ce9f207be 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8444,12 +8444,17 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + * execve(pathname, [argv0, argv1], envp) + * on the host, becomes: + * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) +- * where qemu_progname is the error message prefix for qemu ++ * where qemu_progname is the error message prefix for qemu. ++ * Note: if pathname is relative, it will be prepended with the current working directory. 
+ */ + argp[0] = (char*)error_get_progname(); + argp[1] = (char*)"-0"; + argp[2] = (char*)lock_user_string(addr); +- argp[3] = p; ++ argp[3] = (char*)prepend_workdir_if_relative(p); ++ if (!argp[3]) { ++ ret = -host_to_target_errno(errno); ++ goto execve_end; ++ } + + /* copy guest argv1 onwards to host argv4 onwards */ + for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { +diff --git a/util/path.c b/util/path.c +index 8e174eb436..06fe2663b8 100644 +--- a/util/path.c ++++ b/util/path.c +@@ -68,3 +68,35 @@ const char *path(const char *name) + qemu_mutex_unlock(&lock); + return ret; + } ++ ++/* Prepends working directory if path is relative. ++ * If path is absolute, it is returned as-is without any allocation. ++ * Otherwise, caller is responsible to free returned path. ++ * Returns NULL and sets errno upon error. ++ * Note: realpath is not called to let the kernel do the rest of the resolution. ++ */ ++const char *prepend_workdir_if_relative(const char *path) ++{ ++ char buf[PATH_MAX]; ++ char *p; ++ int i, j, k; ++ ++ if (!path || path[0] == '/') return path; ++ ++ if (!getcwd(buf, PATH_MAX)) return NULL; ++ i = strlen(buf); ++ j = strlen(path); ++ k = i + 1 + j + 1; /* workdir + '/' + path + '\0' */ ++ if (i + j > PATH_MAX) { ++ errno = ERANGE; ++ return NULL; ++ } ++ if (!(p = malloc(k))) return NULL; /* k already counts bytes, incl. '/' and '\0' */ ++ ++ p[0] = '\0'; ++ ++ if (!strncat(p, buf, i)) return NULL; ++ if (!strncat(p, "/", 1)) return NULL; ++ if (!strncat(p, path, j)) return NULL; ++ return p; ++} +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0004-linux-user-support-loading-scripts-with-shebang.patch b/patches/buildkit-direct-execve-v9.0/0004-linux-user-support-loading-scripts-with-shebang.patch new file mode 100644 index 00000000..6288333e --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0004-linux-user-support-loading-scripts-with-shebang.patch @@ -0,0 +1,221 @@ +From 14efa42c9bc061ffcad00bfa4a643e73f9a056ee Mon Sep 
17 00:00:00 2001 +From: Tibor Vass +Date: Thu, 18 Jun 2020 20:57:22 +0000 +Subject: [PATCH] linux-user: support loading scripts with shebang (#!) + +The interpreter is assumed to be compatible with the target architecture. + +The script loading logic is taken from Linux source code to match logic as closely as possible. + +An interpreter can itself be a script (#!/other.script), and thus load another interpreter. +This happens in a loop therefore the loading chain of interpreter-scripts is limited to 5 like in Linux. + +Warning: there might be issues with m68k, mips, and mips64 architectures +since the cpu_model returned by those architectures (see linux-user/$arch/target_elf.h) +is dependent on the ELF header of the payload, but in this case the payload +is a script and not a binary. + This could be fixed either by moving the loading logic or +parts of it to before the cpu_model is set, so that the final ELF binary is available. +An alternative fix is to avoid the loop altogether and call qemu binary again with different arguments. +The downside is that it would require one extra exec syscall per interpreter. 
+ +Signed-off-by: Tibor Vass +Signed-off-by: Tonis Tiigi +--- + linux-user/elfload.c | 2 +- + linux-user/linuxload.c | 137 ++++++++++++++++++++++++++++++++++++----- + linux-user/loader.h | 2 + + 3 files changed, 124 insertions(+), 17 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index c45da4d633..3c27aef5d4 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3219,10 +3219,10 @@ uint32_t get_elf_eflags(int fd) + return 0; + } + ret = read(fd, &ehdr, sizeof(ehdr)); ++ offset = lseek(fd, offset, SEEK_SET); /* reset seek regardless of error */ + if (ret < sizeof(ehdr)) { + return 0; + } +- offset = lseek(fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + return 0; + } +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 2ed5fc45ed..354650ef90 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -128,7 +128,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct linux_binprm *bprm) + { +- int retval; ++ int retval, depth; + + bprm->fd = fdexec; + bprm->filename = (char *)filename; +@@ -137,24 +137,33 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + bprm->envc = count(envp); + bprm->envp = envp; + +- retval = prepare_binprm(bprm); +- +- if (retval >= 0) { +- if (bprm->buf[0] == 0x7f +- && bprm->buf[1] == 'E' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'F') { +- retval = load_elf_binary(bprm, infop); ++ for (depth = 0; ; depth++) { ++ if (depth > 5) { ++ return -ELOOP; ++ } ++ retval = prepare_binprm(bprm); ++ if (retval >= 0) { ++ if (bprm->buf[0] == 0x7f ++ && bprm->buf[1] == 'E' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'F') { ++ retval = load_elf_binary(bprm, infop); + #if defined(TARGET_HAS_BFLT) +- } else if (bprm->buf[0] == 'b' +- && bprm->buf[1] == 'F' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'T') { +- retval = load_flt_binary(bprm, infop); ++ } 
else if (bprm->buf[0] == 'b' ++ && bprm->buf[1] == 'F' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'T') { ++ retval = load_flt_binary(bprm, infop); + #endif +- } else { +- return -ENOEXEC; ++ } else if (bprm->buf[0] == '#' ++ && bprm->buf[1] == '!') { ++ retval = load_script(bprm); ++ if (retval >= 0) continue; ++ } else { ++ return -ENOEXEC; ++ } + } ++ break; + } + + if (retval >= 0) { +@@ -165,3 +174,99 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + + return retval; + } ++ ++static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } ++static inline const char *next_non_spacetab(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (!spacetab(*first)) ++ return first; ++ return NULL; ++} ++static inline const char *next_terminator(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (spacetab(*first) || !*first) ++ return first; ++ return NULL; ++} ++ ++/* ++ * Reads the interpreter (shebang #!) line and modifies bprm object accordingly ++ * This is a modified version of Linux's load_script function. ++*/ ++int load_script(struct linux_binprm *bprm) ++{ ++ const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; ++ int execfd, i, argc_delta; ++ ++ buf_end = bprm->buf + sizeof(bprm->buf) - 1; ++ i_end = (const char*)memchr(bprm->buf, '\n', sizeof(bprm->buf)); ++ if (!i_end) { ++ i_end = next_non_spacetab(bprm->buf + 2, buf_end); ++ if (!i_end) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* Entire buf is spaces/tabs */ ++ } ++ /* ++ * If there is no later space/tab/NUL we must assume the ++ * interpreter path is truncated. 
++ */ ++ if (!next_terminator(i_end, buf_end)) { ++ perror("script_prepare_binprm: truncated interpreter path"); ++ return -ENOEXEC; ++ } ++ i_end = buf_end; ++ } ++ /* Trim any trailing spaces/tabs from i_end */ ++ while (spacetab(i_end[-1])) ++ i_end--; ++ *((char *)i_end) = '\0'; ++ /* Skip over leading spaces/tabs */ ++ i_name = next_non_spacetab(bprm->buf+2, i_end); ++ if (!i_name || (i_name == i_end)) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* No interpreter name found */ ++ } ++ ++ /* Is there an optional argument? */ ++ i_arg = NULL; ++ i_sep = next_terminator(i_name, i_end); ++ if (i_sep && (*i_sep != '\0')) { ++ i_arg = next_non_spacetab(i_sep, i_end); ++ *((char *)i_sep) = '\0'; ++ } ++ ++ /* ++ * OK, we've parsed out the interpreter name and ++ * (optional) argument. ++ * Splice in (1) the interpreter's name for argv[0] ++ * (2) (optional) argument to interpreter ++ * (3) filename of shell script (replace argv[0]) ++ * (4) user arguments (argv[1:]) ++ */ ++ ++ execfd = open(i_name, O_RDONLY); ++ if (execfd < 0) { ++ perror("script_prepare_binprm: could not open script"); ++ return -ENOEXEC; /* Could not open interpreter */ ++ } ++ ++ argc_delta = 1 /* extra filename */ + (i_arg ? 
1 : 0); ++ bprm->argc += argc_delta; ++ bprm->argv = realloc(bprm->argv, sizeof(char*) * (bprm->argc + 1)); ++ ++ /* shift argv by argc_delta */ ++ for (i = bprm->argc; i >= argc_delta; i--) ++ bprm->argv[i] = bprm->argv[i-argc_delta]; ++ ++ bprm->argv[0] = (char *)strdup(i_name); ++ if (i_arg) ++ bprm->argv[1] = (char *)strdup(i_arg); ++ ++ bprm->fd = execfd; /* not closing fd as it is needed for the duration of the program */ ++ bprm->filename = (char *)strdup(i_name); /* replace filename with script interpreter */ ++ /* envc and envp are kept unchanged */ ++ ++ return 0; ++} +diff --git a/linux-user/loader.h b/linux-user/loader.h +index f375ee0679..f3f3b9ce1b 100644 +--- a/linux-user/loader.h ++++ b/linux-user/loader.h +@@ -56,4 +56,6 @@ abi_long memcpy_to_target(abi_ulong dest, const void *src, + + extern unsigned long guest_stack_size; + ++int load_script(struct linux_binprm *bprm); ++ + #endif /* LINUX_USER_LOADER_H */ +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0005-set-script-path-as-argv0-in-shebang-handler.patch b/patches/buildkit-direct-execve-v9.0/0005-set-script-path-as-argv0-in-shebang-handler.patch new file mode 100644 index 00000000..6ba6f6b8 --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0005-set-script-path-as-argv0-in-shebang-handler.patch @@ -0,0 +1,26 @@ +From baadf95fbcc53dc609480c8432569b01c2ab60a9 Mon Sep 17 00:00:00 2001 +From: Tonis Tiigi +Date: Thu, 26 Aug 2021 01:18:32 +0200 +Subject: [PATCH] set script path as argv0 in shebang handler + +Signed-off-by: Tonis Tiigi +--- + linux-user/linuxload.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 354650ef90..161a0b05bf 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -246,6 +246,8 @@ int load_script(struct linux_binprm *bprm) + * (4) user arguments (argv[1:]) + */ + ++ bprm->argv[0] = bprm->filename; ++ + execfd = open(i_name, O_RDONLY); + if (execfd < 0) { + 
perror("script_prepare_binprm: could not open script"); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch b/patches/buildkit-direct-execve-v9.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch new file mode 100644 index 00000000..23ad119a --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0006-linux-user-use-GLib-to-remember-the-program-name.patch @@ -0,0 +1,26 @@ +From 840dd52102da8ba46a8d4d55ceae099a98fdd137 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 20:57:04 +0200 +Subject: [PATCH] linux-user: use GLib to remember the program name + +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 0ce9f207be..173d56ba5e 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8447,7 +8447,7 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + * where qemu_progname is the error message prefix for qemu. + * Note: if pathname is relative, it will be prepended with the current working directory. 
+ */ +- argp[0] = (char*)error_get_progname(); ++ argp[0] = (char*)g_get_prgname(); + argp[1] = (char*)"-0"; + argp[2] = (char*)lock_user_string(addr); + argp[3] = (char*)prepend_workdir_if_relative(p); +-- +2.34.0 + diff --git a/patches/buildkit-direct-execve-v9.0/0007-fix-execvp-PATH-handling.patch b/patches/buildkit-direct-execve-v9.0/0007-fix-execvp-PATH-handling.patch new file mode 100644 index 00000000..e342b5d2 --- /dev/null +++ b/patches/buildkit-direct-execve-v9.0/0007-fix-execvp-PATH-handling.patch @@ -0,0 +1,59 @@ +From ff69bb120a7c59ccd1778560b46f63c751a8e701 Mon Sep 17 00:00:00 2001 +From: CrazyMax +Date: Wed, 3 May 2023 21:04:19 +0200 +Subject: [PATCH] fix execvp PATH handling + +When the execvp syscall is invoked, the system PATH should be searched +for the executable to invoke, with the first match in the PATH being +invoked. However, the call being modified to inject qemu breaks this +behaviour, as it's not until _after_ qemu is invoked that the presence or +executability is checked, which is too late. ENOENT and EACCES aren't +returned from the execve call, which stops execvp looping over the PATH +and aborts the entire process. + +This is resolved by testing the target command prior to executing it via +qemu, returning the appropriate error codes. 
+ +Signed-off-by: David Ackroyd +Signed-off-by: CrazyMax +--- + linux-user/syscall.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 173d56ba5e..3fb6ff1c58 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8399,6 +8399,7 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + abi_ulong addr; + char **q; + void *p; ++ struct stat st; + + argc = 0; + +@@ -8456,6 +8457,21 @@ static int do_execveat(CPUArchState *cpu_env, int dirfd, + goto execve_end; + } + ++ /* ++ * Check up front that the target exists and is executable: once the new qemu process has ++ * started, these failures happen inside it and are only exposed as a non-zero qemu exit code. ++ */ ++ ret = get_errno(stat(argp[3], &st)); ++ if (is_error(ret)) { ++ ret = -host_to_target_errno(errno); ++ goto execve_end; ++ } ++ ++ if ((st.st_mode & S_IFDIR) || !(st.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { ++ ret = -TARGET_EACCES; ++ goto execve_end; ++ } ++ + /* copy guest argv1 onwards to host argv4 onwards */ + for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) { +-- +2.34.0 +