From 84091706de4ae63e9b67a2b514b95f4521fd5c27 Mon Sep 17 00:00:00 2001 From: Moe-hacker Date: Fri, 22 Nov 2024 05:48:46 +0000 Subject: [PATCH] Improve rootless container support --- Changelog | 1 + README.md | 6 ++-- src/chroot.c | 12 +++----- src/rootless.c | 74 +++++++++++++++++++++++++++++++++++++++++--------- src/rurienv.c | 2 +- 5 files changed, 70 insertions(+), 25 deletions(-) diff --git a/Changelog b/Changelog index 84a85cc..48a3b57 100644 --- a/Changelog +++ b/Changelog @@ -1,5 +1,6 @@ # v3.8: * Support more platforms, currently supports: arm64, armv7, armhf, riscv64, i386, loong64, s390x, ppc64le and x86_64. + * Improve rootless container support. * Add `-W` option: `--work-dir`. * Add `-A` option: `--unmask-dirs`. * Add `-E` option: `--user`. diff --git a/README.md b/README.md index 26ec2b2..c5e117a 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Max memory: 860 KB ``` # Highlights: - Powerful Features - - Basic container features are all supported, chroot, unshare with pivot_root, capability control, basic cgroup support, no_new_privs, auto set environment variables and change user/workdir, built-in seccomp profile, basic support for rootless container, and even more... + - Basic container features are all supported, chroot, unshare with pivot_root, capability control, basic cgroup support, no_new_privs, auto set environment variables and change user/workdir, built-in seccomp profile, run rootless container, and even more... - Built-in support for binfmt_misc & QEMU, so you can run multi-arch container easily. - Powerful mount option, you can mount image/partition as root, and you can choose every mountpoint to be read-only or rw. - Built-in support for config file. @@ -34,9 +34,9 @@ Max memory: 860 KB - Flexibility, for More Platform - From Android devices to IoT gadgets, from amd64 to s390x, ruri can provide basic container support on almost every Linux platforms, the only thing it need is root privilege. - Secure by Design + - Built-in rootless container support. - Built-in security options, to make container more secure. - - Built-in read-only filesystem options for more protection. - - Built-in rootless mode support. + - Built-in read-only filesystem options for more protection. - Simple for Beginner - You can just use ruri as an instead of `chroot` command, and ruri will do all things for you. - Ruri empowers you with the freedom to configure everything, but that do not means you need to learn every option to use it. diff --git a/src/chroot.c b/src/chroot.c index 404d5e2..a5a461d 100644 --- a/src/chroot.c +++ b/src/chroot.c @@ -638,10 +638,6 @@ void ruri_run_rootless_chroot_container(struct RURI_CONTAINER *_Nonnull containe mount_mountpoints(container); // Copy qemu binary into container. copy_qemu_binary(container); - // Store container info. - if (container->use_rurienv) { - ruri_store_info(container); - } // If `-R` option is set, make / read-only. if (container->ro_root) { mount(container->container_dir, container->container_dir, NULL, MS_BIND | MS_REMOUNT | MS_RDONLY, NULL); @@ -660,11 +656,11 @@ void ruri_run_rootless_chroot_container(struct RURI_CONTAINER *_Nonnull containe // Check binary used. check_binary(container); // chroot(2) into container. - if (try_pivot_root(container) == -1) { - chdir(container->container_dir); - chroot("."); - chdir("/"); + chdir(container->container_dir); + if (chroot(".") == -1) { + ruri_error("{red}Error: failed to chroot(2) into container QwQ\n"); } + chdir("/"); // Change to the work dir. if (container->work_dir != NULL) { if (chdir(container->work_dir) == -1 && !container->no_warnings) { diff --git a/src/rootless.c b/src/rootless.c index 6d62282..4277598 100644 --- a/src/rootless.c +++ b/src/rootless.c @@ -212,6 +212,7 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container) /* * Setup namespaces and run rootless container. */ + ruri_read_info(container, container->container_dir); uid_t uid = geteuid(); gid_t gid = getegid(); bool set_id_map_succeed = false; @@ -222,23 +223,63 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container) // to change the parent process's id map. pid_t pid_1 = fork(); if (pid_1 > 0) { - // Enable user namespace. - try_unshare(CLONE_NEWUSER); - int stat = 0; - waitpid(pid_1, &stat, 0); - if (WEXITSTATUS(stat) == 0) { + if (container->ns_pid < 0) { + // Enable user namespace. + try_unshare(CLONE_NEWUSER); + int stat = 0; + waitpid(pid_1, &stat, 0); + if (WEXITSTATUS(stat) == 0) { + set_id_map_succeed = true; + } + } else { + char user_ns[PATH_MAX] = { '\0' }; + sprintf(user_ns, "/proc/%d/ns/user", container->ns_pid); + int user_ns_fd = open(user_ns, O_RDONLY | O_CLOEXEC); + if (user_ns_fd < 0) { + ruri_error("{red}Failed to open %s\n", user_ns); + } + if (setns(user_ns_fd, CLONE_NEWUSER) == -1) { + ruri_error("{red}Failed to setns(2) to %s\n", user_ns); + } set_id_map_succeed = true; } } else { - // To ensure that unshare(2) finished in parent process. - usleep(1000); - int stat = try_setup_idmap(ppid, uid, gid); - exit(stat); + if (container->ns_pid < 0) { + // To ensure that unshare(2) finished in parent process. + usleep(1000); + int stat = try_setup_idmap(ppid, uid, gid); + exit(stat); + } else { + exit(0); + } + } + if (container->ns_pid > 0 && set_id_map_succeed) { + char mnt_ns[PATH_MAX] = { '\0' }; + sprintf(mnt_ns, "/proc/%d/ns/mnt", container->ns_pid); + int mnt_ns_fd = open(mnt_ns, O_RDONLY | O_CLOEXEC); + if (mnt_ns_fd < 0) { + ruri_error("{red}Failed to open %s\n", mnt_ns); + } + if (setns(mnt_ns_fd, CLONE_NEWNS) == -1) { + ruri_error("{red}Failed to setns(2) to %s\n", mnt_ns); + } + close(mnt_ns_fd); + char pid_ns[PATH_MAX] = { '\0' }; + sprintf(pid_ns, "/proc/%d/ns/pid", container->ns_pid); + int pid_ns_fd = open(pid_ns, O_RDONLY | O_CLOEXEC); + if (pid_ns_fd < 0) { + ruri_error("{red}Failed to open %s\n", pid_ns); + } + if (setns(pid_ns_fd, CLONE_NEWPID) == -1) { + ruri_error("{red}Failed to setns(2) to %s\n", pid_ns); + } + close(pid_ns_fd); + } else { + // We need to own mount namespace. + try_unshare(CLONE_NEWNS); + // Seems we need to own a new pid namespace for mount procfs. + try_unshare(CLONE_NEWPID); } - // We need to own mount namespace. - try_unshare(CLONE_NEWNS); - // Seems we need to own a new pid namespace for mount procfs. - try_unshare(CLONE_NEWPID); // fork(2) into new namespaces we created. pid_t pid = fork(); if (pid > 0) { @@ -246,6 +287,12 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container) ruri_warning("{yellow}Check if uidmap is installed on your host, command like su will run failed without uidmap.\n"); set_id_map(uid, gid); } + usleep(1000); + container->ns_pid = pid; + if (container->use_rurienv && !container->just_chroot) { + ruri_store_info(container); + } + // Wait for child process to exit. int stat = 0; waitpid(pid, &stat, 0); exit(stat); @@ -256,6 +303,7 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container) if (!container->just_chroot) { init_rootless_container(container); } + usleep(1000); ruri_run_rootless_chroot_container(container); } } diff --git a/src/rurienv.c b/src/rurienv.c index 49c936a..e30d9e1 100644 --- a/src/rurienv.c +++ b/src/rurienv.c @@ -294,7 +294,7 @@ struct RURI_CONTAINER *ruri_read_info(struct RURI_CONTAINER *_Nullable container container->cpupercent = INIT_VALUE; // Check if ns_pid is a ruri process. // If not, that means the container is not running. - if (container->enable_unshare && !is_ruri_pid(k2v_get_key(int, "ns_pid", buf))) { + if ((container->enable_unshare || container->rootless) && !is_ruri_pid(k2v_get_key(int, "ns_pid", buf))) { ruri_log("{base}pid %d is not a ruri process.\n", k2v_get_key(int, "ns_pid", buf)); // Unset immutable flag of .rurienv. fd = open(file, O_RDONLY | O_CLOEXEC);