Skip to content

Commit

Permalink
Improve rootless container support
Browse files Browse the repository at this point in the history
  • Loading branch information
Moe-hacker committed Nov 22, 2024
1 parent 6b00bf3 commit 8409170
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 25 deletions.
1 change: 1 addition & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# v3.8:
* Support more platforms, currently supports: arm64, armv7, armhf, riscv64, i386, loong64, s390x, ppc64le and x86_64.
* Improve rootless container support.
* Add `-W` option: `--work-dir`.
* Add `-A` option: `--unmask-dirs`.
* Add `-E` option: `--user`.
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Max memory: 860 KB
```
# Highlights:
- Powerful Features
- Basic container features are all supported, chroot, unshare with pivot_root, capability control, basic cgroup support, no_new_privs, auto set environment variables and change user/workdir, built-in seccomp profile, basic support for rootless container, and even more...
- Basic container features are all supported: chroot, unshare with pivot_root, capability control, basic cgroup support, no_new_privs, automatically setting environment variables and changing user/workdir, a built-in seccomp profile, running rootless containers, and even more...
- Built-in support for binfmt_misc & QEMU, so you can run multi-arch containers easily.
- Powerful mount option, you can mount image/partition as root, and you can choose every mountpoint to be read-only or rw.
- Built-in support for config file.
Expand All @@ -34,9 +34,9 @@ Max memory: 860 KB
- Flexibility, for More Platform
- From Android devices to IoT gadgets, from amd64 to s390x, ruri can provide basic container support on almost every Linux platform; the only thing it needs is root privilege.
- Secure by Design
- Built-in rootless container support.
- Built-in security options, to make container more secure.
- Built-in read-only filesystem options for more protection.
- Built-in rootless mode support.
- Built-in read-only filesystem options for more protection.
- Simple for Beginner
- You can simply use ruri as a replacement for the `chroot` command, and ruri will do everything for you.
- Ruri empowers you with the freedom to configure everything, but that does not mean you need to learn every option to use it.
Expand Down
12 changes: 4 additions & 8 deletions src/chroot.c
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,6 @@ void ruri_run_rootless_chroot_container(struct RURI_CONTAINER *_Nonnull containe
mount_mountpoints(container);
// Copy qemu binary into container.
copy_qemu_binary(container);
// Store container info.
if (container->use_rurienv) {
ruri_store_info(container);
}
// If `-R` option is set, make / read-only.
if (container->ro_root) {
mount(container->container_dir, container->container_dir, NULL, MS_BIND | MS_REMOUNT | MS_RDONLY, NULL);
Expand All @@ -660,11 +656,11 @@ void ruri_run_rootless_chroot_container(struct RURI_CONTAINER *_Nonnull containe
// Check binary used.
check_binary(container);
// chroot(2) into container.
if (try_pivot_root(container) == -1) {
chdir(container->container_dir);
chroot(".");
chdir("/");
chdir(container->container_dir);
if (chroot(".") == -1) {
ruri_error("{red}Error: failed to chroot(2) into container QwQ\n");
}
chdir("/");
// Change to the work dir.
if (container->work_dir != NULL) {
if (chdir(container->work_dir) == -1 && !container->no_warnings) {
Expand Down
74 changes: 61 additions & 13 deletions src/rootless.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container)
/*
* Setup namespaces and run rootless container.
*/
ruri_read_info(container, container->container_dir);
uid_t uid = geteuid();
gid_t gid = getegid();
bool set_id_map_succeed = false;
Expand All @@ -222,30 +223,76 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container)
// to change the parent process's id map.
pid_t pid_1 = fork();
if (pid_1 > 0) {
// Enable user namespace.
try_unshare(CLONE_NEWUSER);
int stat = 0;
waitpid(pid_1, &stat, 0);
if (WEXITSTATUS(stat) == 0) {
if (container->ns_pid < 0) {
// Enable user namespace.
try_unshare(CLONE_NEWUSER);
int stat = 0;
waitpid(pid_1, &stat, 0);
if (WEXITSTATUS(stat) == 0) {
set_id_map_succeed = true;
}
} else {
char user_ns[PATH_MAX] = { '\0' };
sprintf(user_ns, "/proc/%d/ns/user", container->ns_pid);
int user_ns_fd = open(user_ns, O_RDONLY | O_CLOEXEC);
if (user_ns_fd < 0) {
ruri_error("{red}Failed to open %s\n", user_ns);
}
if (setns(user_ns_fd, CLONE_NEWUSER) == -1) {
ruri_error("{red}Failed to setns(2) to %s\n", user_ns);
}
set_id_map_succeed = true;
}
} else {
// To ensure that unshare(2) finished in parent process.
usleep(1000);
int stat = try_setup_idmap(ppid, uid, gid);
exit(stat);
if (container->ns_pid < 0) {
// To ensure that unshare(2) finished in parent process.
usleep(1000);
int stat = try_setup_idmap(ppid, uid, gid);
exit(stat);
} else {
exit(0);
}
}
if (container->ns_pid > 0 && set_id_map_succeed) {
char mnt_ns[PATH_MAX] = { '\0' };
sprintf(mnt_ns, "/proc/%d/ns/mnt", container->ns_pid);
int mnt_ns_fd = open(mnt_ns, O_RDONLY | O_CLOEXEC);
if (mnt_ns_fd < 0) {
ruri_error("{red}Failed to open %s\n", mnt_ns);
}
if (setns(mnt_ns_fd, CLONE_NEWNS) == -1) {
ruri_error("{red}Failed to setns(2) to %s\n", mnt_ns);
}
close(mnt_ns_fd);
char pid_ns[PATH_MAX] = { '\0' };
sprintf(pid_ns, "/proc/%d/ns/pid", container->ns_pid);
int pid_ns_fd = open(pid_ns, O_RDONLY | O_CLOEXEC);
if (pid_ns_fd < 0) {
ruri_error("{red}Failed to open %s\n", pid_ns);
}
if (setns(pid_ns_fd, CLONE_NEWPID) == -1) {
ruri_error("{red}Failed to setns(2) to %s\n", pid_ns);
}
close(pid_ns_fd);
} else {
// We need to own mount namespace.
try_unshare(CLONE_NEWNS);
// Seems we need to own a new pid namespace for mount procfs.
try_unshare(CLONE_NEWPID);
}
// We need to own mount namespace.
try_unshare(CLONE_NEWNS);
// Seems we need to own a new pid namespace for mount procfs.
try_unshare(CLONE_NEWPID);
// fork(2) into new namespaces we created.
pid_t pid = fork();
if (pid > 0) {
if (!set_id_map_succeed && !container->no_warnings) {
ruri_warning("{yellow}Check if uidmap is installed on your host, command like su will run failed without uidmap.\n");
set_id_map(uid, gid);
}
usleep(1000);
container->ns_pid = pid;
if (container->use_rurienv && !container->just_chroot) {
ruri_store_info(container);
}
// Wait for child process to exit.
int stat = 0;
waitpid(pid, &stat, 0);
exit(stat);
Expand All @@ -256,6 +303,7 @@ void ruri_run_rootless_container(struct RURI_CONTAINER *_Nonnull container)
if (!container->just_chroot) {
init_rootless_container(container);
}
usleep(1000);
ruri_run_rootless_chroot_container(container);
}
}
2 changes: 1 addition & 1 deletion src/rurienv.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ struct RURI_CONTAINER *ruri_read_info(struct RURI_CONTAINER *_Nullable container
container->cpupercent = INIT_VALUE;
// Check if ns_pid is a ruri process.
// If not, that means the container is not running.
if (container->enable_unshare && !is_ruri_pid(k2v_get_key(int, "ns_pid", buf))) {
if ((container->enable_unshare || container->rootless) && !is_ruri_pid(k2v_get_key(int, "ns_pid", buf))) {
ruri_log("{base}pid %d is not a ruri process.\n", k2v_get_key(int, "ns_pid", buf));
// Unset immutable flag of .rurienv.
fd = open(file, O_RDONLY | O_CLOEXEC);
Expand Down

0 comments on commit 8409170

Please sign in to comment.