From 10db894dcacff8ee507234c2930b3a9a1188cef7 Mon Sep 17 00:00:00 2001 From: Lucas Jacques Date: Thu, 19 Dec 2024 18:30:42 +0100 Subject: [PATCH] feat: add ravel jailer --- Makefile | 9 +- agent/machinerunner/destroy.go | 12 +- agent/machinerunner/prepare.go | 2 +- agent/machinerunner/run.go | 7 +- client/daemon.go | 2 +- cmd/jailer/jailer.go | 15 ++ core/config/config.go | 40 ++++- core/config/runtime.go | 7 +- core/instance/interfaces.go | 6 +- core/jailer/chroot.go | 60 +++++++ core/jailer/flags.go | 68 ++++++++ core/jailer/jail.go | 266 +++++++++++++++++++++++++++++ core/jailer/jaildir.go | 181 ++++++++++++++++++++ core/jailer/jailer.go | 253 +++++++++++++++++++++++++++ core/jailer/run.go | 74 ++++++++ core/jailer/util.go | 44 +++++ core/networking/tap/prepare.go | 4 +- core/networking/tap/tap.go | 4 +- go.mod | 26 +-- go.sum | 61 ++++--- internal/cgroups/cgroups.go | 16 ++ internal/resources/resources.go | 20 +++ pkg/cloudhypervisor/vmm.go | 68 ++------ runtime/instance_create.go | 11 +- runtime/instancerunner/vmrunner.go | 49 +++--- runtime/logging/instance_logger.go | 16 +- runtime/networking.go | 7 +- runtime/runtime.go | 36 +++- runtime/state.go | 2 - runtime/user.go | 60 +++++++ runtime/vm/builder.go | 241 +++++++++++++++++++------- runtime/vm/dir.go | 46 ----- runtime/vm/filesystem.go | 19 ++- runtime/vm/initrd.go | 37 ++-- runtime/vm/vm.go | 56 ++---- 35 files changed, 1495 insertions(+), 330 deletions(-) create mode 100644 cmd/jailer/jailer.go create mode 100644 core/jailer/chroot.go create mode 100644 core/jailer/flags.go create mode 100644 core/jailer/jail.go create mode 100644 core/jailer/jaildir.go create mode 100644 core/jailer/jailer.go create mode 100644 core/jailer/run.go create mode 100644 core/jailer/util.go create mode 100644 internal/cgroups/cgroups.go create mode 100644 internal/resources/resources.go create mode 100644 runtime/user.go delete mode 100644 runtime/vm/dir.go diff --git a/Makefile b/Makefile index 75d242e..f60c18a 100644 --- 
a/Makefile +++ b/Makefile @@ -1,9 +1,14 @@ -build-init: - CGO_ENABLED=0 go build -o bin/ravel-init -ldflags="-s -w" cmd/ravel-init/*.go +run-raveld: + sudo go run cmd/ravel/ravel.go daemon -c ravel.toml +run-api: + air build-ravel: CGO_ENABLED=0 go build -o bin/ravel cmd/ravel/ravel.go +build-jailer: + CGO_ENABLED=0 go build -o bin/jailer cmd/jailer/jailer.go + install-ravel: build-ravel sudo cp ./bin/ravel /usr/bin/ravel protoc: diff --git a/agent/machinerunner/destroy.go b/agent/machinerunner/destroy.go index 8434dab..834a5ee 100644 --- a/agent/machinerunner/destroy.go +++ b/agent/machinerunner/destroy.go @@ -18,10 +18,7 @@ func (m *MachineRunner) Destroy(ctx context.Context, force bool) error { status := m.state.Status() if status == api.MachineStatusStopped { - if err := m.state.PushDestroyEvent(api.OriginUser, force, "requested by user"); err != nil { - return err - } - go m.destroyImpl(ctx) + go m.destroyImpl(ctx, force, "requested by user") return nil } @@ -37,12 +34,15 @@ func (m *MachineRunner) Destroy(ctx context.Context, force bool) error { return err } - go m.destroyImpl(ctx) + go m.destroyImpl(ctx, force, "requested by user") return nil } -func (m *MachineRunner) destroyImpl(ctx context.Context) error { +func (m *MachineRunner) destroyImpl(ctx context.Context, force bool, reason string) error { + if err := m.state.PushDestroyEvent(api.OriginUser, force, reason); err != nil { + return err + } err := m.runtime.DestroyInstance(ctx, m.state.InstanceId()) if err != nil && !errdefs.IsNotFound(err) { slog.Error("failed to destroy instance", "instance", m.state.InstanceId(), "error", err) diff --git a/agent/machinerunner/prepare.go b/agent/machinerunner/prepare.go index 2f45c68..5ed4fb0 100644 --- a/agent/machinerunner/prepare.go +++ b/agent/machinerunner/prepare.go @@ -48,5 +48,5 @@ func (m *MachineRunner) onPrepareFailed(msg string) { slog.Error("Failed to push PrepareFailed event", "error", err) } - m.destroyImpl(context.Background()) + 
m.destroyImpl(context.Background(), true, "prepare failed") } diff --git a/agent/machinerunner/run.go b/agent/machinerunner/run.go index 668fb45..b7805b0 100644 --- a/agent/machinerunner/run.go +++ b/agent/machinerunner/run.go @@ -6,6 +6,7 @@ import ( "time" "github.com/valyentdev/ravel/api" + "github.com/valyentdev/ravel/core/errdefs" "github.com/valyentdev/ravel/core/instance" ) @@ -21,6 +22,10 @@ func (m *MachineRunner) Run() { ctx := context.Background() updates, err := m.runtime.WatchInstanceState(ctx, m.state.InstanceId()) if err != nil { + if errdefs.IsNotFound(err) { + m.destroyImpl(ctx, true, "instance not found") + } + slog.Error("failed to watch instance state", "machine_id", m.state.Id(), "error", err) return } @@ -52,7 +57,7 @@ func (m *MachineRunner) recover(ctx context.Context) bool { return false case api.MachineStatusDestroying: - err := m.destroyImpl(ctx) + err := m.destroyImpl(ctx, true, "recovering from destroy") if err != nil { slog.Error("failed to destroy machine", "machine_id", m.state.Id(), "error", err) } diff --git a/client/daemon.go b/client/daemon.go index d727ca5..7be5c51 100644 --- a/client/daemon.go +++ b/client/daemon.go @@ -34,7 +34,7 @@ func NewDaemonClient(socket string) *DaemonClient { func (a *DaemonClient) CreateInstance(ctx context.Context, options daemon.InstanceOptions) (*instance.Instance, error) { var instance instance.Instance - err := a.client.Post(ctx, "/instances", options, httpclient.WithJSONBody(&options)) + err := a.client.Post(ctx, "/instances", nil, httpclient.WithJSONBody(&options)) if err != nil { return nil, err } diff --git a/cmd/jailer/jailer.go b/cmd/jailer/jailer.go new file mode 100644 index 0000000..1a52597 --- /dev/null +++ b/cmd/jailer/jailer.go @@ -0,0 +1,15 @@ +package main + +import ( + "log/slog" + "os" + + "github.com/valyentdev/ravel/core/jailer" +) + +func main() { + if err := jailer.Run(); err != nil { + slog.Error("jailer run failed", "error", err) + os.Exit(1) + } +} diff --git 
a/core/config/config.go b/core/config/config.go index e6c3770..20d7747 100644 --- a/core/config/config.go +++ b/core/config/config.go @@ -2,6 +2,8 @@ package config import ( "encoding/json" + "errors" + "fmt" "os" "strings" @@ -33,6 +35,41 @@ type RavelConfig struct { Registries registry.RegistriesConfig `json:"registries" toml:"registries"` } +// never display data because it contains secrets +func fmtDecodeError(err *toml.DecodeError) error { + line, column := err.Position() + + return fmt.Errorf("%s %s at %s", err.Error(), strings.Join(err.Key(), "."), fmt.Sprintf("line %d, column %d", line, column)) + +} + +func joinErrors(errs ...error) error { + var errStr string + + for _, err := range errs { + errStr += err.Error() + "\n" + } + return errors.New(errStr) +} + +func buildTomlError(err error) error { + smeErr, ok := err.(*toml.StrictMissingError) + if ok { + var errs []error + for _, e := range smeErr.Errors { + errs = append(errs, fmtDecodeError(&e)) + } + return joinErrors(errs...) 
+ } + + decodeErr, ok := err.(*toml.DecodeError) + if ok { + return joinErrors(fmtDecodeError(decodeErr)) + } + + return fmt.Errorf("toml error: %w", err) +} + func ReadFile(path string) (RavelConfig, error) { var config RavelConfig @@ -46,7 +83,8 @@ func ReadFile(path string) (RavelConfig, error) { decoder = decoder.DisallowUnknownFields() err = decoder.Decode(&config) if err != nil { - return config, err + tomlErr := err.(*toml.StrictMissingError) + return config, buildTomlError(tomlErr) } } else { err = json.Unmarshal(bytes, &config) diff --git a/core/config/runtime.go b/core/config/runtime.go index 6f20eab..5cc8d99 100644 --- a/core/config/runtime.go +++ b/core/config/runtime.go @@ -1,7 +1,8 @@ package config type RuntimeConfig struct { - InitBinary string `json:"init_binary" toml:"init_binary"` - LinuxKernel string `json:"linux_kernel" toml:"linux_kernel"` - Snapshotter string `json:"snapshotter" toml:"snapshotter"` + CloudHypervisorBinary string `json:"cloud_hypervisor_binary" toml:"cloud_hypervisor_binary"` + JailerBinary string `json:"jailer_binary" toml:"jailer_binary"` + InitBinary string `json:"init_binary" toml:"init_binary"` + LinuxKernel string `json:"linux_kernel" toml:"linux_kernel"` } diff --git a/core/instance/interfaces.go b/core/instance/interfaces.go index 0fb44ca..cb25e2b 100644 --- a/core/instance/interfaces.go +++ b/core/instance/interfaces.go @@ -4,7 +4,6 @@ import ( "context" "time" - "github.com/containerd/containerd/v2/client" "github.com/valyentdev/ravel/api" ) @@ -33,7 +32,7 @@ type Handle struct { } type VM interface { - Start(ctx context.Context) (Handle, error) + Start(ctx context.Context) error Exec(ctx context.Context, cmd []string, timeout time.Duration) (*api.ExecResult, error) Run() ExitResult WaitExit(ctx context.Context) bool @@ -43,9 +42,8 @@ type VM interface { } type Builder interface { - PrepareInstance(ctx context.Context, instance *Instance, image client.Image) error BuildInstanceVM(ctx context.Context, instance 
*Instance) (VM, error) - RecoverInstanceVM(ctx context.Context, instance *Instance) (VM, Handle, error) + RecoverInstanceVM(ctx context.Context, instance *Instance) (VM, error) CleanupInstanceVM(ctx context.Context, instance *Instance) error CleanupInstance(ctx context.Context, instance *Instance) error } diff --git a/core/jailer/chroot.go b/core/jailer/chroot.go new file mode 100644 index 0000000..2425c83 --- /dev/null +++ b/core/jailer/chroot.go @@ -0,0 +1,60 @@ +package jailer + +import ( + "fmt" + "path/filepath" + "syscall" + + "golang.org/x/sys/unix" +) + +func chroot(path string) error { + rootDir := "/" + + err := unix.Mount("", rootDir, "", syscall.MS_SLAVE|syscall.MS_REC, "") + if err != nil { + return fmt.Errorf("failed to mount: %w", err) + } + + err = unix.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REC, "") + if err != nil { + return err + } + + err = syscall.Chdir(path) + if err != nil { + return fmt.Errorf("failed to chdir: %w", err) + } + + err = syscall.Mkdir("old_root", 0755) + if err != nil { + return fmt.Errorf("failed to create old_root: %w", err) + } + + oldRootAbs, err := filepath.Abs("./old_root") + if err != nil { + return fmt.Errorf("failed to get absolute path of old root: %w", err) + } + + err = syscall.PivotRoot(path, oldRootAbs) + if err != nil { + return fmt.Errorf("failed to pivot_root: %w", err) + } + + err = syscall.Chdir("/") + if err != nil { + return fmt.Errorf("failed to chdir: %w", err) + } + + err = syscall.Unmount("old_root", syscall.MNT_DETACH) + if err != nil { + return fmt.Errorf("failed to unmount old_root: %w", err) + } + + err = syscall.Rmdir("old_root") + if err != nil { + return fmt.Errorf("failed to remove old_root: %w", err) + } + + return nil +} diff --git a/core/jailer/flags.go b/core/jailer/flags.go new file mode 100644 index 0000000..97ab6d6 --- /dev/null +++ b/core/jailer/flags.go @@ -0,0 +1,68 @@ +package jailer + +import ( + "flag" + "os" + "os/exec" + "strconv" +) + +func setupJailerFlags(config 
*JailerConfig) *flag.FlagSet { + jailerFlagSet := flag.NewFlagSet("", flag.ExitOnError) + jailerFlagSet.Bool("help h", false, "Show help") + jailerFlagSet.IntVar(&config.Uid, "uid", 0, "UID of the process") + jailerFlagSet.IntVar(&config.Gid, "gid", 0, "GID of the process") + jailerFlagSet.StringVar(&config.Netns, "netns", "", "Network namespace to join") + jailerFlagSet.StringVar(&config.NewRoot, "new-root", "", "New root directory") + jailerFlagSet.BoolVar(&config.NewPid, "new-pid", false, "Create new PID namespace") + jailerFlagSet.IntVar(&config.NoFiles, "rlimit-nofiles", 0, "Number of open files") + jailerFlagSet.IntVar(&config.Fsize, "rlimit-fsize", 0, "File size limit") + jailerFlagSet.BoolVar(&config.MountProc, "mount-proc", false, "Mount /proc inside the jail") + jailerFlagSet.StringVar(&config.Cgroup, "cgroup", "", "CGroup to join") + return jailerFlagSet +} + +func makeCmd(jailer string, command []string, opts *options) *exec.Cmd { + args := []string{ + jailer, + "exec", + "--uid", strconv.FormatInt(int64(opts.Uid), 10), + "--gid", strconv.FormatInt(int64(opts.Gid), 10), + "--new-root", opts.NewRoot, + } + + if opts.netns != "" { + args = append(args, "--netns", opts.netns) + } + + if opts.newPidNS { + args = append(args, "--new-pid") + } + + if opts.mountProc { + args = append(args, "--mount-proc") + } + + if opts.setRlimits { + args = append(args, "--rlimit-fsize", strconv.FormatInt(int64(opts.fsize), 10)) + args = append(args, "--rlimit-nofiles", strconv.FormatInt(int64(opts.noFiles), 10)) + } + + if opts.cgroup != "" { + args = append(args, "--cgroup", opts.cgroup) + } + + args = append(args, "--") + args = append(args, command...) 
+ + cmd := &exec.Cmd{ + Path: jailer, + Args: args, + Env: []string{}, + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + + return cmd +} diff --git a/core/jailer/jail.go b/core/jailer/jail.go new file mode 100644 index 0000000..54bb8e6 --- /dev/null +++ b/core/jailer/jail.go @@ -0,0 +1,266 @@ +package jailer + +import ( + "fmt" + "os" + "os/exec" + "path" + "syscall" + + "golang.org/x/sys/unix" +) + +type Jail struct { + jailer string + *jailDir + options *options +} + +func (j *Jail) Command(cmd string, args ...string) *exec.Cmd { + return makeCmd(j.jailer, append([]string{cmd}, args...), j.options) +} + +type JailConfig struct { + Uid int + Gid int + NewRoot string +} + +type Device struct { + Path string // Path to the device in the jail + Mode uint32 // Mode of the device + Dev uint64 // Device number +} + +type file struct { + Src string // Absolute path to the source file in the host + Dst string // Destination file relative in the jail + Mode uint32 + Readonly bool +} + +type options struct { + JailConfig + setRlimits bool + noFiles int + fsize int + devices []Device + cgroup string + netns string + copyFiles []file + hardLinks []file + newPidNS bool + mountProc bool +} + +func WithCopyFile(src string, dst string, mode uint32) Opt { + return func(o *options) error { + o.copyFiles = append(o.copyFiles, file{Src: src, Dst: dst, Mode: mode}) + return nil + } +} + +func WithBinary(src string, dst string) Opt { + return func(o *options) error { + o.copyFiles = append(o.copyFiles, file{Src: src, Dst: src, Mode: 0700}) + return nil + } +} + +func WithHardLink(src string, dst string, readonly bool) Opt { + return func(o *options) error { + o.hardLinks = append(o.hardLinks, file{Src: src, Dst: dst}) + return nil + } +} + +func WithNewPidNS() Opt { + return func(o *options) error { + o.newPidNS = true + return nil + } +} + +func WithNetNs(netns string) Opt { + return func(o *options) error { + o.netns = netns + return nil + } +} + +func 
WithCgroup(cgroup string) Opt { + return func(o *options) error { + o.cgroup = cgroup + return nil + } +} + +func WithResourceLimits(noFiles, fsize int) Opt { + return func(o *options) error { + o.setRlimits = true + o.noFiles = noFiles + o.fsize = fsize + return nil + } +} + +func WithKVM() Opt { + return func(o *options) error { + o.devices = append(o.devices, Device{ + Path: devKVMPath, + Mode: devKVMMode, + Dev: unix.Mkdev(devKVMMajor, devKVMMinor), + }) + return nil + } +} + +func WithTUN() Opt { + return func(o *options) error { + o.devices = append(o.devices, Device{ + Path: devTUNPath, + Mode: devTUNMode, + Dev: unix.Mkdev(devTUNMajor, devTUNMinor), + }) + return nil + } +} + +func WithBlockDevice(device string) Opt { + return func(o *options) error { + stat, err := os.Stat(device) + if err != nil { + return err + } + rdev := stat.Sys().(*syscall.Stat_t).Rdev + + o.devices = append(o.devices, Device{ + Path: device, + Mode: 0600 | unix.S_IFBLK, + Dev: rdev, + }) + + return nil + } +} + +func WithURandom() Opt { + return func(o *options) error { + o.devices = append(o.devices, Device{ + Path: devURandomPath, + Mode: devURandomMode, + Dev: unix.Mkdev(devURandomMajor, devURandomMinor), + }) + + return nil + } +} + +func WithMountProc() Opt { + return func(o *options) error { + o.mountProc = true + return nil + } +} + +type Opt func(*options) error + +func makeOptions(jailConfig JailConfig, opts ...Opt) (*options, error) { + o := &options{ + JailConfig: jailConfig, + } + + for _, opt := range opts { + err := opt(o) + if err != nil { + return nil, err + } + } + + return o, nil +} + +func (j *Jail) setupDevices() error { + err := j.Mkdir("/dev") + if err != nil { + return fmt.Errorf("failed to create /dev: %w", err) + } + + for _, dev := range j.options.devices { + dir := path.Dir(dev.Path) + err = j.MkdirAll(dir) + if err != nil { + return fmt.Errorf("failed to create %s: %w", dir, err) + } + + err = j.MknodAndOwn(dev.Path, dev.Mode, dev.Dev) + if err != nil { + 
return fmt.Errorf("failed to mknod %s: %w", dev.Path, err) + } + + } + return nil +} + +func (j *Jail) setupFiles() error { + for _, file := range j.options.copyFiles { + dir := path.Dir(file.Dst) + err := j.MkdirAll(dir) + if err != nil { + return fmt.Errorf("failed to create %s: %w", dir, err) + } + + err = j.CopyFile(file.Src, file.Dst, file.Mode) + if err != nil { + return fmt.Errorf("failed to copy file %s to %s: %w", file.Src, file.Dst, err) + } + } + + for _, file := range j.options.hardLinks { + dir := path.Dir(file.Dst) + err := j.MkdirAll(dir) + if err != nil { + return fmt.Errorf("failed to create %s: %w", dir, err) + } + err = j.HardLink(file.Src, file.Dst, file.Readonly) + if err != nil { + return fmt.Errorf("failed to hard link %s to %s: %w", file.Src, file.Dst, err) + } + } + + return nil +} + +func CleanupJailDir(dir string) error { + return os.RemoveAll(dir) +} + +func CreateJail(jailer string, jailConfig JailConfig, opts ...Opt) (*Jail, error) { + o, err := makeOptions(jailConfig, opts...) 
+ if err != nil { + return nil, err + } + + dir, err := createJailDir(jailConfig.NewRoot, jailConfig.Uid, jailConfig.Gid) + if err != nil { + return nil, fmt.Errorf("failed to create jail dir: %w", err) + } + + j := &Jail{ + jailDir: dir, + jailer: jailer, + options: o, + } + + err = j.setupDevices() + if err != nil { + return nil, fmt.Errorf("failed to setup devices: %w", err) + } + + err = j.setupFiles() + if err != nil { + return nil, fmt.Errorf("failed to setup files: %w", err) + } + + return j, nil +} diff --git a/core/jailer/jaildir.go b/core/jailer/jaildir.go new file mode 100644 index 0000000..5311d4f --- /dev/null +++ b/core/jailer/jaildir.go @@ -0,0 +1,181 @@ +package jailer + +import ( + "fmt" + "io" + "os" + "path" + + "golang.org/x/sys/unix" +) + +const ( + devPerms = 0o600 + devKVMPath = "/dev/kvm" + devKVMMajor = 10 + devKVMMinor = 232 + devKVMMode = unix.S_IFCHR | devPerms + + devTUNPath = "/dev/net/tun" + devTUNMajor = 10 + devTUNMinor = 200 + devTUNMode = unix.S_IFCHR | devPerms + + devURandomPath = "/dev/urandom" + devURandomMajor = 1 + devURandomMinor = 9 + devURandomMode = unix.S_IFCHR | devPerms +) + +type jailDir struct { + path string + uid int + gid int +} + +func createJailDir(dir string, uid, gid int) (*jailDir, error) { + err := mkdirAndChown(dir, uid, gid, 0700) + if err != nil { + return nil, fmt.Errorf("failed to create jail directory: %w", err) + } + + err = mkdirAndChown(path.Join(dir, "/dev"), uid, gid, 0700) + if err != nil { + return nil, fmt.Errorf("failed to create jail /dev directory: %w", err) + } + + return &jailDir{ + path: dir, + uid: uid, + gid: gid, + }, nil +} + +func (j *jailDir) pathInRoot(p string) string { + return path.Join(j.path, p) +} + +func (j *jailDir) MkdirAll(dir string) error { + return mkdirAllAndChown(j.pathInRoot(dir), j.uid, j.gid, 0700) +} + +func (j *jailDir) Mkdir(dir string) error { + return mkdirAllAndChown(j.pathInRoot(dir), j.uid, j.gid, 0700) +} + +func (j *jailDir) MknodAndOwn(device string, 
mode uint32, dev uint64) error { + devicePath := j.pathInRoot(device) + err := unix.Mknod(devicePath, mode, int(dev)) + if err != nil { + return fmt.Errorf("failed to mknod %s: %w", devicePath, err) + } + + err = unix.Chown(devicePath, j.uid, j.gid) + if err != nil { + return fmt.Errorf("failed to chown %s: %w", devicePath, err) + } + + return nil +} + +func (j *jailDir) AddBlockDevice(device string) error { + stat, err := os.Stat(device) + if err != nil { + return err + } + rdev := stat.Sys().(*unix.Stat_t).Rdev + + err = j.MkdirAll(path.Dir(device)) + if err != nil { + return err + } + + err = unix.Mknod(device, 0600|unix.S_IFBLK, int(rdev)) + if err != nil { + return err + } + + err = unix.Chown(device, j.uid, j.gid) + if err != nil { + return err + } + + return nil +} + +func (j *jailDir) CopyFile(src, dst string, mode uint32) error { + dstPath := j.pathInRoot(dst) + srcFile, err := os.Open(src) + if err != nil { + return err + } + + defer srcFile.Close() + + dstFile, err := os.Create(dstPath) + if err != nil { + return err + } + + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + if err != nil { + return err + } + + err = unix.Chown(dstPath, j.uid, j.gid) + if err != nil { + return err + } + + err = unix.Chmod(dstPath, mode) + if err != nil { + return err + } + + return nil +} + +func (j *jailDir) CreateFile(path string, mode uint32) (*os.File, error) { + pathInRoot := j.pathInRoot(path) + file, err := os.Create(pathInRoot) + if err != nil { + return nil, err + } + + err = unix.Chown(pathInRoot, j.uid, j.gid) + if err != nil { + return nil, err + } + + err = unix.Chmod(pathInRoot, mode) + if err != nil { + return nil, err + } + + return file, nil +} + +func (j *jailDir) HardLink(src string, dst string, readonly bool) error { + dstPath := j.pathInRoot(dst) + + err := os.Link(src, dstPath) + if err != nil { + return err + } + + err = unix.Chown(dstPath, j.uid, j.gid) + if err != nil { + return err + } + + if readonly { + err = os.Chmod(dstPath, 
0400) + if err != nil { + return err + } + } + + return nil +} diff --git a/core/jailer/jailer.go b/core/jailer/jailer.go new file mode 100644 index 0000000..51d1863 --- /dev/null +++ b/core/jailer/jailer.go @@ -0,0 +1,253 @@ +package jailer + +import ( + "fmt" + "math" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "syscall" + + "github.com/valyentdev/ravel/internal/cgroups" + "github.com/vishvananda/netns" + + "golang.org/x/sys/execabs" + "golang.org/x/sys/unix" +) + +const defaultFolderPerm = 0o700 + +type JailerConfig struct { + Uid int + Gid int + NewRoot string + Netns string + NewPid bool + Command []string + NoFiles int + Fsize int + MountProc bool + Cgroup string +} + +func setupRLimits(config *JailerConfig) error { + if config.NoFiles != 0 { + err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{ + Cur: uint64(config.NoFiles), + Max: uint64(config.NoFiles), + }) + if err != nil { + return fmt.Errorf("failed to set rlimit nofile: %w", err) + } + } + + if config.Fsize != 0 { + err := unix.Setrlimit(unix.RLIMIT_FSIZE, &unix.Rlimit{ + Cur: uint64(config.Fsize), + Max: uint64(config.Fsize), + }) + if err != nil { + return fmt.Errorf("failed to set rlimit fsize: %w", err) + } + } + + return nil +} + +func joinNetNs(ns string) error { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + h, err := netns.GetFromName(ns) + if err != nil { + return err + } + + err = netns.Set(h) + if err != nil { + return err + } + + return nil +} + +func closeFileDescriptors() error { + _, _, err := unix.Syscall(unix.SYS_CLOSE_RANGE, 3, math.MaxUint32, unix.CLOSE_RANGE_UNSHARE) + if err != 0 { + return err + } + return nil +} + +func sanitizeProcess() error { + os.Clearenv() + err := closeFileDescriptors() + if err != nil { + return err + } + + return nil +} + +func mountProc(uid, gid int) error { + err := mkdirAndChown("/proc", uid, gid, defaultFolderPerm) + if err != nil { + return fmt.Errorf("failed to create /proc: %w", err) + } + + err = 
unix.Mount("proc", "/proc", "proc", 0, "") + if err != nil { + return err + } + + return nil +} + +// for cloud-hypervisor +func populateVirtualNetDevices(gid, uid int, newRoot string) error { + tocreate := [...]string{"/sys", "/sys/class", "/sys/class/net"} + + source := "/sys/devices/virtual/net" + destPath := path.Join(newRoot, "/sys/class/net") + var err error + for _, dir := range tocreate { + err := mkdirAndChown(path.Join(newRoot, dir), uid, gid, defaultFolderPerm) + if err != nil { + return fmt.Errorf("failed to create %s: %w", dir, err) + } + } + + entries, err := os.ReadDir(source) + if err != nil { + return fmt.Errorf("failed to read /sys/devices/virtual/net: %w", err) + } + + for _, entry := range entries { + err = mkdirAndChown(path.Join(destPath, entry.Name()), uid, gid, defaultFolderPerm) + if err != nil { + return fmt.Errorf("failed to create %s: %w", entry.Name(), err) + } + } + + return nil +} + +func runJailed(config *JailerConfig) error { + newRootAbsolute, err := filepath.Abs(config.NewRoot) + if err != nil { + return fmt.Errorf("failed to get absolute path of new root: %w", err) + } + + err = populateVirtualNetDevices(config.Gid, config.Uid, newRootAbsolute) + if err != nil { + return fmt.Errorf("failed to populate virtual net devices: %w", err) + } + + err = chroot(newRootAbsolute) + if err != nil { + return fmt.Errorf("failed to chroot: %w", err) + } + + if config.MountProc { + err = mountProc(config.Uid, config.Gid) + if err != nil { + return fmt.Errorf("failed to mount proc: %w", err) + } + } + + null := os.NewFile(uintptr(unix.Stdin), "/dev/null") + fifoPath := "vm.logs" + + err = mkFifo(fifoPath, config.Uid, config.Gid, 0o600) + if err != nil { + return fmt.Errorf("failed to create fifo: %w", err) + } + + fd, err := unix.Open(fifoPath, unix.O_RDWR|unix.O_NONBLOCK, 0) + if err != nil { + return fmt.Errorf("failed to open fifo: %w", err) + } + + fifo := os.NewFile(uintptr(fd), fifoPath) + defer fifo.Close() + + cmd := execabs.Cmd{ + Path: 
config.Command[0], + Args: config.Command, + Env: []string{}, + Stdin: null, + Stderr: fifo, + Stdout: fifo, + SysProcAttr: &unix.SysProcAttr{ + Credential: &syscall.Credential{ + Uid: uint32(config.Uid), + Gid: uint32(config.Gid), + }, + }, + } + + err = cmd.Run() + if err != nil { + fmt.Println("Failed to run command: ", err) + return err + } + + return nil +} + +func execJailed(config *JailerConfig) error { + err := sanitizeProcess() + if err != nil { + return fmt.Errorf("failed to sanitize process: %w", err) + } + + err = setupRLimits(config) + if err != nil { + return fmt.Errorf("failed to setup rlimits: %w", err) + } + + if config.Netns != "" { + err := joinNetNs(config.Netns) + if err != nil { + return fmt.Errorf("failed to join network namespace: %w", err) + } + } + + cloneFlags := syscall.CLONE_NEWNS + if config.NewPid { + cloneFlags |= syscall.CLONE_NEWPID + } + + if config.Cgroup != "" { + err := cgroups.JoinCgroup(config.Cgroup) + if err != nil { + return fmt.Errorf("failed to join cgroup: %w", err) + } + } + + err = reexec(uintptr(cloneFlags)) + if err != nil { + return err + } + + return nil +} + +func reexec(cloneFlags uintptr) error { + cmd := exec.Command("/proc/self/exe", append([]string{"run"}, os.Args[2:]...)...) 
+ cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = os.Stdin + + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: cloneFlags, + } + + err := cmd.Run() + if err != nil { + return err + } + return nil +} diff --git a/core/jailer/run.go b/core/jailer/run.go new file mode 100644 index 0000000..50a08be --- /dev/null +++ b/core/jailer/run.go @@ -0,0 +1,74 @@ +package jailer + +import ( + "fmt" + "os" + "slices" +) + +func validateConfig(config *JailerConfig) error { + if config.NewRoot == "" { + return fmt.Errorf("missing new root directory") + } + if len(config.Command) == 0 { + return fmt.Errorf("missing command") + } + if config.Fsize < 0 { + return fmt.Errorf("invalid file size limit") + } + + if config.NoFiles < 0 { + return fmt.Errorf("invalid number of open files") + } + + return nil +} + +func parseArgs() (*JailerConfig, error) { + args := os.Args[2:] + + sepIdx := slices.Index(args, "--") + if sepIdx == -1 { + return nil, fmt.Errorf("missing '--' separator and command") + } + if sepIdx+1 >= len(args) { + return nil, fmt.Errorf("missing command") + } + + jailerArgs := args[:sepIdx] + command := args[sepIdx+1:] + + var config JailerConfig + jailerFlagSet := setupJailerFlags(&config) + err := jailerFlagSet.Parse(jailerArgs) + if err != nil { + return nil, err + } + + config.Command = command + + return &config, validateConfig(&config) +} + +func Run() error { + if len(os.Args) < 2 { + return fmt.Errorf("usage: %s -- ", os.Args[0]) + } + + action := os.Args[1] + + config, err := parseArgs() + if err != nil { + return err + } + + if action == "exec" { + return execJailed(config) + } + + if action == "run" { + return runJailed(config) + } + + return fmt.Errorf("unknown action: %s", action) +} diff --git a/core/jailer/util.go b/core/jailer/util.go new file mode 100644 index 0000000..efb9e96 --- /dev/null +++ b/core/jailer/util.go @@ -0,0 +1,44 @@ +package jailer + +import ( + "os" + + "golang.org/x/sys/unix" +) + +func mkdirAndChown(path string, uid, 
gid int, perm os.FileMode) error { + err := os.Mkdir(path, perm) + if err != nil { + return err + } + + err = os.Chown(path, uid, gid) + if err != nil { + return err + } + + return nil +} + +func mkdirAllAndChown(path string, uid, gid int, perm os.FileMode) error { + err := os.MkdirAll(path, perm) + if err != nil { + return err + } + + err = os.Chown(path, uid, gid) + if err != nil { + return err + } + + return nil +} + +func mkFifo(path string, uid, gid int, mode uint32) error { + err := unix.Mkfifo(path, mode) + if err != nil { + return err + } + + return os.Chown(path, uid, gid) +} diff --git a/core/networking/tap/prepare.go b/core/networking/tap/prepare.go index cfdbd2b..bde2ce8 100644 --- a/core/networking/tap/prepare.go +++ b/core/networking/tap/prepare.go @@ -4,9 +4,9 @@ import ( "github.com/valyentdev/ravel/core/instance" ) -func PrepareInstanceTapDevice(id string, config instance.NetworkingConfig) (string, error) { +func PrepareInstanceTapDevice(id string, config instance.NetworkingConfig, uid, gid int) (string, error) { tapName := config.TapDevice - err := createTap(tapName) + err := createTap(tapName, uint32(uid), uint32(gid)) if err != nil { return "", err } diff --git a/core/networking/tap/tap.go b/core/networking/tap/tap.go index 0b75d06..f3cb058 100644 --- a/core/networking/tap/tap.go +++ b/core/networking/tap/tap.go @@ -6,8 +6,10 @@ import ( "github.com/vishvananda/netlink" ) -func createTap(name string) error { +func createTap(name string, uid, gid uint32) error { tap := &netlink.Tuntap{ + Owner: uid, + Group: gid, LinkAttrs: netlink.LinkAttrs{ Name: name, }, diff --git a/go.mod b/go.mod index 8b1ae72..e5bba23 100755 --- a/go.mod +++ b/go.mod @@ -5,7 +5,9 @@ go 1.23.2 replace github.com/valyentdev/ravel/api => ./api require ( + github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8 github.com/cloudflare/cfssl v1.6.5 + github.com/containerd/cgroups/v3 v3.0.4 github.com/containerd/containerd/v2 v2.0.0-rc.0 github.com/containerd/errdefs v0.1.0 
github.com/coreos/go-iptables v0.7.0 @@ -26,7 +28,7 @@ require ( github.com/valyentdev/ravel-init v0.1.0 go.etcd.io/bbolt v1.3.11 google.golang.org/grpc v1.67.1 - google.golang.org/protobuf v1.35.1 + google.golang.org/protobuf v1.35.2 sigs.k8s.io/yaml v1.4.0 ) @@ -40,7 +42,7 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect github.com/Microsoft/hcsshim v0.12.0 // indirect - github.com/containerd/cgroups/v3 v3.0.3 // indirect + github.com/cilium/ebpf v0.16.0 // indirect github.com/containerd/continuity v0.4.3 // indirect github.com/containerd/fifo v1.1.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -49,6 +51,7 @@ require ( github.com/containerd/stargz-snapshotter/estargz v0.14.3 // indirect github.com/containerd/ttrpc v1.2.3 // indirect github.com/containerd/typeurl/v2 v2.1.1 // indirect + github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.5.0 // indirect github.com/docker/cli v27.1.1+incompatible // indirect @@ -59,6 +62,7 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/certificate-transparency-go v1.1.7 // indirect @@ -103,8 +107,9 @@ require ( go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect - golang.org/x/mod v0.17.0 // indirect - golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect + golang.org/x/mod v0.22.0 // indirect + golang.org/x/tools v0.27.0 // indirect google.golang.org/genproto/googleapis/rpc 
v0.0.0-20240814211410-ddb44dafa142 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/klog/v2 v2.120.1 // indirect @@ -113,7 +118,6 @@ require ( ) require ( - github.com/containerd/console v1.0.4 github.com/mdlayher/vsock v1.2.1 github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/runc v1.1.15 // indirect @@ -121,10 +125,10 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/valyentdev/ravel/api v0.0.0-00010101000000-000000000000 github.com/vishvananda/netlink v1.3.0 - github.com/vishvananda/netns v0.0.4 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.30.0 - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 - golang.org/x/text v0.19.0 // indirect + github.com/vishvananda/netns v0.0.4 + golang.org/x/crypto v0.29.0 // indirect + golang.org/x/net v0.31.0 + golang.org/x/sync v0.9.0 // indirect + golang.org/x/sys v0.27.0 + golang.org/x/text v0.20.0 // indirect ) diff --git a/go.sum b/go.sum index b86f501..4ac5881 100755 --- a/go.sum +++ b/go.sum @@ -19,15 +19,17 @@ github.com/Microsoft/hcsshim v0.12.0 h1:rbICA+XZFwrBef2Odk++0LjFvClNCJGRK+fsrP25 github.com/Microsoft/hcsshim v0.12.0/go.mod h1:RZV12pcHCXQ42XnlQ3pz6FZfmrC1C+R4gaOHhRNML1g= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8 h1:SjZ2GvvOononHOpK84APFuMvxqsk3tEIaKH/z4Rpu3g= +github.com/c9s/goprocinfo v0.0.0-20210130143923-c95fcf8c64a8/go.mod h1:uEyr4WpAH4hio6LFriaPkL938XnrvLpNPmQHBdrmbIE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= +github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 
github.com/cloudflare/cfssl v1.6.5 h1:46zpNkm6dlNkMZH/wMW22ejih6gIaJbzL2du6vD7ZeI= github.com/cloudflare/cfssl v1.6.5/go.mod h1:Bk1si7sq8h2+yVEDrFJiz3d7Aw+pfjjJSZVaD+Taky4= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= -github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= -github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn4ro= -github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= +github.com/containerd/cgroups/v3 v3.0.4 h1:2fs7l3P0Qxb1nKWuJNFiwhp2CqiKzho71DQkDrHJIo4= +github.com/containerd/cgroups/v3 v3.0.4/go.mod h1:SA5DLYnXO8pTGYiAHXz94qvLQTKfVM5GEVisn4jpins= github.com/containerd/containerd/v2 v2.0.0-rc.0 h1:61vGd5A/r8FikzYqc7aj0OFZ82IUV3BGNwticthSO28= github.com/containerd/containerd/v2 v2.0.0-rc.0/go.mod h1:AjRQvzfCgifYCel6+K8/OK5mw6PaQFAGEHOgN0caPAM= github.com/containerd/continuity v0.4.3 h1:6HVkalIp+2u1ZLH1J/pYX2oBVXlJZvh1X1A7bEZ9Su8= @@ -50,6 +52,8 @@ github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9Fqctt github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= github.com/coreos/go-iptables v0.7.0 h1:XWM3V+MPRr5/q51NuWSgU0fqMad64Zyxs8ZUoMsamr8= github.com/coreos/go-iptables v0.7.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= +github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/danielgtaylor/huma/v2 v2.26.0 h1:lON4pIcckuSQJNDi6WkOu0sS7mxvlNkTAGbc3BrRXTc= @@ -87,9 +91,14 @@ 
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= +github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= +github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -148,6 +157,11 @@ github.com/jackc/tern/v2 v2.2.3 h1:UWD24+m3zP7eRSlX9vYg2tb6Bf0V161IdOuo4YWWyd4= github.com/jackc/tern/v2 v2.2.3/go.mod h1:EStqJVUowhII9OpCTcZISE1BfpGlwE4oq0oQtHAGuuI= github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= +github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= +github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= +github.com/jsimonetti/rtnetlink v1.3.5 h1:hVlNQNRlLDGZz31gBPicsG7Q53rnlsz1l1Ix/9XlpVA= +github.com/jsimonetti/rtnetlink/v2 v2.0.1 
h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM= +github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46 h1:veS9QfglfvqAw2e+eeNT/SbGySq8ajECXJ9e4fPoLhY= @@ -169,6 +183,8 @@ github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g= +github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw= github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos= github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ= github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= @@ -318,6 +334,8 @@ go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGX go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco= go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI= go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU= +go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -326,9 +344,11 @@ golang.org/x/crypto v0.0.0-20201124201722-c8d3bf9c5392/go.mod h1:jdWPYTVW3xRLrWP golang.org/x/crypto v0.0.0-20201208171446-5f87f3452ae9/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -336,8 +356,8 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= -golang.org/x/mod v0.17.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -354,8 +374,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -364,8 +384,8 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync 
v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -381,13 +401,12 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220906165534-d0df966e6959/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ 
-399,8 +418,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -411,8 +430,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= -golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= +golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -442,8 +461,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= -google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= diff --git a/internal/cgroups/cgroups.go b/internal/cgroups/cgroups.go new file mode 100644 index 0000000..a3d38e9 --- /dev/null +++ b/internal/cgroups/cgroups.go @@ -0,0 +1,16 @@ +package cgroups + +import ( + "os" + + "github.com/containerd/cgroups/v3/cgroup2" +) + +func JoinCgroup(group string) error { + m, err := cgroup2.Load(group) + if err != nil { + return err + } + + return m.AddProc(uint64(os.Getpid())) +} diff --git a/internal/resources/resources.go b/internal/resources/resources.go new file mode 100644 index 0000000..ea78336 --- /dev/null +++ b/internal/resources/resources.go @@ -0,0 +1,20 @@ +package resources + +import ( + "errors" + + "github.com/c9s/goprocinfo/linux" +) + +func GetHostCPUFrequency() (int64, error) { + infos, err := linux.ReadCPUInfo("/proc/cpuinfo") + if err != nil { + return 0, err + } + + if len(infos.Processors) == 0 { 
+ return 0, errors.New("no CPU info found") + } + + return int64(infos.Processors[0].MHz), nil +} diff --git a/pkg/cloudhypervisor/vmm.go b/pkg/cloudhypervisor/vmm.go index 1985ca5..4c7d0e6 100644 --- a/pkg/cloudhypervisor/vmm.go +++ b/pkg/cloudhypervisor/vmm.go @@ -6,75 +6,29 @@ import ( "fmt" "net" "net/http" - "os/exec" - "syscall" "time" ) var ErrVMMUnavailable = errors.New("vmm is not available") type VMM struct { - options *vmmOpts - client *ClientWithResponses - httpClient *http.Client + client *ClientWithResponses } -type vmmOpts struct { - sysProcAttr *syscall.SysProcAttr - cloudHypervisorBinaryPath string - args []string +type VMMConfig struct { + CloudHypervisorBinaryPath string + Socket string + AdditionalArgs []string } -type VMMOpt func(*vmmOpts) error - -func WithSysProcAttr(sysProcAttr *syscall.SysProcAttr) VMMOpt { - return func(o *vmmOpts) error { - o.sysProcAttr = sysProcAttr - return nil - } -} - -func WithCloudHypervisorBinaryPath(path string) VMMOpt { - return func(o *vmmOpts) error { - o.cloudHypervisorBinaryPath = path - return nil - } -} - -func (vmm *VMM) StartVMM(ctx context.Context) error { - cmd := exec.Command(vmm.options.cloudHypervisorBinaryPath, vmm.options.args...) 
- if vmm.options.sysProcAttr != nil { - cmd.SysProcAttr = vmm.options.sysProcAttr - } - - err := cmd.Start() - if err != nil { - return fmt.Errorf("failed to start cloud-hypervisor process: %w", err) - } - - return nil -} - -func NewVMM(socket string, opts ...VMMOpt) (*VMM, error) { - client, conn, err := newCHClient(socket) +func NewVMMClient(socket string) (*VMM, error) { + client, err := newCHClient(socket) if err != nil { return nil, err } - options := vmmOpts{ - cloudHypervisorBinaryPath: "cloud-hypervisor", - args: []string{"--api-socket", socket}, - } - for _, opt := range opts { - if err := opt(&options); err != nil { - return nil, fmt.Errorf("failed to apply option: %w", err) - } - } - vmm := &VMM{ - httpClient: conn, - client: client, - options: &options, + client: client, } return vmm, nil @@ -118,7 +72,7 @@ func (v *VMM) PingVMM(ctx context.Context) (VmmPingResponse, error) { return *res.JSON200, nil } -func newCHClient(socket string) (*ClientWithResponses, *http.Client, error) { +func newCHClient(socket string) (*ClientWithResponses, error) { httpClient := &http.Client{ Transport: &http.Transport{ DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { @@ -129,8 +83,8 @@ func newCHClient(socket string) (*ClientWithResponses, *http.Client, error) { client, err := NewClientWithResponses("http://localhost/api/v1", WithHTTPClient(httpClient)) if err != nil { - return nil, nil, err + return nil, err } - return client, httpClient, nil + return client, nil } diff --git a/runtime/instance_create.go b/runtime/instance_create.go index 80304a0..dd6c0b3 100644 --- a/runtime/instance_create.go +++ b/runtime/instance_create.go @@ -83,16 +83,7 @@ func (r *Runtime) CreateInstance(ctx context.Context, opt instance.InstanceOptio CreatedAt: time.Now(), } - if err := r.instanceBuilder.PrepareInstance(ctx, &i, image); err != nil { - return nil, fmt.Errorf("failed to prepare instance: %w", err) - } - defer func() { - if err != nil { - 
r.instanceBuilder.CleanupInstance(ctx, &i) - } - }() - - if err := r.instancesStore.PutInstance(i); err != nil { + if err = r.instancesStore.PutInstance(i); err != nil { return nil, fmt.Errorf("failed to save instance: %w", err) } diff --git a/runtime/instancerunner/vmrunner.go b/runtime/instancerunner/vmrunner.go index 8bbb35c..a3f70c1 100644 --- a/runtime/instancerunner/vmrunner.go +++ b/runtime/instancerunner/vmrunner.go @@ -2,6 +2,7 @@ package instancerunner import ( "context" + "fmt" "log/slog" "sync/atomic" "time" @@ -16,9 +17,7 @@ type vmRunner struct { networking instance.NetworkingService vmBuilder instance.Builder logger *logging.InstanceLogger - - i instance.Instance - + i instance.Instance hasStarted atomic.Bool vm instance.VM waitCh chan struct{} @@ -50,14 +49,18 @@ func newVMRunner( } func (r *vmRunner) Recover() error { - vm, h, err := r.vmBuilder.RecoverInstanceVM(context.Background(), &r.i) + vm, err := r.vmBuilder.RecoverInstanceVM(context.Background(), &r.i) if err != nil { slog.Error("failed to recover vm", "error", err) + cerr := r.vmBuilder.CleanupInstanceVM(context.Background(), &r.i) + if cerr != nil { + slog.Error("failed to cleanup vm", "error", cerr) + } return err } r.vm = vm - go r.run(h) + go r.run() r.hasStarted.Store(true) return nil @@ -100,12 +103,12 @@ func (r *vmRunner) Start() error { return err } defer func() { - if err != nil { - err := r.networking.CleanupInstanceNetwork(r.i.Id, r.i.Network) - if err != nil { - slog.Error("failed to cleanup instance network", "error", err) - } - } + // if err != nil { + // err := r.networking.CleanupInstanceNetwork(r.i.Id, r.i.Network) + // if err != nil { + // slog.Error("failed to cleanup instance network", "error", err) + // } + // } }() slog.Debug("building vm") @@ -126,30 +129,34 @@ func (r *vmRunner) Start() error { r.vm = vm slog.Debug("starting vm") - h, err := vm.Start(ctx) + err = vm.Start(ctx) if err != nil { return err } r.hasStarted.Store(true) - go r.run(h) + go r.run() return 
nil } -func (r *vmRunner) run(h instance.Handle) { - if h.Console != "" { - err := r.logger.Start(h.Console) - if err != nil { - slog.Error("failed to start logger", "error", err) - } +func getLogFile(id string) string { + return fmt.Sprintf("/var/lib/ravel/instances/%s/vm.logs", id) +} - defer r.logger.Stop() +func (r *vmRunner) run() { + err := r.logger.Start(getLogFile(r.i.Id)) + if err != nil { + slog.Error("failed to start logger", "error", err) + err = nil // ignore we must continue } + + defer r.logger.Stop() + result := r.vm.Run() r.exitResult = result - err := r.vmBuilder.CleanupInstanceVM(context.Background(), &r.i) + err = r.vmBuilder.CleanupInstanceVM(context.Background(), &r.i) if err != nil { slog.Error("failed to cleanup vm", "error", err) } diff --git a/runtime/logging/instance_logger.go b/runtime/logging/instance_logger.go index e45851e..541edf0 100644 --- a/runtime/logging/instance_logger.go +++ b/runtime/logging/instance_logger.go @@ -2,11 +2,11 @@ package logging import ( "bufio" + "io" "os" "sync" "time" - "github.com/containerd/console" "github.com/valyentdev/ravel/api" "github.com/valyentdev/ravel/pkg/pubsub" ) @@ -46,12 +46,7 @@ func NewInstanceLogger(instanceId string) *InstanceLogger { } func (m *InstanceLogger) Start(path string) error { - file, err := os.Open(path) - if err != nil { - return err - } - - pty, err := console.ConsoleFromFile(file) + file, err := os.OpenFile(path, os.O_RDONLY, os.ModeNamedPipe) if err != nil { return err } @@ -60,22 +55,21 @@ func (m *InstanceLogger) Start(path string) error { go func() { m.bc.Start() - m.startReadingPty(pty) + m.startReading(file) file.Close() - pty.Close() m.bc.Stop() }() return nil } -func (m *InstanceLogger) startReadingPty(pty console.Console) { +func (m *InstanceLogger) startReading(r io.Reader) { for { select { case <-m.stop: return default: - reader := bufio.NewReaderSize(pty, 4096) + reader := bufio.NewReaderSize(r, 4096) line, _, err := reader.ReadLine() if err != nil { diff --git 
a/runtime/networking.go b/runtime/networking.go index 92974c7..e55883b 100644 --- a/runtime/networking.go +++ b/runtime/networking.go @@ -7,15 +7,17 @@ import ( "github.com/valyentdev/ravel/core/networking" "github.com/valyentdev/ravel/core/networking/tap" "github.com/valyentdev/ravel/internal/id" + "github.com/valyentdev/ravel/runtime/vm" ) type networkService struct { localSubnetAllocator *networking.BasicSubnetAllocator + jailerUser vm.User } var _ instance.NetworkingService = (*networkService)(nil) -func newNetworkService() *networkService { +func newNetworkService(user vm.User) *networkService { localSubnetAllocator, err := networking.NewBasicSubnetAllocator(networking.SubnetPool{ Network: networking.Network{ Family: networking.IPv4, @@ -31,6 +33,7 @@ func newNetworkService() *networkService { return &networkService{ localSubnetAllocator: localSubnetAllocator, + jailerUser: user, } } @@ -39,7 +42,7 @@ func (n *networkService) CleanupInstanceNetwork(id string, config instance.Netwo } func (n *networkService) EnsureInstanceNetwork(id string, config instance.NetworkingConfig) error { - _, err := tap.PrepareInstanceTapDevice(id, config) + _, err := tap.PrepareInstanceTapDevice(id, config, n.jailerUser.Uid, n.jailerUser.Gid) return err } diff --git a/runtime/runtime.go b/runtime/runtime.go index 53ac016..ab3ecb3 100755 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log/slog" + "os" "github.com/containerd/containerd/v2/client" ctrderr "github.com/containerd/errdefs" @@ -11,10 +12,15 @@ import ( "github.com/valyentdev/ravel/core/daemon" "github.com/valyentdev/ravel/core/instance" "github.com/valyentdev/ravel/core/registry" + "github.com/valyentdev/ravel/internal/resources" "github.com/valyentdev/ravel/runtime/images" "github.com/valyentdev/ravel/runtime/vm" ) +func createRavelCgroup() error { + return os.MkdirAll("/sys/fs/cgroup/ravel", 0755) +} + type Runtime struct { instancesStore instance.InstanceStore imagesUsage 
*images.ImagesUsage @@ -28,6 +34,23 @@ type Runtime struct { var _ daemon.Runtime = (*Runtime)(nil) func New(config *config.RuntimeConfig, registries registry.RegistriesConfig, is instance.InstanceStore) (*Runtime, error) { + err := os.MkdirAll("/var/lib/ravel/instances", 0755) + if err != nil { + return nil, fmt.Errorf("failed to create instances directory: %w", err) + } + + uid, gid, err := setupRavelJailerUser() + if err != nil { + return nil, fmt.Errorf("failed to setup ravel jailer user: %w", err) + } + + frequency, err := resources.GetHostCPUFrequency() + if err != nil { + return nil, fmt.Errorf("failed to get host CPU frequency: %w", err) + } + + slog.Info("Host CPU frequency", "mhz", frequency) + ctrd, err := initContainerd() if err != nil { return nil, fmt.Errorf("failed to create containerd client: %w", err) @@ -41,14 +64,18 @@ func New(config *config.RuntimeConfig, registries registry.RegistriesConfig, is initBinary := config.InitBinary linuxKernel := config.LinuxKernel + jailer := config.JailerBinary - instanceBuilder := vm.NewBuilder("/var/run/ravel", "/var/lib/ravel", initBinary, linuxKernel, imagesService, ctrd, snapshotter) + instanceBuilder, err := vm.NewBuilder(config.CloudHypervisorBinary, jailer, initBinary, linuxKernel, imagesService, ctrd, snapshotter, frequency, vm.User{Uid: uid, Gid: gid}) + if err != nil { + return nil, fmt.Errorf("failed to create instance builder: %w", err) + } runtime := &Runtime{ instancesStore: is, imagesUsage: imageUsage, images: imagesService, - networking: newNetworkService(), + networking: newNetworkService(vm.User{Uid: uid, Gid: gid}), instanceBuilder: instanceBuilder, instances: state, } @@ -75,6 +102,11 @@ func initContainerd() (*client.Client, error) { func (r *Runtime) Start() error { slog.Info("Starting runtime") + err := createRavelCgroup() + if err != nil { + return fmt.Errorf("failed to create ravel cgroup: %w", err) + } + instances, err := r.instancesStore.LoadInstances() if err != nil { return 
fmt.Errorf("failed to load instances: %w", err) diff --git a/runtime/state.go b/runtime/state.go index fe2907a..0c16cb1 100644 --- a/runtime/state.go +++ b/runtime/state.go @@ -1,7 +1,6 @@ package runtime import ( - "log/slog" "sync" "github.com/valyentdev/ravel/core/errdefs" @@ -47,7 +46,6 @@ func (s *State) Delete(id string) { } func (s *State) GetInstance(id string) (*instancerunner.InstanceRunner, error) { - slog.Debug("getting instance", "id", id) s.mutex.RLock() instance, ok := s.instances[id] s.mutex.RUnlock() diff --git a/runtime/user.go b/runtime/user.go new file mode 100644 index 0000000..0a599d9 --- /dev/null +++ b/runtime/user.go @@ -0,0 +1,60 @@ +package runtime + +import ( + "errors" + "fmt" + "os/exec" + "os/user" + "strconv" +) + +const RAVEL_USER = "ravel-jailer" + +func lookupUser(name string) (*user.User, error) { + u, err := user.Lookup(name) + if err != nil { + var e user.UnknownUserError + if errors.As(err, &e) { + return nil, nil + } + return nil, fmt.Errorf("failed to lookup user: %w", err) + } + return u, nil +} + +func parseUidGid(u *user.User) (uid, gid int, err error) { + uid, err = strconv.Atoi(u.Uid) + if err != nil { + return 0, 0, fmt.Errorf("failed to convert uid to int: %w", err) + } + + gid, err = strconv.Atoi(u.Gid) + if err != nil { + return 0, 0, fmt.Errorf("failed to convert gid to int: %w", err) + } + + return uid, gid, nil +} + +func setupRavelJailerUser() (uid, gid int, err error) { + user, err := lookupUser(RAVEL_USER) + if err != nil { + return + } + if user != nil { + return parseUidGid(user) + } + + cmd := exec.Command("useradd", "-M", "-r", "-s", "/usr/sbin/nologin", RAVEL_USER) + err = cmd.Run() + if err != nil { + return 0, 0, fmt.Errorf("failed to create ravel-jailer user: %w", err) + } + + user, err = lookupUser(RAVEL_USER) + if err != nil { + return + } + + return parseUidGid(user) +} diff --git a/runtime/vm/builder.go b/runtime/vm/builder.go index e67a713..92a789d 100644 --- a/runtime/vm/builder.go +++ 
b/runtime/vm/builder.go @@ -5,83 +5,190 @@ import ( "errors" "fmt" "log/slog" + "os" "path" - "syscall" + "time" + "github.com/containerd/cgroups/v3/cgroup2" "github.com/containerd/containerd/v2/client" "github.com/valyentdev/ravel/core/instance" + "github.com/valyentdev/ravel/core/jailer" "github.com/valyentdev/ravel/pkg/cloudhypervisor" "github.com/valyentdev/ravel/runtime/images" ) +const ( + dataDir = "/var/lib/ravel/instances" + initRamfsPath = "/initramfs" + linuxKernelPath = "/vmlinux.bin" + chAPISocketPath = "/instance.sock" + vsockPath = "/instance.vsock" +) + +func getInstanceDir(id string) string { + return path.Join(dataDir, id) +} + +func getAPISocketPath(id string) string { + return path.Join(getInstanceDir(id), "instance.sock") +} + +func getVsockPath(id string) string { + return path.Join(getInstanceDir(id), "instance.vsock") +} + type Builder struct { - runPath Dir - dataPath Dir - initBinary string - linuxKernel string - images *images.Service - ctrd *client.Client - snapshotter string + cpuMhz int64 + chBinary string + jailerBinary string + initBinary string + initBin []byte + linuxKernel string + images *images.Service + ctrd *client.Client + snapshotter string + jailerUser User +} + +type User struct { + Uid int + Gid int } func NewBuilder( - runPath, dataPath Dir, - initBinary, linuxKernel string, + chBinary, jailerBinary, initBinary, linuxKernel string, images *images.Service, ctrd *client.Client, snapshotter string, -) *Builder { - return &Builder{ - runPath: runPath, - dataPath: dataPath, - initBinary: initBinary, - linuxKernel: linuxKernel, - images: images, - ctrd: ctrd, - snapshotter: snapshotter, + cpuMhz int64, + user User, +) (*Builder, error) { + + initBin, err := os.ReadFile(initBinary) + if err != nil { + return nil, fmt.Errorf("failed to read init binary: %w", err) } -} -func (b *Builder) getInitRDPath(instanceId string) string { - return path.Join(b.dataPath.InstanceDir(instanceId), "initrd") + _, err = 
cgroup2.NewManager("/sys/fs/cgroup", "/ravel", &cgroup2.Resources{}) // create cgroup2 manager + if err != nil { + return nil, fmt.Errorf("failed to create cgroup2 manager: %w", err) + } + + return &Builder{ + chBinary: chBinary, + initBinary: initBinary, + linuxKernel: linuxKernel, + jailerBinary: jailerBinary, + initBin: initBin, + images: images, + ctrd: ctrd, + snapshotter: snapshotter, + cpuMhz: cpuMhz, + jailerUser: user, + }, nil } var _ instance.Builder = (*Builder)(nil) -func (b *Builder) PrepareInstance(ctx context.Context, i *instance.Instance, image client.Image) error { - if err := createInstanceDirectories(b.dataPath, b.runPath, i.Id); err != nil { - return err +func getCgroupMemory(c *instance.InstanceGuestConfig) *cgroup2.Memory { + high := int64(c.MemoryMB * 1_000_000) + max := int64(float64(c.MemoryMB) * 1_000_000 * 1.1) // trigger OOM at 110% of the limit, should be tuned later + return &cgroup2.Memory{ + High: &high, + Max: &max, } +} - spec, err := image.Spec(ctx) - if err != nil { - return err +func getCgroupCPU(cpuMhz int64, c *instance.InstanceGuestConfig) *cgroup2.CPU { + quota := int64(float64(c.CpusMHz) / float64(cpuMhz) * 1_000_000) + period := uint64(1_000_000) + max := cgroup2.NewCPUMax(&quota, &period) + return &cgroup2.CPU{ + Max: max, } +} - return b.prepareInitRD(i, spec) +func getInstanceResources(cpuMhz int64, c *instance.InstanceGuestConfig) *cgroup2.Resources { + return &cgroup2.Resources{ + CPU: getCgroupCPU(cpuMhz, c), + Memory: getCgroupMemory(c), + } } // BuildInstanceVM implements instance.VMBuilder.
func (b *Builder) BuildInstanceVM(ctx context.Context, instance *instance.Instance) (instance.VM, error) { - slog.Debug("building vm", "instance", instance.Id) + startTime := time.Now() image, err := b.images.GetImage(ctx, instance.ImageRef) if err != nil { return nil, err } - slog.Debug("preparing rootfs", "instance", instance.Id, "image", image.Name()) - rootfs, err := b.prepareRootFS(ctx, instance.Id, image) if err != nil { return nil, err } - vmConfig := b.getContainerMachineCHVmConfig(instance, rootfs) - vm, err := newVM(instance.Id, vmConfig, b.socketPath(instance.Id), b.vsockPath(instance.Id)) + + slog.Debug("rootfs prepared after", "time", time.Since(startTime)) + + spec, err := image.Spec(ctx) if err != nil { return nil, err } - slog.Debug("vm built", "instance", instance.Id) + cgroup, err := cgroup2.Load("/ravel") + if err != nil { + return nil, fmt.Errorf("failed to load cgroup: %w", err) + } + + _, err = cgroup.NewChild(instance.Id, getInstanceResources(b.cpuMhz, &instance.Config.Guest)) + if err != nil { + return nil, fmt.Errorf("failed to create cgroup: %w", err) + } + + jail, err := jailer.CreateJail( + b.jailerBinary, + jailer.JailConfig{ + Uid: b.jailerUser.Uid, + Gid: b.jailerUser.Gid, + NewRoot: getInstanceDir(instance.Id), + }, + jailer.WithTUN(), + jailer.WithKVM(), + jailer.WithURandom(), + jailer.WithBlockDevice(rootfs), + jailer.WithBinary(b.chBinary, "/cloud-hypervisor"), + jailer.WithHardLink(b.linuxKernel, "/vmlinux.bin", true), + jailer.WithNewPidNS(), + jailer.WithMountProc(), + jailer.WithCgroup("/ravel/"+instance.Id), + ) + if err != nil { + return nil, fmt.Errorf("failed to create jail: %w", err) + } + + slog.Debug("jail created after", "time", time.Since(startTime)) + + initrd, err := jail.CreateFile(initRamfsPath, 0700) + if err != nil { + return nil, fmt.Errorf("failed to create initrd: %w", err) + } + defer initrd.Close() + + err = b.writeInitrd(initrd, instance, spec) + if err != nil { + return nil, fmt.Errorf("failed to write 
initrd: %w", err) + } + + slog.Debug("initrd created after", "time", time.Since(startTime)) + + cmd := jail.Command("./cloud-hypervisor", "--api-socket", chAPISocketPath, "--log-file", "./cloud-hypervisor.log") + + vmConfig := b.getContainerMachineCHVmConfig(instance, rootfs) + vm, err := newVM(instance.Id, cmd, vmConfig) + if err != nil { + return nil, fmt.Errorf("failed to create VM: %w", err) + } + return vm, nil } @@ -90,46 +197,58 @@ func (b *Builder) CleanupInstanceVM(ctx context.Context, instance *instance.Inst if err := b.removeRootFS(instance.Id); err != nil { return err } + + cg, err := cgroup2.Load("/ravel/" + instance.Id) + if err != nil { + return fmt.Errorf("failed to load cgroup: %w", err) + } + + if err := cg.Delete(); err != nil { + return fmt.Errorf("failed to delete cgroup: %w", err) + } + + if err := os.RemoveAll(getInstanceDir(instance.Id)); err != nil { + return err + } return nil } func (b *Builder) CleanupInstance(ctx context.Context, instance *instance.Instance) error { - if err := removeInstanceDirectories(b.dataPath, b.runPath, instance.Id); err != nil { - return err + err := b.removeRootFS(instance.Id) + if err != nil { + return fmt.Errorf("failed to remove rootfs: %w", err) + } + + err = os.RemoveAll(getInstanceDir(instance.Id)) + if err != nil { + return fmt.Errorf("failed to remove run dir: %w", err) } + return nil } // RecoverInstanceVM implements instance.VMBuilder. 
-func (b *Builder) RecoverInstanceVM(ctx context.Context, i *instance.Instance) (instance.VM, instance.Handle, error) { - var h instance.Handle - vmm, err := cloudhypervisor.NewVMM( - b.socketPath(i.Id), - cloudhypervisor.WithSysProcAttr(&syscall.SysProcAttr{ - Setsid: true, - }), - ) +func (b *Builder) RecoverInstanceVM(ctx context.Context, i *instance.Instance) (instance.VM, error) { + vmm, err := cloudhypervisor.NewVMMClient(getAPISocketPath(i.Id)) if err != nil { - return nil, h, err + return nil, err } - vm := &vm{ id: i.Id, vmm: vmm, - vsock: b.vsockPath(i.Id), + vsock: getVsockPath(i.Id), waitChan: make(chan struct{}), } - h, ok := vm.recover() + ok := vm.recover() if !ok { - return nil, h, errors.New("failed to recover VM") + return nil, errors.New("failed to recover VM") } - return vm, h, nil + return vm, nil } func (r *Builder) getContainerMachineCHVmConfig(i *instance.Instance, rootfs string) cloudhypervisor.VmConfig { - instanceId := i.Id config := i.Config return cloudhypervisor.VmConfig{ Cpus: &cloudhypervisor.CpusConfig{ @@ -140,12 +259,12 @@ func (r *Builder) getContainerMachineCHVmConfig(i *instance.Instance, rootfs str Size: int64(config.Guest.MemoryMB) * 1_000_000, }, Console: &cloudhypervisor.ConsoleConfig{ - Mode: "Pty", + Mode: cloudhypervisor.ConsoleConfigModeTty, }, Payload: cloudhypervisor.PayloadConfig{ - Initramfs: cloudhypervisor.StringPtr(r.getInitRDPath(instanceId)), - Kernel: cloudhypervisor.StringPtr(r.linuxKernel), - Cmdline: cloudhypervisor.StringPtr("ro console=hvc0 rdinit=ravel-init"), + Initramfs: cloudhypervisor.StringPtr(initRamfsPath), + Kernel: cloudhypervisor.StringPtr(linuxKernelPath), + Cmdline: cloudhypervisor.StringPtr("ro console=hvc0 rdinit=ravel-init quiet"), }, Disks: &[]cloudhypervisor.DiskConfig{ { @@ -159,15 +278,7 @@ func (r *Builder) getContainerMachineCHVmConfig(i *instance.Instance, rootfs str }, Vsock: &cloudhypervisor.VsockConfig{ Cid: 3, - Socket: r.vsockPath(instanceId), + Socket: vsockPath, }, } } - 
-func (r *Builder) vsockPath(instanceId string) string { - return fmt.Sprintf("/tmp/%s-vsock.sock", instanceId) -} - -func (r *Builder) socketPath(instanceId string) string { - return path.Join(r.runPath.InstanceDir(instanceId), "instance.sock") -} diff --git a/runtime/vm/dir.go b/runtime/vm/dir.go deleted file mode 100644 index 51e2be9..0000000 --- a/runtime/vm/dir.go +++ /dev/null @@ -1,46 +0,0 @@ -package vm - -import ( - "os" - "path" -) - -type Dir string - -func (d Dir) String() string { - return string(d) -} - -func (d Dir) InstancesDir() string { - return path.Join(d.String(), "instances") -} - -func (d Dir) InstanceDir(id string) string { - return path.Join(d.InstancesDir(), id) -} - -const DefaultDataDir = Dir("/var/lib/ravel") -const DefaultRunDir = Dir("/var/run/ravel") - -func createInstanceDirectories(dataDir Dir, runDir Dir, id string) error { - if err := os.MkdirAll(dataDir.InstanceDir(id), 0644); err != nil { - return err - } - - if err := os.MkdirAll(runDir.InstanceDir(id), 0644); err != nil { - return err - } - return nil -} - -func removeInstanceDirectories(dataDir Dir, runDir Dir, id string) error { - if err := os.RemoveAll(dataDir.InstanceDir(id)); err != nil { - return err - } - - if err := os.RemoveAll(runDir.InstanceDir(id)); err != nil { - return err - } - - return nil -} diff --git a/runtime/vm/filesystem.go b/runtime/vm/filesystem.go index 55d8125..4dc3b06 100644 --- a/runtime/vm/filesystem.go +++ b/runtime/vm/filesystem.go @@ -34,11 +34,6 @@ func (b *Builder) prepareRootFS(ctx context.Context, id string, image client.Ima if err != nil { return "", fmt.Errorf("failed to prepare rootfs for instance %q: %w", id, err) } - defer func() { - if err != nil { - b.removeRootFS(id) - } - }() return rootfs, nil } @@ -63,7 +58,19 @@ func (b *Builder) prepareContainerRootFS(ctx context.Context, id string, image c slog.Debug("preparing snapshot", "id", id, "parent", parent) mounts, err := ss.Prepare(context.Background(), rootFSName(id), parent) if err 
!= nil { - return "", fmt.Errorf("failed to prepare snapshot %q: %w", id, err) + if !errdefs.IsAlreadyExists(err) { + return "", fmt.Errorf("failed to prepare snapshot %q: %w", id, err) + + } + + err = b.removeRootFS(id) + if err != nil { + return "", fmt.Errorf("failed to remove existing snapshot %q: %w", id, err) + } + mounts, err = ss.Prepare(context.Background(), rootFSName(id), parent) + if err != nil { + return "", fmt.Errorf("failed to prepare snapshot %q: %w", id, err) + } } if len(mounts) == 0 { diff --git a/runtime/vm/initrd.go b/runtime/vm/initrd.go index f45110b..6b56d40 100644 --- a/runtime/vm/initrd.go +++ b/runtime/vm/initrd.go @@ -1,10 +1,13 @@ package vm import ( + "bytes" "compress/gzip" "encoding/json" "fmt" + "log/slog" "os" + "time" v1 "github.com/opencontainers/image-spec/specs-go/v1" "github.com/u-root/u-root/pkg/cpio" @@ -14,19 +17,17 @@ import ( "golang.org/x/sys/unix" ) -func (b *Builder) prepareInitRD(instance *instance.Instance, image v1.Image) error { - init, err := os.Open(b.initBinary) - if err != nil { - return fmt.Errorf("failed to read init binary: %w", err) - } - defer init.Close() +func (b *Builder) writeInitrd(file *os.File, instance *instance.Instance, image v1.Image) error { + slog.Debug("writing initrd", "instance", instance.Id) - initrdPath := b.getInitRDPath(instance.Id) + t1 := time.Now() + // init, err := os.Open(b.initBinary) + // if err != nil { + // return fmt.Errorf("failed to read init binary: %w", err) + // } + // defer init.Close() - file, err := os.Create(initrdPath) - if err != nil { - return fmt.Errorf("failed to create initrd file: %w", err) - } + slog.Info("init binary opened after", "time", time.Since(t1)) gz := gzip.NewWriter(file) defer gz.Close() @@ -39,27 +40,31 @@ func (b *Builder) prepareInitRD(instance *instance.Instance, image v1.Image) err return fmt.Errorf("failed to marshal init config: %w", err) } - initInfos, err := init.Stat() - if err != nil { - return fmt.Errorf("failed to get init stat: %w", 
err) - } + // initInfos, err := init.Stat() + // if err != nil { + // return fmt.Errorf("failed to get init stat: %w", err) + // } configRecord := cpio.StaticFile("/ravel/run.json", string(configJSON), 0644) + init := bytes.NewReader(b.initBin) initRecord := cpio.Record{ ReaderAt: init, Info: cpio.Info{ - FileSize: uint64(initInfos.Size()), + FileSize: uint64(len(b.initBin)), Name: "ravel-init", Mode: unix.S_IFREG | 0755, }, } + slog.Info("init and config records created after", "time", time.Since(t1)) err = cpio.WriteRecordsAndDirs(w, []cpio.Record{initRecord, configRecord}) if err != nil { return fmt.Errorf("failed to write records and dirs: %w", err) } + slog.Info("initrd written after", "time", time.Since(t1)) + return nil } diff --git a/runtime/vm/vm.go b/runtime/vm/vm.go index 6ae81d9..8e31dcf 100644 --- a/runtime/vm/vm.go +++ b/runtime/vm/vm.go @@ -4,8 +4,8 @@ import ( "context" "fmt" "log/slog" + "os/exec" "sync/atomic" - "syscall" "time" vminit "github.com/valyentdev/ravel-init/client" @@ -22,13 +22,13 @@ const ( ) type vm struct { + cmd *exec.Cmd successFullyShutdowned atomic.Bool id string runResult *RunResult vmConfig cloudhypervisor.VmConfig vmm *cloudhypervisor.VMM vsock string - console string stopRequested bool waitChan chan struct{} } @@ -39,31 +39,27 @@ func (vm *vm) Id() string { return vm.id } -func newVM(id string, vmConfig cloudhypervisor.VmConfig, socket, vsock string) (*vm, error) { - vmm, err := cloudhypervisor.NewVMM( - socket, - cloudhypervisor.WithSysProcAttr(&syscall.SysProcAttr{ - Setsid: true, - }), - ) +func newVM(id string, cmd *exec.Cmd, vmConfig cloudhypervisor.VmConfig) (*vm, error) { + slog.Debug("creating new VM", "id", id, "socket", getAPISocketPath(id)) + vmm, err := cloudhypervisor.NewVMMClient(getAPISocketPath(id)) if err != nil { return nil, err } return &vm{ id: id, + cmd: cmd, vmConfig: vmConfig, vmm: vmm, waitChan: make(chan struct{}), - vsock: vsock, + vsock: getVsockPath(id), }, nil } -func (vm *vm) Start(ctx 
context.Context) (instance.Handle, error) { - h := instance.Handle{} - err := vm.vmm.StartVMM(ctx) +func (vm *vm) Start(ctx context.Context) error { + err := vm.cmd.Start() if err != nil { - return h, fmt.Errorf("failed to start vmm for machine %q: %w", vm.Id(), err) + return fmt.Errorf("failed to start vmm for machine %q: %w", vm.Id(), err) } defer func() { if err != nil { @@ -73,31 +69,22 @@ func (vm *vm) Start(ctx context.Context) (instance.Handle, error) { err = vm.vmm.WaitReady(ctx) if err != nil { - return h, fmt.Errorf("failed to wait for vmm to be ready for machine %q: %w", vm.Id(), err) + return fmt.Errorf("failed to wait for vmm to be ready for machine %q: %w", vm.Id(), err) } err = vm.vmm.CreateVM(ctx, vm.vmConfig) if err != nil { - return h, fmt.Errorf("failed to create vm for machine %q: %w", vm.Id(), err) + return fmt.Errorf("failed to create vm for machine %q: %w", vm.Id(), err) } err = vm.vmm.BootVM(ctx) if err != nil { - return h, fmt.Errorf("failed to boot vm for machine %q: %w", vm.Id(), err) + return fmt.Errorf("failed to boot vm for machine %q: %w", vm.Id(), err) } - vminfo, err := vm.vmm.VMInfo(ctx) - if err != nil { - return h, fmt.Errorf("failed to get vm info for machine %q: %w", vm.Id(), err) - } - - vm.console = *vminfo.Config.Console.File - - h.Console = vm.console - go vm.run() - return h, nil + return nil } @@ -254,31 +241,24 @@ func (vm *vm) WaitExit(ctx context.Context) (exited bool) { } } -func (vm *vm) recover() (instance.Handle, bool) { - h := instance.Handle{} +func (vm *vm) recover() bool { ok := false state := vm.determinateState() if !state.isVMMRunning { - return h, ok + return ok } if !state.isVMRunning { err := vm.Shutdown(context.Background()) if err != nil { slog.Error("failed to shutdown VMM", "err", err) } - return h, ok + return ok } - serial := state.vminfo.Config.Console.File - - vm.console = *serial - go vm.run() - h.Console = vm.console - - return h, true + return true } type internalState struct {