diff --git a/Dockerfile b/Dockerfile index 0e58df106531b..22294606cca1f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,9 +32,11 @@ RUN echo deb http://ppa.launchpad.net/zfs-native/stable/ubuntu trusty main > /et # Packaged dependencies RUN apt-get update && apt-get install -y \ apparmor \ + asciidoc \ aufs-tools \ automake \ bash-completion \ + bsdmainutils \ btrfs-tools \ build-essential \ createrepo \ @@ -43,19 +45,28 @@ RUN apt-get update && apt-get install -y \ gcc-mingw-w64 \ git \ iptables \ + libaio-dev \ libapparmor-dev \ libcap-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ mercurial \ parallel \ + pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ + python-minimal \ python-mock \ python-pip \ + python-protobuf \ python-websocket \ reprepro \ ruby1.9.1 \ ruby1.9.1-dev \ s3cmd=1.1.0* \ ubuntu-zfs \ + xmlto \ libzfs-dev \ --no-install-recommends @@ -80,6 +91,13 @@ RUN cd /usr/src/lxc \ && make install \ && ldconfig +# Install Criu +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v1.6.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 +RUN cd /usr/src/criu \ + && make \ + && make install + # Install Go ENV GO_VERSION 1.4.2 RUN curl -sSL https://golang.org/dl/go${GO_VERSION}.src.tar.gz | tar -v -C /usr/local -xz \ diff --git a/api/client/checkpoint.go b/api/client/checkpoint.go new file mode 100644 index 0000000000000..24fed5f68b634 --- /dev/null +++ b/api/client/checkpoint.go @@ -0,0 +1,55 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +func (cli *DockerCli) CmdCheckpoint(args ...string) error { + cmd := Cli.Subcmd("checkpoint", []string{"CONTAINER [CONTAINER...]"}, "Checkpoint one or more running containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory for storing checkpoint image files") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for storing log file") + flLeaveRunning = cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpoint") + flCheckTcp = cmd.Bool([]string{"-allow-tcp"}, false, "allow checkpointing tcp connections") + flExtUnix = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow checkpointing external unix connections") + flShell = cmd.Bool([]string{"-allow-shell"}, false, "allow checkpointing shell jobs") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + criuOpts := &runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + LeaveRunning: *flLeaveRunning, + TcpEstablished: *flCheckTcp, + ExternalUnixConnections: *flExtUnix, + ShellJob: *flShell, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/checkpoint", criuOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to checkpoint one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/client/restore.go b/api/client/restore.go new file mode 100644 index 0000000000000..bef78a262b54a --- /dev/null +++ b/api/client/restore.go @@ -0,0 +1,57 @@ +// +build experimental + +package client + +import ( + "fmt" + + Cli "github.com/docker/docker/cli" + flag "github.com/docker/docker/pkg/mflag" + "github.com/docker/docker/runconfig" +) + +func (cli *DockerCli) CmdRestore(args ...string) error { + cmd := Cli.Subcmd("restore", []string{"CONTAINER [CONTAINER...]"}, "Restore one or more checkpointed containers", true) + cmd.Require(flag.Min, 1) + + var ( + flImgDir = cmd.String([]string{"-image-dir"}, "", "directory to restore image files from") + flWorkDir = cmd.String([]string{"-work-dir"}, "", "directory for restore log") + flCheckTcp = cmd.Bool([]string{"-allow-tcp"}, false, "allow restoring tcp connections") + flExtUnix = cmd.Bool([]string{"-allow-ext-unix"}, false, "allow restoring external unix connections") + flShell = cmd.Bool([]string{"-allow-shell"}, false, "allow restoring shell jobs") + flForce = cmd.Bool([]string{"-force"}, false, "bypass checks for current container state") + ) + + if err := cmd.ParseFlags(args, true); err != nil { + return err + } + + if cmd.NArg() < 1 { + cmd.Usage() + return nil + } + + restoreOpts := &runconfig.RestoreConfig{ + CriuOpts: runconfig.CriuConfig{ + ImagesDirectory: *flImgDir, + WorkDirectory: *flWorkDir, + TcpEstablished: *flCheckTcp, + ExternalUnixConnections: *flExtUnix, + ShellJob: *flShell, + }, + ForceRestore: *flForce, + } + + var encounteredError error + for _, name := range cmd.Args() { + _, _, err := readBody(cli.call("POST", "/containers/"+name+"/restore", restoreOpts, nil)) + if err != nil { + fmt.Fprintf(cli.err, "%s\n", err) + encounteredError = fmt.Errorf("Error: failed to restore one or more containers") + } else { + fmt.Fprintf(cli.out, "%s\n", name) + } + } + return encounteredError +} diff --git a/api/server/server.go b/api/server/server.go index 66ba4e0842770..bf944f92e0e52 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -215,6 +215,7 @@ func httpError(w http.ResponseWriter, err error) { // json encoding. func writeJSON(w http.ResponseWriter, code int, v interface{}) error { w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) return json.NewEncoder(w).Encode(v) } @@ -365,6 +366,8 @@ func createRouter(s *Server) *mux.Router { }, } + addExperimentalRoutes(s, m) + // If "api-cors-header" is not given, but "api-enable-cors" is true, we set cors to "*" // otherwise, all head values will be passed to HTTP handler corsHeaders := s.cfg.CorsHeaders diff --git a/api/server/server_experimental_unix.go b/api/server/server_experimental_unix.go index 03829a11ecb76..ca17cdd1036b8 100644 --- a/api/server/server_experimental_unix.go +++ b/api/server/server_experimental_unix.go @@ -2,6 +2,19 @@ package server +import ( + "encoding/json" + "fmt" + "github.com/docker/docker/pkg/version" + "github.com/docker/docker/runconfig" + "net/http" +) + +func addExperimentalRoutes(s *Server, m map[string]map[string]HTTPAPIFunc) { + m["POST"]["/containers/{name:.*}/checkpoint"] = s.postContainersCheckpoint + m["POST"]["/containers/{name:.*}/restore"] = s.postContainersRestore +} + func (s *Server) registerSubRouter() { httpHandler := s.daemon.NetworkApiRouter() @@ -15,3 +28,45 @@ func (s *Server) registerSubRouter() { subrouter = s.router.PathPrefix("/services").Subrouter() subrouter.Methods("GET", "POST", "PUT", "DELETE").HandlerFunc(httpHandler) } + +func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + criuOpts := &runconfig.CriuConfig{} + if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil { + return err + } + + if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if vars == nil { + return fmt.Errorf("Missing parameter") + } + if err := parseForm(r); err != nil { + return err + } + + restoreOpts := runconfig.RestoreConfig{} + if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil { + return err + } + + if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/server_stub.go b/api/server/server_stub.go index cae28493836f4..6d93d21626dd1 100644 --- a/api/server/server_stub.go +++ b/api/server/server_stub.go @@ -2,5 +2,9 @@ package server +func addExperimentalRoutes(s *Server, m map[string]map[string]HTTPAPIFunc) { + +} + func (s *Server) registerSubRouter() { } diff --git a/api/types/types.go b/api/types/types.go index 5cbb9bdba8c0a..ee994f3bbe2a3 100644 --- a/api/types/types.go +++ b/api/types/types.go @@ -227,16 +227,18 @@ type ExecStartCheck struct { // ContainerState stores container's running state // it's part of ContainerJSONBase and will return by "inspect" command type ContainerState struct { - Running bool - Paused bool - Restarting bool - OOMKilled bool - Dead bool - Pid int - ExitCode int - Error string - StartedAt string - FinishedAt string + Running bool + Paused bool + Checkpointed bool + Restarting bool + OOMKilled bool + Dead bool + Pid int + ExitCode int + Error string + StartedAt string + FinishedAt string + CheckpointedAt string `json:"-"` } // ContainerJSONBase contains response of Remote API: diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000000..a39662cc0f325 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,56 @@ +package daemon + +import ( + "fmt" + + "github.com/docker/docker/runconfig" +) + +// Checkpoint a running container. +func (daemon *Daemon) ContainerCheckpoint(name string, opts *runconfig.CriuConfig) error { + container, err := daemon.Get(name) + if err != nil { + return err + } + if !container.IsRunning() { + return fmt.Errorf("Container %s not running", name) + } + if err := container.Checkpoint(opts); err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + container.LogEvent("checkpoint") + return nil +} + +// Restore a checkpointed container. +func (daemon *Daemon) ContainerRestore(name string, opts *runconfig.CriuConfig, forceRestore bool) error { + container, err := daemon.Get(name) + if err != nil { + return err + } + + if !forceRestore { + // TODO: It's possible we only want to bypass the checkpointed check, + // I'm not sure how this will work if the container is already running + if container.IsRunning() { + return fmt.Errorf("Container %s already running", name) + } + + if !container.IsCheckpointed() { + return fmt.Errorf("Container %s is not checkpointed", name) + } + } else { + if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" { + return fmt.Errorf("You must specify an image directory to restore from %s", name) + } + } + + if err = container.Restore(opts, forceRestore); err != nil { + container.LogEvent("die") + return fmt.Errorf("Cannot restore container %s: %s", name, err) + } + + container.LogEvent("restore") + return nil +} diff --git a/daemon/container.go b/daemon/container.go index f56b8cbe99607..872f9ebb0fc8b 100644 --- a/daemon/container.go +++ b/daemon/container.go @@ -268,7 +268,7 @@ func (container *Container) Start() (err error) { // backwards API compatibility. container.hostConfig = runconfig.SetDefaultNetModeIfBlank(container.hostConfig) - if err := container.initializeNetworking(); err != nil { + if err := container.initializeNetworking(false); err != nil { return err } linkedEnv, err := container.setupLinkedContainers() @@ -343,7 +343,11 @@ func (container *Container) isNetworkAllocated() bool { // cleanup releases any network resources allocated to the container along with any rules // around how containers are linked together. It also unmounts the container's root filesystem. func (container *Container) cleanup() { - container.ReleaseNetwork() + if container.IsCheckpointed() { + logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID) + } else { + container.ReleaseNetwork(false) + } if err := container.CleanupStorage(); err != nil { logrus.Errorf("%v: Failed to cleanup storage: %v", container.ID, err) diff --git a/daemon/container_checkpoint.go b/daemon/container_checkpoint.go new file mode 100644 index 0000000000000..b6aad41aabcf4 --- /dev/null +++ b/daemon/container_checkpoint.go @@ -0,0 +1,110 @@ +package daemon + +import ( + "fmt" + + "github.com/docker/docker/pkg/promise" + "github.com/docker/docker/runconfig" + + "github.com/docker/libnetwork/netutils" +) + +func (container *Container) Checkpoint(opts *runconfig.CriuConfig) error { + if err := container.daemon.Checkpoint(container, opts); err != nil { + return err + } + + if opts.LeaveRunning == false { + container.ReleaseNetwork(true) + } + return nil +} + +func (container *Container) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { + var err error + container.Lock() + defer container.Unlock() + + defer func() { + if err != nil { + container.setError(err) + // if no one else has set it, make sure we don't leave it at zero + if container.ExitCode == 0 { + container.ExitCode = 128 + } + container.toDisk() + container.cleanup() + } + }() + + if err := container.Mount(); err != nil { + return err + } + if err = container.initializeNetworking(true); err != nil { + return err + } + + nctl := container.daemon.netController + network, err := nctl.NetworkByID(container.NetworkSettings.NetworkID) + if err != nil { + return err + } + + ep_t, err := network.EndpointByID(container.NetworkSettings.EndpointID) + if err != nil { + return err + } + + for _, i := range ep_t.SandboxInterfaces() { + outname, err := netutils.GenerateIfaceName("veth", 7) + if err != nil { + return err + } + vethpair := runconfig.VethPairName{ + InName: i.DstName(), + OutName: outname, + } + opts.VethPairs = append(opts.VethPairs, vethpair) + } + + linkedEnv, err := container.setupLinkedContainers() + if err != nil { + return err + } + if err = container.setupWorkingDirectory(); err != nil { + return err + } + + env := container.createDaemonEnvironment(linkedEnv) + if err = populateCommand(container, env); err != nil { + return err + } + + mounts, err := container.setupMounts() + if err != nil { + return err + } + + container.command.Mounts = mounts + return container.waitForRestore(opts, forceRestore) +} + +func (container *Container) waitForRestore(opts *runconfig.CriuConfig, forceRestore bool) error { + container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy) + + // After calling promise.Go() we'll have two goroutines: + // - The current goroutine that will block in the select + // below until restore is done. + // - A new goroutine that will restore the container and + // wait for it to exit. + select { + case <-container.monitor.restoreSignal: + if container.ExitCode != 0 { + return fmt.Errorf("restore process failed") + } + case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }): + return err + } + + return nil +} diff --git a/daemon/container_unix.go b/daemon/container_unix.go index b52f14c5de23d..f948a3b071cd1 100644 --- a/daemon/container_unix.go +++ b/daemon/container_unix.go @@ -684,7 +684,7 @@ func (container *Container) UpdateNetwork() error { return nil } -func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointOption, error) { +func (container *Container) buildCreateEndpointOptions(restoring bool) ([]libnetwork.EndpointOption, error) { var ( portSpecs = make(nat.PortSet) bindings = make(nat.PortMap) @@ -759,6 +759,14 @@ func (container *Container) buildCreateEndpointOptions() ([]libnetwork.EndpointO createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) } + /* if restoring && container.NetworkSettings.IPAddress != "" { + genericOption := options.Generic{ + netlabel.IPAddress: net.ParseIP(container.NetworkSettings.IPAddress), + } + + createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption)) + } */ + return createOptions, nil } @@ -812,7 +820,7 @@ func (container *Container) secondaryNetworkRequired(primaryNetworkType string) return false } -func (container *Container) AllocateNetwork() error { +func (container *Container) AllocateNetwork(isRestoring bool) error { mode := container.hostConfig.NetworkMode controller := container.daemon.netController if container.Config.NetworkDisabled || mode.IsContainer() { @@ -848,19 +856,19 @@ func (container *Container) AllocateNetwork() error { if container.secondaryNetworkRequired(networkDriver) { // Configure Bridge as secondary network for port binding purposes - if err := container.configureNetwork("bridge", service, "bridge", false); err != nil { + if err := container.configureNetwork("bridge", service, "bridge", false, isRestoring); err != nil { return err } } - if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault()); err != nil { + if err := container.configureNetwork(networkName, service, networkDriver, mode.IsDefault(), isRestoring); err != nil { return err } return container.WriteHostConfig() } -func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool) error { +func (container *Container) configureNetwork(networkName, service, networkDriver string, canCreateNetwork bool, isRestoring bool) error { controller := container.daemon.netController n, err := controller.NetworkByName(networkName) if err != nil { @@ -873,20 +881,51 @@ func (container *Container) configureNetwork(networkName, service, networkDriver } } - ep, err := n.EndpointByName(service) - if err != nil { - if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { - return err - } + var ep libnetwork.Endpoint - createOptions, err := container.buildCreateEndpointOptions() - if err != nil { - return err + if isRestoring == true { + // Use existing Endpoint for a checkpointed container + for _, endpoint := range n.Endpoints() { + if endpoint.ID() == container.NetworkSettings.EndpointID { + ep = endpoint + } } + if ep == nil { + //return fmt.Errorf("Fail to find the Endpoint for the checkpointed container") + fmt.Println("Fail to find the Endpoint for the checkpointed container") + ep, err = n.EndpointByName(service) + if err != nil { + if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { + return err + } - ep, err = n.CreateEndpoint(service, createOptions...) + createOptions, err := container.buildCreateEndpointOptions(isRestoring) + if err != nil { + return err + } + + ep, err = n.CreateEndpoint(service, createOptions...) + if err != nil { + return err + } + } + } + } else { + ep, err = n.EndpointByName(service) if err != nil { - return err + if _, ok := err.(libnetwork.ErrNoSuchEndpoint); !ok { + return err + } + + createOptions, err := container.buildCreateEndpointOptions(isRestoring) + if err != nil { + return err + } + + ep, err = n.CreateEndpoint(service, createOptions...) + if err != nil { + return err + } } } @@ -910,7 +949,7 @@ func (container *Container) configureNetwork(networkName, service, networkDriver return nil } -func (container *Container) initializeNetworking() error { +func (container *Container) initializeNetworking(restoring bool) error { var err error if container.hostConfig.NetworkMode.IsContainer() { @@ -941,7 +980,7 @@ func (container *Container) initializeNetworking() error { } - if err := container.AllocateNetwork(); err != nil { + if err := container.AllocateNetwork(restoring); err != nil { return err } @@ -1024,7 +1063,7 @@ func (container *Container) getNetworkedContainer() (*Container, error) { } } -func (container *Container) ReleaseNetwork() { +func (container *Container) ReleaseNetwork(is_checkpoint bool) { if container.hostConfig.NetworkMode.IsContainer() || container.Config.NetworkDisabled { return } @@ -1063,6 +1102,10 @@ func (container *Container) ReleaseNetwork() { } } + if is_checkpoint == true { + return + } + // In addition to leaving all endpoints, delete implicitly created endpoint if container.Config.PublishService == "" { if err := ep.Delete(); err != nil { diff --git a/daemon/container_windows.go b/daemon/container_windows.go index 425e1abe54b03..5d85853341ede 100644 --- a/daemon/container_windows.go +++ b/daemon/container_windows.go @@ -45,7 +45,7 @@ func (container *Container) createDaemonEnvironment(linkedEnv []string) []string return container.Config.Env } -func (container *Container) initializeNetworking() error { +func (container *Container) initializeNetworking(restoring bool) error { return nil } diff --git a/daemon/daemon.go b/daemon/daemon.go index 4d6d43dc31915..170c97e2cfd38 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -274,6 +274,18 @@ func (daemon *Daemon) restore() error { logrus.Debugf("Loaded container %v", container.ID) containers[container.ID] = &cr{container: container} + + // If the container was checkpointed, we need to reserve + // the IP address that it was using. + // + // XXX We should also reserve host ports (if any). + /*if container.IsCheckpointed() { + err = bridge.ReserveIP(container.ID, container.NetworkSettings.IPAddress) + if err != nil { + log.Errorf("Failed to reserve IP %s for container %s", + container.ID, container.NetworkSettings.IPAddress) + } + }*/ } else { logrus.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID) } @@ -814,6 +826,25 @@ func (daemon *Daemon) Run(c *Container, pipes *execdriver.Pipes, startCallback e return daemon.execDriver.Run(c.command, pipes, startCallback) } +func (daemon *Daemon) Checkpoint(c *Container, opts *runconfig.CriuConfig) error { + if err := daemon.execDriver.Checkpoint(c.command, opts); err != nil { + return err + } + c.SetCheckpointed(opts.LeaveRunning) + return nil +} + +func (daemon *Daemon) Restore(c *Container, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + // Mount the container's filesystem (daemon/graphdriver/aufs/aufs.go). + _, err := daemon.driver.Get(c.ID, c.GetMountLabel()) + if err != nil { + return execdriver.ExitStatus{ExitCode: 0}, err + } + + exitCode, err := daemon.execDriver.Restore(c.command, pipes, restoreCallback, opts, forceRestore) + return exitCode, err +} + func (daemon *Daemon) Kill(c *Container, sig int) error { return daemon.execDriver.Kill(c.command, sig) } diff --git a/daemon/execdriver/driver.go b/daemon/execdriver/driver.go index 6a9049ba98da0..442d28a890646 100644 --- a/daemon/execdriver/driver.go +++ b/daemon/execdriver/driver.go @@ -8,6 +8,7 @@ import ( // TODO Windows: Factor out ulimit "github.com/docker/docker/pkg/ulimit" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/configs" ) @@ -28,6 +29,7 @@ var ( // It's used by 'Run' and 'Exec', does some work in parent process // after child process is started. type StartCallback func(*ProcessConfig, int) +type RestoreCallback func(*ProcessConfig, int) // Info is driver specific information based on // processes registered with the driver @@ -71,6 +73,12 @@ type Driver interface { // Unpause unpauses a container. Unpause(c *Command) error + // Checkpoints a container (with criu). + Checkpoint(c *Command, opts *runconfig.CriuConfig) error + + // Restores a checkpoint image into a container (with criu). + Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (ExitStatus, error) + // Name returns the name of the driver. Name() string diff --git a/daemon/execdriver/lxc/driver.go b/daemon/execdriver/lxc/driver.go index 27da7c8c24db8..dce464b7937db 100644 --- a/daemon/execdriver/lxc/driver.go +++ b/daemon/execdriver/lxc/driver.go @@ -25,6 +25,7 @@ import ( sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" "github.com/docker/docker/pkg/version" + "github.com/docker/docker/runconfig" "github.com/kr/pty" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/cgroups" @@ -560,7 +561,14 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return err } -// Terminate implements the exec driver Driver interface. +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + return fmt.Errorf("Checkpointing lxc containers not supported yet\n") +} + +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n") +} + func (d *Driver) Terminate(c *execdriver.Command) error { return killLxc(c.ID, 9) } diff --git a/daemon/execdriver/native/driver.go b/daemon/execdriver/native/driver.go index b241bdbc504c8..5f2e8f4ea0a83 100644 --- a/daemon/execdriver/native/driver.go +++ b/daemon/execdriver/native/driver.go @@ -20,6 +20,7 @@ import ( "github.com/docker/docker/pkg/reexec" sysinfo "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/term" + "github.com/docker/docker/runconfig" "github.com/opencontainers/runc/libcontainer" "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" @@ -157,8 +158,11 @@ func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallba d.activeContainers[c.ID] = cont d.Unlock() defer func() { - cont.Destroy() - d.cleanContainer(c.ID) + status, err := cont.Status() + if err != nil || status != libcontainer.Checkpointed { + cont.Destroy() + d.cleanContainer(c.ID) + } }() if err := cont.Start(p); err != nil { @@ -262,6 +266,7 @@ func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*o // Kill implements the exec driver Driver interface. func (d *Driver) Kill(c *execdriver.Command, sig int) error { d.Lock() + _, err := d.factory.Load(c.ID) active := d.activeContainers[c.ID] d.Unlock() if active == nil { @@ -298,6 +303,121 @@ func (d *Driver) Unpause(c *execdriver.Command) error { return active.Resume() } +func libcontainerCriuOpts(runconfigOpts *runconfig.CriuConfig) *libcontainer.CriuOpts { + criuopts := &libcontainer.CriuOpts{ + ImagesDirectory: runconfigOpts.ImagesDirectory, + WorkDirectory: runconfigOpts.WorkDirectory, + LeaveRunning: runconfigOpts.LeaveRunning, + TcpEstablished: runconfigOpts.TcpEstablished, + ExternalUnixConnections: runconfigOpts.ExternalUnixConnections, + ShellJob: runconfigOpts.ShellJob, + } + + for _, i := range runconfigOpts.VethPairs { + criuopts.VethPairs = append(criuopts.VethPairs, + libcontainer.VethPairName{ + InName: i.InName, + OutName: i.OutName, + }) + } + return criuopts +} + +func (d *Driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + active := d.activeContainers[c.ID] + if active == nil { + return fmt.Errorf("active container for %s does not exist", c.ID) + } + + d.Lock() + defer d.Unlock() + err := active.Checkpoint(libcontainerCriuOpts(opts)) + if err != nil { + return err + } + + return nil +} + +func (d *Driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + var ( + cont libcontainer.Container + err error + ) + + cont, err = d.factory.Load(c.ID) + if err != nil { + if forceRestore { + var config *configs.Config + config, err = d.createContainer(c) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + cont, err = d.factory.Create(c.ID, config) + if err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } else { + return execdriver.ExitStatus{ExitCode: -1}, err + } + } + + p := &libcontainer.Process{ + Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...), + Env: c.ProcessConfig.Env, + Cwd: c.WorkingDir, + User: c.ProcessConfig.User, + } + + config := cont.Config() + if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + + d.Lock() + d.activeContainers[c.ID] = cont + d.Unlock() + defer func() { + cont.Destroy() + d.cleanContainer(c.ID) + }() + + if err := cont.Restore(p, libcontainerCriuOpts(opts)); err != nil { + return execdriver.ExitStatus{ExitCode: -1}, err + } + + // FIXME: no idea if any of this is needed... + if restoreCallback != nil { + pid, err := p.Pid() + if err != nil { + p.Signal(os.Kill) + p.Wait() + return execdriver.ExitStatus{ExitCode: -1}, err + } + restoreCallback(&c.ProcessConfig, pid) + } + + oom := notifyOnOOM(cont) + waitF := p.Wait + if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) { + // we need such hack for tracking processes with inherited fds, + // because cmd.Wait() waiting for all streams to be copied + waitF = waitInPIDHost(p, cont) + } + ps, err := waitF() + if err != nil { + execErr, ok := err.(*exec.ExitError) + if !ok { + return execdriver.ExitStatus{ExitCode: -1}, err + } + ps = execErr.ProcessState + } + + cont.Destroy() + _, oomKill := <-oom + return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil +} + // Terminate implements the exec driver Driver interface. func (d *Driver) Terminate(c *execdriver.Command) error { defer d.cleanContainer(c.ID) diff --git a/daemon/execdriver/windows/windows.go b/daemon/execdriver/windows/windows.go index 198ddc8dd7184..c87c50e230524 100644 --- a/daemon/execdriver/windows/windows.go +++ b/daemon/execdriver/windows/windows.go @@ -11,6 +11,7 @@ import ( "github.com/docker/docker/autogen/dockerversion" "github.com/docker/docker/daemon/execdriver" "github.com/docker/docker/pkg/parsers" + "github.com/docker/docker/runconfig" ) // This is a daemon development variable only and should not be @@ -93,3 +94,11 @@ func setupEnvironmentVariables(a []string) map[string]string { } return r } + +func (d *driver) Checkpoint(c *execdriver.Command, opts *runconfig.CriuConfig) error { + return fmt.Errorf("Windows: Containers cannot be checkpointed") +} + +func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *runconfig.CriuConfig, forceRestore bool) (execdriver.ExitStatus, error) { + return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Windows: Containers cannot be restored") +} diff --git a/daemon/inspect.go b/daemon/inspect.go index 0bff1cba541fb..bcc84bf936eca 100644 --- a/daemon/inspect.go +++ b/daemon/inspect.go @@ -42,16 +42,18 @@ func (daemon *Daemon) getInspectData(container *Container) (*types.ContainerJSON } containerState := &types.ContainerState{ - Running: container.State.Running, - Paused: container.State.Paused, - Restarting: container.State.Restarting, - OOMKilled: container.State.OOMKilled, - Dead: container.State.Dead, - Pid: container.State.Pid, - ExitCode: container.State.ExitCode, - Error: container.State.Error, - StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), - FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + Running: container.State.Running, + Paused: container.State.Paused, + Checkpointed: container.State.Checkpointed, + Restarting: container.State.Restarting, + OOMKilled: container.State.OOMKilled, + Dead: container.State.Dead, + Pid: container.State.Pid, + ExitCode: container.State.ExitCode, + Error: container.State.Error, + StartedAt: container.State.StartedAt.Format(time.RFC3339Nano), + FinishedAt: container.State.FinishedAt.Format(time.RFC3339Nano), + CheckpointedAt: container.State.CheckpointedAt.Format(time.RFC3339Nano), } contJSONBase := &types.ContainerJSONBase{ diff --git a/daemon/monitor.go b/daemon/monitor.go index 1f020574b0523..de1c77ee36205 100644 --- a/daemon/monitor.go +++ b/daemon/monitor.go @@ -47,6 +47,9 @@ type containerMonitor struct { // left waiting for nothing to happen during this time stopChan chan struct{} + // like startSignal but for restoring a container + restoreSignal chan struct{} + // timeIncrement is the amount of time to wait between restarts // this is in milliseconds timeIncrement int @@ -64,6 +67,7 @@ func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) * timeIncrement: defaultTimeIncrement, stopChan: make(chan struct{}), startSignal: make(chan struct{}), + restoreSignal: make(chan struct{}), } } @@ -184,6 +188,51 @@ func (m *containerMonitor) Start() error { } } +// Like Start() but for restoring a container. +func (m *containerMonitor) Restore(opts *runconfig.CriuConfig, forceRestore bool) error { + var ( + err error + // XXX The following line should be changed to + // exitStatus execdriver.ExitStatus to match Start() + exitCode execdriver.ExitStatus + afterRestore bool + ) + defer func() { + if afterRestore { + m.container.Lock() + m.container.setStopped(&execdriver.ExitStatus{exitCode.ExitCode, false}) + defer m.container.Unlock() + } + m.Close() + }() + + // FIXME: right now if we startLogging again we get double logs after a restore + if m.container.logCopier == nil { + if err := m.container.startLogging(); err != nil { + m.resetContainer(false) + return err + } + } + + pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin) + + m.container.LogEvent("restore") + m.lastStartTime = time.Now() + if exitCode, err = m.container.daemon.Restore(m.container, pipes, m.restoreCallback, opts, forceRestore); err != nil { + logrus.Errorf("Error restoring container: %s, exitCode=%d", err, exitCode) + m.container.ExitCode = -1 + m.resetContainer(false) + return err + } + afterRestore = true + + m.container.ExitCode = exitCode.ExitCode + m.resetMonitor(err == nil && exitCode.ExitCode == 0) + m.container.LogEvent("die") + m.resetContainer(true) + return err +} + // resetMonitor resets the stateful fields on the containerMonitor based on the // previous runs success or failure. Regardless of success, if the container had // an execution time of more than 10s then reset the timer back to the default @@ -270,6 +319,29 @@ func (m *containerMonitor) callback(processConfig *execdriver.ProcessConfig, pid } } +// Like callback() but for restoring a container. +func (m *containerMonitor) restoreCallback(processConfig *execdriver.ProcessConfig, restorePid int) { + // If restorePid is 0, it means that restore failed. + if restorePid != 0 { + m.container.setRunning(restorePid) + } + + // Unblock the goroutine waiting in waitForRestore(). + select { + case <-m.restoreSignal: + default: + close(m.restoreSignal) + } + + if restorePid != 0 { + // Write config.json and hostconfig.json files + // to /var/lib/docker/containers/. + if err := m.container.ToDisk(); err != nil { + logrus.Debugf("%s", err) + } + } +} + // resetContainer resets the container's IO and ensures that the command is able to be executed again // by copying the data into a new struct // if lock is true, then container locked during reset diff --git a/daemon/state.go b/daemon/state.go index 861671d7adf42..f52aa395d174b 100644 --- a/daemon/state.go +++ b/daemon/state.go @@ -14,6 +14,7 @@ type State struct { Running bool Paused bool Restarting bool + Checkpointed bool OOMKilled bool removalInProgress bool // Not need for this to be persistent on disk. Dead bool @@ -22,6 +23,7 @@ type State struct { Error string // contains last known error when starting the container StartedAt time.Time FinishedAt time.Time + CheckpointedAt time.Time waitChan chan struct{} } @@ -48,6 +50,10 @@ func (s *State) String() string { return "Removal In Progress" } + if s.Checkpointed { + return fmt.Sprintf("Checkpointed %s ago", units.HumanDuration(time.Now().UTC().Sub(s.CheckpointedAt))) + } + if s.Dead { return "Dead" } @@ -75,6 +81,10 @@ func (s *State) StateString() string { return "running" } + if s.Checkpointed { + return "checkpointed'" + } + if s.Dead { return "dead" } @@ -178,6 +188,7 @@ func (s *State) setRunning(pid int) { s.Error = "" s.Running = true s.Paused = false + s.Checkpointed = false s.Restarting = false s.ExitCode = 0 s.Pid = pid @@ -274,3 +285,24 @@ func (s *State) SetDead() { s.Dead = true s.Unlock() } + +func (s *State) SetCheckpointed(leaveRunning bool) { + s.Lock() + s.CheckpointedAt = time.Now().UTC() + s.Checkpointed = !leaveRunning + s.Running = leaveRunning + s.Paused = false + s.Restarting = false + // XXX Not sure if we need to close and recreate waitChan. + // close(s.waitChan) + // s.waitChan = make(chan struct{}) + s.Unlock() +} + +func (s *State) HasBeenCheckpointed() bool { + return s.CheckpointedAt != time.Time{} +} + +func (s *State) IsCheckpointed() bool { + return s.Checkpointed +} diff --git a/docker/docker.go b/docker/docker.go index 8ad0d13c05c1a..fb0a5240e6936 100644 --- a/docker/docker.go +++ b/docker/docker.go @@ -3,6 +3,7 @@ package main import ( "fmt" "os" + "sort" "github.com/Sirupsen/logrus" "github.com/docker/docker/api/client" @@ -35,7 +36,10 @@ func main() { help := "\nCommands:\n" - for _, cmd := range dockerCommands { + allCommands := append(dockerCommands, experimentalCommands...) + sort.Sort(byName(allCommands)) + + for _, cmd := range allCommands { help += fmt.Sprintf(" %-10.10s%s\n", cmd.name, cmd.description) } diff --git a/docker/flags_experimental.go b/docker/flags_experimental.go new file mode 100644 index 0000000000000..08893a7cf0b49 --- /dev/null +++ b/docker/flags_experimental.go @@ -0,0 +1,10 @@ +// +build experimental + +package main + +var ( + experimentalCommands = []command{ + {"checkpoint", "Checkpoint one or more running containers"}, + {"restore", "Restore one or more checkpointed containers"}, + } +) diff --git a/docker/flags_stub.go b/docker/flags_stub.go new file mode 100644 index 0000000000000..d627b86a3a8f7 --- /dev/null +++ b/docker/flags_stub.go @@ -0,0 +1,7 @@ +// +build !experimental + +package main + +var ( + experimentalCommands = []command{} +) diff --git a/integration-cli/docker_cli_checkpoint_test.go b/integration-cli/docker_cli_checkpoint_test.go new file mode 100644 index 0000000000000..09ec47a9a0d54 --- /dev/null +++ b/integration-cli/docker_cli_checkpoint_test.go @@ -0,0 +1,39 @@ +// +build experimental + +package main + +import ( + "os/exec" + "strings" + + "github.com/go-check/check" +) + +func (s *DockerSuite) TestCheckpointAndRestore(c *check.C) { + defer unpauseAllContainers() + + runCmd := exec.Command(dockerBinary, "run", "-d", "busybox", "top") + out, _, err := runCommandWithOutput(runCmd) + if err != nil { + c.Fatalf("failed to run container: %v, output: %q", err, out) + } + + containerID := strings.TrimSpace(out) + checkpointCmd := exec.Command(dockerBinary, "checkpoint", containerID) + out, _, err = runCommandWithOutput(checkpointCmd) + if err != nil { + c.Fatalf("failed to checkpoint container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(out, check.Equals, "true") + + restoreCmd := exec.Command(dockerBinary, "restore", containerID) + out, _, _, err = runCommandWithStdoutStderr(restoreCmd) + if err != nil { + c.Fatalf("failed to restore container: %v, output: %q", err, out) + } + + out, err = inspectField(containerID, "State.Checkpointed") + c.Assert(out, check.Equals, "false") +} diff --git a/integration-cli/docker_cli_help_experimental_test.go b/integration-cli/docker_cli_help_experimental_test.go new file mode 100644 index 0000000000000..55080154100a3 --- /dev/null +++ b/integration-cli/docker_cli_help_experimental_test.go @@ -0,0 +1,5 @@ +// +build experimental + +package main + +var totalDockerCLICommands = 41 diff --git a/integration-cli/docker_cli_help_standard_test.go b/integration-cli/docker_cli_help_standard_test.go new file mode 100644 index 0000000000000..f889ba4b27731 --- /dev/null +++ b/integration-cli/docker_cli_help_standard_test.go @@ -0,0 +1,5 @@ +// +build !experimental + +package main + +var totalDockerCLICommands = 39 diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go index 311308359ac9d..dc317ddb348d6 100644 --- a/integration-cli/docker_cli_help_test.go +++ b/integration-cli/docker_cli_help_test.go @@ -238,13 +238,13 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) { } - expected := 39 + expected := totalDockerCLICommands if isLocalDaemon { expected++ // for the daemon command } if len(cmds) != expected { c.Fatalf("Wrong # of cmds(%d), it should be: %d\nThe list:\n%q", - len(cmds), expected, cmds) + len(cmds), totalDockerCLICommands, cmds) } } diff --git a/project/PACKAGERS.md b/project/PACKAGERS.md index 22f24b4789b77..5ba406cedf009 100644 --- a/project/PACKAGERS.md +++ b/project/PACKAGERS.md @@ -303,6 +303,9 @@ by having support for them in the kernel or userspace. A few examples include: least the "auplink" utility from aufs-tools) * BTRFS graph driver (requires BTRFS support enabled in the kernel) * ZFS graph driver (requires userspace zfs-utils and a corresponding kernel module) +* Checkpoint/Restore containers: + - requires criu version 1.5.2 or later (criu.org) + - requires kernel version 3.19 or later if using overlay-fs ## Daemon Init Script diff --git a/runconfig/restore.go b/runconfig/restore.go new file mode 100644 index 0000000000000..70deac8e8683a --- /dev/null +++ b/runconfig/restore.go @@ -0,0 +1,21 @@ +package runconfig + +type VethPairName struct { + InName string + OutName string +} + +type CriuConfig struct { + ImagesDirectory string + WorkDirectory string + LeaveRunning bool + TcpEstablished bool + ExternalUnixConnections bool + ShellJob bool + VethPairs []VethPairName +} + +type RestoreConfig struct { + CriuOpts CriuConfig + ForceRestore bool +} diff --git a/vendor/src/github.com/docker/libnetwork/endpoint.go b/vendor/src/github.com/docker/libnetwork/endpoint.go index 3475e9eafda04..b0354a03bc6e1 100644 --- a/vendor/src/github.com/docker/libnetwork/endpoint.go +++ b/vendor/src/github.com/docker/libnetwork/endpoint.go @@ -54,6 +54,9 @@ type Endpoint interface { // Retrieve the interfaces' statistics from the sandbox Statistics() (map[string]*sandbox.InterfaceStatistics, error) + + // Retrieve the interfaces from sandbox, for restoring checkpointed container + SandboxInterfaces() []sandbox.Interface } // EndpointOption is a option setter function type used to pass varios options to Network @@ -583,6 +586,17 @@ func (ep *endpoint) Statistics() (map[string]*sandbox.InterfaceStatistics, error return m, nil } +func (ep *endpoint) SandboxInterfaces() []sandbox.Interface { + n := ep.network + + n.Lock() + c := n.ctrlr + n.Unlock() + + sbox := c.sandboxGet(ep.container.data.SandboxKey) + return sbox.Info().Interfaces() +} + func (ep *endpoint) deleteEndpoint() error { ep.Lock() n := ep.network diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go index 9a27eb432faad..57d565ab69bc7 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -520,6 +520,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { break } } + + /* XXX, we can not get interface from config */ for _, iface := range c.config.Networks { switch iface.Type { case "veth": @@ -532,6 +534,14 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { break } } + /* Use criuop to pass veth pairs */ + for _, i := range criuOpts.VethPairs { + outname := i.OutName + "@docker0" + veth := new(criurpc.CriuVethPair) + veth.IfOut = proto.String(outname) + veth.IfIn = proto.String(i.InName) + req.Opts.Veths = append(req.Opts.Veths, veth) + } var ( fds []string diff --git a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go index bca81672eac8b..1e3c79e6ce9db 100644 --- a/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go +++ b/vendor/src/github.com/opencontainers/runc/libcontainer/criu_opts.go @@ -5,6 +5,11 @@ type CriuPageServerInfo struct { Port int32 // port number of CRIU page server } +type VethPairName struct { + InName string + OutName string +} + type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and write logs/pidfiles/stats to @@ -14,4 +19,5 @@ type CriuOpts struct { ShellJob bool // allow to dump and restore shell jobs FileLocks bool // handle file locks, for safety PageServer CriuPageServerInfo // allow to dump to criu page server + VethPairs []VethPairName // pass the veth to criu when restore }