Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions api/client/checkpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package client

import (
"fmt"

"github.com/docker/libcontainer"
)

// CmdCheckpoint checkpoints one or more running containers.
//
// The flags are collected into a libcontainer.CriuOpts value which is sent as
// the body of one POST /containers/<name>/checkpoint request per container
// argument. A failing request is reported on the CLI's error stream and the
// remaining containers are still attempted; if any request failed, a single
// generic error is returned after the loop. The name of every container whose
// request succeeded is printed on the CLI's output stream.
//
// When no container argument is given the usage text is printed and nil is
// returned.
func (cli *DockerCli) CmdCheckpoint(args ...string) error {
	cmd := cli.Subcmd("checkpoint", "CONTAINER [CONTAINER...]", "Checkpoint one or more running containers", true)

	// Flags are registered in the same order as before.
	imageDir := cmd.String([]string{"-image-dir"}, "", "(optional) directory for storing checkpoint image files")
	workDir := cmd.String([]string{"-work-dir"}, "", "directory for storing log file")
	leaveRunning := cmd.Bool([]string{"-leave-running"}, false, "leave the container running after checkpointing")
	allowTCP := cmd.Bool([]string{"-allow-tcp"}, false, "allow checkpointing established tcp connections")
	allowExtUnix := cmd.Bool([]string{"-allow-ext-unix"}, false, "allow checkpointing external unix connections")
	allowShell := cmd.Bool([]string{"-allow-shell"}, false, "allow checkpointing shell jobs")

	if err := cmd.ParseFlags(args, true); err != nil {
		return err
	}
	if cmd.NArg() < 1 {
		cmd.Usage()
		return nil
	}

	opts := &libcontainer.CriuOpts{
		ImagesDirectory:         *imageDir,
		WorkDirectory:           *workDir,
		LeaveRunning:            *leaveRunning,
		TcpEstablished:          *allowTCP,
		ExternalUnixConnections: *allowExtUnix,
		ShellJob:                *allowShell,
	}

	var result error
	for _, target := range cmd.Args() {
		if _, _, err := readBody(cli.call("POST", "/containers/"+target+"/checkpoint", opts, nil)); err != nil {
			// Report the failure and keep going with the remaining containers.
			fmt.Fprintf(cli.err, "%s\n", err)
			result = fmt.Errorf("Error: failed to checkpoint one or more containers")
			continue
		}
		fmt.Fprintf(cli.out, "%s\n", target)
	}
	return result
}
53 changes: 53 additions & 0 deletions api/client/restore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package client

import (
"fmt"

"github.com/docker/docker/runconfig"
"github.com/docker/libcontainer"
)

// CmdRestore restores one or more checkpointed containers.
//
// The flags are collected into a runconfig.RestoreConfig value (CRIU options
// plus the force flag) which is sent as the body of one
// POST /containers/<name>/restore request per container argument. A failing
// request is reported on the CLI's error stream and the remaining containers
// are still attempted; if any request failed, a single generic error is
// returned after the loop. The name of every container whose request
// succeeded is printed on the CLI's output stream.
//
// When no container argument is given the usage text is printed and nil is
// returned.
func (cli *DockerCli) CmdRestore(args ...string) error {
	cmd := cli.Subcmd("restore", "CONTAINER [CONTAINER...]", "Restore one or more checkpointed containers", true)

	// Flags are registered in the same order as before.
	imageDir := cmd.String([]string{"-image-dir"}, "", "(optional) directory to restore image files from")
	workDir := cmd.String([]string{"-work-dir"}, "", "directory to store temp files and restore.log")
	allowTCP := cmd.Bool([]string{"-allow-tcp"}, false, "allow restoring tcp connections")
	allowExtUnix := cmd.Bool([]string{"-allow-ext-unix"}, false, "allow restoring external unix connections")
	allowShell := cmd.Bool([]string{"-allow-shell"}, false, "allow restoring shell jobs")
	force := cmd.Bool([]string{"-force"}, false, "try bypassing checks for current container state")

	if err := cmd.ParseFlags(args, true); err != nil {
		return err
	}
	if cmd.NArg() < 1 {
		cmd.Usage()
		return nil
	}

	criuOpts := libcontainer.CriuOpts{
		ImagesDirectory:         *imageDir,
		WorkDirectory:           *workDir,
		TcpEstablished:          *allowTCP,
		ExternalUnixConnections: *allowExtUnix,
		ShellJob:                *allowShell,
	}
	config := &runconfig.RestoreConfig{
		CriuOpts:     criuOpts,
		ForceRestore: *force,
	}

	var result error
	for _, target := range cmd.Args() {
		if _, _, err := readBody(cli.call("POST", "/containers/"+target+"/restore", config, nil)); err != nil {
			// Report the failure and keep going with the remaining containers.
			fmt.Fprintf(cli.err, "%s\n", err)
			result = fmt.Errorf("Error: failed to restore one or more containers")
			continue
		}
		fmt.Fprintf(cli.out, "%s\n", target)
	}
	return result
}
89 changes: 67 additions & 22 deletions api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/docker/docker/pkg/version"
"github.com/docker/docker/runconfig"
"github.com/docker/docker/utils"
"github.com/docker/libcontainer"
"github.com/docker/libnetwork/portallocator"
)

Expand Down Expand Up @@ -1286,6 +1287,48 @@ func (s *Server) postContainersCopy(version version.Version, w http.ResponseWrit
return nil
}

// postContainersCheckpoint handles POST /containers/{name}/checkpoint.
//
// The request body is decoded as JSON into a libcontainer.CriuOpts value and
// handed to the daemon together with the "name" route variable. On success
// the response is 204 No Content; any error from form parsing, body decoding
// or the daemon is returned to the caller unchanged.
func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if vars == nil {
		return fmt.Errorf("Missing parameter")
	}
	if err := parseForm(r); err != nil {
		return err
	}

	var opts libcontainer.CriuOpts
	decoder := json.NewDecoder(r.Body)
	if err := decoder.Decode(&opts); err != nil {
		return err
	}

	containerName := vars["name"]
	if err := s.daemon.ContainerCheckpoint(containerName, &opts); err != nil {
		return err
	}

	w.WriteHeader(http.StatusNoContent)
	return nil
}

// postContainersRestore handles POST /containers/{name}/restore.
//
// The request body is decoded as JSON into a runconfig.RestoreConfig value;
// its CRIU options and ForceRestore flag are handed to the daemon together
// with the "name" route variable. On success the response is 204 No Content;
// any error from form parsing, body decoding or the daemon is returned to
// the caller unchanged.
func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if vars == nil {
		return fmt.Errorf("Missing parameter")
	}
	if err := parseForm(r); err != nil {
		return err
	}

	config := &runconfig.RestoreConfig{}
	decoder := json.NewDecoder(r.Body)
	if err := decoder.Decode(config); err != nil {
		return err
	}

	containerName := vars["name"]
	if err := s.daemon.ContainerRestore(containerName, &config.CriuOpts, config.ForceRestore); err != nil {
		return err
	}

	w.WriteHeader(http.StatusNoContent)
	return nil
}

func (s *Server) postContainerExecCreate(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := parseForm(r); err != nil {
return nil
Expand Down Expand Up @@ -1488,28 +1531,30 @@ func createRouter(s *Server) *mux.Router {
"/exec/{id:.*}/json": s.getExecByID,
},
"POST": {
"/auth": s.postAuth,
"/commit": s.postCommit,
"/build": s.postBuild,
"/images/create": s.postImagesCreate,
"/images/load": s.postImagesLoad,
"/images/{name:.*}/push": s.postImagesPush,
"/images/{name:.*}/tag": s.postImagesTag,
"/containers/create": s.postContainersCreate,
"/containers/{name:.*}/kill": s.postContainersKill,
"/containers/{name:.*}/pause": s.postContainersPause,
"/containers/{name:.*}/unpause": s.postContainersUnpause,
"/containers/{name:.*}/restart": s.postContainersRestart,
"/containers/{name:.*}/start": s.postContainersStart,
"/containers/{name:.*}/stop": s.postContainersStop,
"/containers/{name:.*}/wait": s.postContainersWait,
"/containers/{name:.*}/resize": s.postContainersResize,
"/containers/{name:.*}/attach": s.postContainersAttach,
"/containers/{name:.*}/copy": s.postContainersCopy,
"/containers/{name:.*}/exec": s.postContainerExecCreate,
"/exec/{name:.*}/start": s.postContainerExecStart,
"/exec/{name:.*}/resize": s.postContainerExecResize,
"/containers/{name:.*}/rename": s.postContainerRename,
"/auth": s.postAuth,
"/commit": s.postCommit,
"/build": s.postBuild,
"/images/create": s.postImagesCreate,
"/images/load": s.postImagesLoad,
"/images/{name:.*}/push": s.postImagesPush,
"/images/{name:.*}/tag": s.postImagesTag,
"/containers/create": s.postContainersCreate,
"/containers/{name:.*}/kill": s.postContainersKill,
"/containers/{name:.*}/pause": s.postContainersPause,
"/containers/{name:.*}/unpause": s.postContainersUnpause,
"/containers/{name:.*}/restart": s.postContainersRestart,
"/containers/{name:.*}/start": s.postContainersStart,
"/containers/{name:.*}/stop": s.postContainersStop,
"/containers/{name:.*}/wait": s.postContainersWait,
"/containers/{name:.*}/resize": s.postContainersResize,
"/containers/{name:.*}/attach": s.postContainersAttach,
"/containers/{name:.*}/copy": s.postContainersCopy,
"/containers/{name:.*}/exec": s.postContainerExecCreate,
"/exec/{name:.*}/start": s.postContainerExecStart,
"/exec/{name:.*}/resize": s.postContainerExecResize,
"/containers/{name:.*}/rename": s.postContainerRename,
"/containers/{name:.*}/checkpoint": s.postContainersCheckpoint,
"/containers/{name:.*}/restore": s.postContainersRestore,
},
"DELETE": {
"/containers/{name:.*}": s.deleteContainers,
Expand Down
56 changes: 56 additions & 0 deletions daemon/checkpoint.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package daemon

import (
"fmt"

"github.com/docker/libcontainer"
)

// ContainerCheckpoint checkpoints the running container identified by name
// using the supplied CRIU options.
//
// It returns an error when the container cannot be looked up, when it is not
// running, or when the checkpoint itself fails. On success a "checkpoint"
// event is logged for the container.
func (daemon *Daemon) ContainerCheckpoint(name string, opts *libcontainer.CriuOpts) error {
	container, err := daemon.Get(name)
	switch {
	case err != nil:
		return err
	case !container.IsRunning():
		return fmt.Errorf("Container %s not running", name)
	}

	err = container.Checkpoint(opts)
	if err != nil {
		return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
	}

	container.LogEvent("checkpoint")
	return nil
}

// ContainerRestore restores the checkpointed container identified by name
// using the supplied CRIU options.
//
// Without forceRestore the container must not be running and must currently
// be checkpointed. With forceRestore those state checks are bypassed, but a
// container that has never been checkpointed then needs an explicit
// opts.ImagesDirectory to restore from.
//
// If the restore fails a "die" event is logged and a wrapped error is
// returned; on success a "restore" event is logged.
func (daemon *Daemon) ContainerRestore(name string, opts *libcontainer.CriuOpts, forceRestore bool) error {
	container, err := daemon.Get(name)
	if err != nil {
		return err
	}

	// TODO: It's possible we only want to bypass the checkpointed check,
	// I'm not sure how this will work if the container is already running
	//
	// The cases are ordered so the container state queries run in the same
	// sequence as the nested conditionals they replace.
	switch {
	case !forceRestore && container.IsRunning():
		return fmt.Errorf("Container %s already running", name)
	case !forceRestore && !container.IsCheckpointed():
		return fmt.Errorf("Container %s is not checkpointed", name)
	case forceRestore && !container.HasBeenCheckpointed() && opts.ImagesDirectory == "":
		return fmt.Errorf("You must specify an image directory to restore from %s", name)
	}

	err = container.Restore(opts, forceRestore)
	if err != nil {
		container.LogEvent("die")
		return fmt.Errorf("Cannot restore container %s: %s", name, err)
	}

	container.LogEvent("restore")
	return nil
}
85 changes: 83 additions & 2 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"syscall"
"time"

"github.com/docker/libcontainer"
"github.com/docker/libcontainer/label"

"github.com/Sirupsen/logrus"
Expand Down Expand Up @@ -255,7 +256,7 @@ func (container *Container) Start() (err error) {
if err := container.Mount(); err != nil {
return err
}
if err := container.initializeNetworking(); err != nil {
if err := container.initializeNetworking(false); err != nil {
return err
}
container.verifyDaemonSettings()
Expand Down Expand Up @@ -342,7 +343,11 @@ func (container *Container) isNetworkAllocated() bool {
// cleanup releases any network resources allocated to the container along with any rules
// around how containers are linked together. It also unmounts the container's root filesystem.
func (container *Container) cleanup() {
container.ReleaseNetwork()
if container.IsCheckpointed() {
logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
} else {
container.ReleaseNetwork()
}

disableAllActiveLinks(container)

Expand Down Expand Up @@ -564,6 +569,58 @@ func validateID(id string) error {
return nil
}


// Checkpoint asks the daemon to checkpoint this container with the given
// CRIU options. Unless opts.LeaveRunning is set, the container's network is
// released once the checkpoint has succeeded. A checkpoint error is returned
// unchanged and leaves the network untouched.
func (container *Container) Checkpoint(opts *libcontainer.CriuOpts) error {
	err := container.daemon.Checkpoint(container, opts)
	if err != nil {
		return err
	}

	if !opts.LeaveRunning {
		container.ReleaseNetwork()
	}
	return nil
}

// Restore prepares this container for a restore and then waits for the
// restore to complete.
//
// While holding the container lock it mounts the container, initializes
// networking (in restore mode), verifies daemon settings, sets up linked
// containers, the working directory, the command and the mounts, and finally
// blocks in waitForRestore. If any step returns an error, container.cleanup()
// is invoked before the error is handed back to the caller.
//
// XXX Start() does a lot more. Not sure if we have
// to do everything it does.
func (container *Container) Restore(opts *libcontainer.CriuOpts, forceRestore bool) (err error) {
	container.Lock()
	defer container.Unlock()

	// err is the named result (as in Start) so that this deferred check
	// observes the error of every return path. With a plain local variable
	// the Mount error was shadowed and the waitForRestore error was never
	// assigned, so cleanup was silently skipped for both.
	defer func() {
		if err != nil {
			container.cleanup()
		}
	}()

	if err = container.Mount(); err != nil {
		return err
	}
	if err = container.initializeNetworking(true); err != nil {
		return err
	}
	container.verifyDaemonSettings()

	linkedEnv, err := container.setupLinkedContainers()
	if err != nil {
		return err
	}
	if err = container.setupWorkingDirectory(); err != nil {
		return err
	}

	env := container.createDaemonEnvironment(linkedEnv)
	if err = populateCommand(container, env); err != nil {
		return err
	}

	if err = container.setupMounts(); err != nil {
		return err
	}

	// Returning the call directly stores its error in the named result
	// before the deferred cleanup check runs.
	return container.waitForRestore(opts, forceRestore)
}

func (container *Container) Copy(resource string) (io.ReadCloser, error) {
container.Lock()
defer container.Unlock()
Expand Down Expand Up @@ -807,6 +864,29 @@ func (container *Container) monitorExec(execConfig *execConfig, callback execdri
return err
}

// waitForRestore is the restore counterpart of waitForStart(): it installs a
// fresh container monitor and blocks until either the monitor signals that
// the restore finished or the monitor's Restore call returns.
//
// When the restore signal arrives first, a non-zero container.ExitCode is
// reported as "restore process failed"; otherwise nil is returned. When
// Restore returns first, its result is returned as is.
//
// XXX Does RestartPolicy apply here?
func (container *Container) waitForRestore(opts *libcontainer.CriuOpts, forceRestore bool) error {
	container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy)

	// Both channel operands are obtained up front, in the same order a
	// select statement would evaluate them.
	//
	// After calling promise.Go() we'll have two goroutines:
	// - The current goroutine that will block in the select
	//   below until restore is done.
	// - A new goroutine that will restore the container and
	//   wait for it to exit.
	restored := container.monitor.restoreSignal
	restoreResult := promise.Go(func() error {
		return container.monitor.Restore(opts, forceRestore)
	})

	select {
	case err := <-restoreResult:
		return err
	case <-restored:
		if container.ExitCode != 0 {
			return fmt.Errorf("restore process failed")
		}
		return nil
	}
}

// Attach wires the given stdin, stdout and stderr to the container's stream
// configuration by delegating to attach, passing along the OpenStdin,
// StdinOnce and Tty settings from the container's config. It returns the
// error channel produced by attach.
func (c *Container) Attach(stdin io.ReadCloser, stdout io.Writer, stderr io.Writer) chan error {
	openStdin, stdinOnce, tty := c.Config.OpenStdin, c.Config.StdinOnce, c.Config.Tty
	return attach(&c.StreamConfig, openStdin, stdinOnce, tty, stdin, stdout, stderr)
}
Expand Down Expand Up @@ -886,6 +966,7 @@ func attach(streamConfig *StreamConfig, openStdin, stdinOnce, tty bool, stdin io
wg.Add(1)
}


// Connect stdin of container to the http conn.
go func() {
if stdin == nil || !openStdin {
Expand Down
Loading