Skip to content

Commit 246f062

Browse files
committed
Update checkpoint/restore support to match docker/master
Docker-DCO-1.1-Signed-off-by: Ross Boucher <rboucher@gmail.com> (github: boucher)
1 parent 243c8d0 commit 246f062

File tree

4 files changed

+69
-143
lines changed

4 files changed

+69
-143
lines changed

daemon/execdriver/driver.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ type Driver interface {
6060
Kill(c *Command, sig int) error
6161
Pause(c *Command) error
6262
Unpause(c *Command) error
63-
Checkpoint(c *Command) error
64-
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error)
63+
Checkpoint(c *Command, opts *libcontainer.CriuOpts) error
64+
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (ExitStatus, error)
6565
Name() string // Driver name
6666
Info(id string) Info // "temporary" hack (until we move state from core to plugins)
6767
GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.

daemon/execdriver/lxc/driver.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -547,12 +547,12 @@ func (d *driver) Unpause(c *execdriver.Command) error {
547547
return err
548548
}
549549

550-
func (d *driver) Checkpoint(c *execdriver.Command) error {
550+
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
551551
return fmt.Errorf("Checkpointing lxc containers not supported yet\n")
552552
}
553553

554-
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
555-
return 0, fmt.Errorf("Restoring lxc containers not supported yet\n")
554+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
555+
return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n")
556556
}
557557

558558
func (d *driver) Terminate(c *execdriver.Command) error {

daemon/execdriver/native/create.go

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ package native
44

55
import (
66
"errors"
7-
"encoding/json"
87
"fmt"
98
"net"
109
"strings"
@@ -89,24 +88,6 @@ func generateIfaceName() (string, error) {
8988
return "", errors.New("Failed to find name for new interface")
9089
}
9190

92-
// Re-create the container type from the image that was saved during checkpoint.
93-
func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) {
94-
// Read the container.json.
95-
f1, err := os.Open(filepath.Join(imageDir, "container.json"))
96-
if err != nil {
97-
return nil, err
98-
}
99-
defer f1.Close()
100-
101-
var container *libcontainer.Config
102-
err = json.NewDecoder(f1).Decode(&container)
103-
if err != nil {
104-
return nil, err
105-
}
106-
107-
return container, nil
108-
}
109-
11091
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
11192
if c.Network.ContainerID != "" {
11293
d.Lock()

daemon/execdriver/native/driver.go

Lines changed: 64 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/docker/docker/pkg/reexec"
2020
sysinfo "github.com/docker/docker/pkg/system"
2121
"github.com/docker/docker/pkg/term"
22-
"github.com/docker/docker/utils"
2322
"github.com/docker/libcontainer"
2423
"github.com/docker/libcontainer/apparmor"
2524
"github.com/docker/libcontainer/cgroups/systemd"
@@ -275,153 +274,99 @@ func (d *driver) Unpause(c *execdriver.Command) error {
275274
return active.Resume()
276275
}
277276

278-
// XXX Where is the right place for the following
279-
// const and getCheckpointImageDir() function?
280-
const (
281-
containersDir = "/var/lib/docker/containers"
282-
criuImgDir = "criu_img"
283-
)
284-
285-
func getCheckpointImageDir(containerId string) string {
286-
return filepath.Join(containersDir, containerId, criuImgDir)
287-
}
288-
289-
func (d *driver) Checkpoint(c *execdriver.Command) error {
277+
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
290278
active := d.activeContainers[c.ID]
291279
if active == nil {
292280
return fmt.Errorf("active container for %s does not exist", c.ID)
293281
}
294-
container := active.container
295-
296-
// Create an image directory for this container (which
297-
// may already exist from a previous checkpoint).
298-
imageDir := getCheckpointImageDir(c.ID)
299-
err := os.MkdirAll(imageDir, 0700)
300-
if err != nil && !os.IsExist(err) {
301-
return err
302-
}
303-
304-
// Copy container.json and state.json files to the CRIU
305-
// image directory for later use during restore. Do this
306-
// before checkpointing because after checkpoint the container
307-
// will exit and these files will be removed.
308-
log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir)
309-
srcFiles := []string{"container.json", "state.json"}
310-
for _, f := range srcFiles {
311-
srcFile := filepath.Join(d.root, c.ID, f)
312-
dstFile := filepath.Join(imageDir, f)
313-
if _, err := utils.CopyFile(srcFile, dstFile); err != nil {
314-
return err
315-
}
316-
}
317282

318283
d.Lock()
319284
defer d.Unlock()
320-
err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid)
285+
err := active.Checkpoint(opts)
321286
if err != nil {
322287
return err
323288
}
324289

325290
return nil
326291
}
327292

328-
type restoreOutput struct {
329-
exitCode int
330-
err error
331-
}
293+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
294+
var (
295+
cont libcontainer.Container
296+
err error
297+
)
332298

333-
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
334-
imageDir := getCheckpointImageDir(c.ID)
335-
container, err := d.createRestoreContainer(c, imageDir)
299+
cont, err = d.factory.Load(c.ID)
336300
if err != nil {
337-
return 1, err
301+
if forceRestore {
302+
var config *configs.Config
303+
config, err = d.createContainer(c)
304+
if err != nil {
305+
return execdriver.ExitStatus{ExitCode: -1}, err
306+
}
307+
cont, err = d.factory.Create(c.ID, config)
308+
if err != nil {
309+
return execdriver.ExitStatus{ExitCode: -1}, err
310+
}
311+
} else {
312+
return execdriver.ExitStatus{ExitCode: -1}, err
313+
}
338314
}
339315

340-
var term execdriver.Terminal
341-
342-
if c.ProcessConfig.Tty {
343-
term, err = NewTtyConsole(&c.ProcessConfig, pipes)
344-
} else {
345-
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
316+
p := &libcontainer.Process{
317+
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
318+
Env: c.ProcessConfig.Env,
319+
Cwd: c.WorkingDir,
320+
User: c.ProcessConfig.User,
346321
}
347-
if err != nil {
348-
return -1, err
322+
323+
config := cont.Config()
324+
if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil {
325+
return execdriver.ExitStatus{ExitCode: -1}, err
349326
}
350-
c.ProcessConfig.Terminal = term
351327

352328
d.Lock()
353-
d.activeContainers[c.ID] = &activeContainer{
354-
container: container,
355-
cmd: &c.ProcessConfig.Cmd,
356-
}
329+
d.activeContainers[c.ID] = cont
357330
d.Unlock()
358-
defer d.cleanContainer(c.ID)
331+
defer func() {
332+
cont.Destroy()
333+
d.cleanContainer(c.ID)
334+
}()
359335

360-
// Since the CRIU binary exits after restoring the container, we
361-
// need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
362-
// so that it'll be owned by this process (Docker daemon) after restore.
363-
//
364-
// XXX This really belongs to where the Docker daemon starts.
365-
if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 {
366-
return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr)
336+
if err := cont.Restore(p, opts); err != nil {
337+
return execdriver.ExitStatus{ExitCode: -1}, err
367338
}
368339

369-
restoreOutputChan := make(chan restoreOutput, 1)
370-
waitForRestore := make(chan struct{})
371-
372-
go func() {
373-
exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir,
374-
func(child *os.File, args []string) *exec.Cmd {
375-
cmd := new(exec.Cmd)
376-
cmd.Path = d.initPath
377-
cmd.Args = append([]string{
378-
DriverName,
379-
"-restore",
380-
"-pipe", "3",
381-
"--",
382-
}, args...)
383-
cmd.ExtraFiles = []*os.File{child}
384-
return cmd
385-
},
386-
func(restorePid int) error {
387-
log.CRDbg("restorePid=%d", restorePid)
388-
if restorePid == 0 {
389-
restoreCallback(&c.ProcessConfig, 0)
390-
return nil
391-
}
392-
393-
// The container.json file should be written *after* the container
394-
// has started because its StdFds cannot be initialized before.
395-
//
396-
// XXX How do we handle error here?
397-
d.writeContainerFile(container, c.ID)
398-
close(waitForRestore)
399-
if restoreCallback != nil {
400-
c.ProcessConfig.Process, err = os.FindProcess(restorePid)
401-
if err != nil {
402-
log.Debugf("cannot find restored process %d", restorePid)
403-
return err
404-
}
405-
c.ContainerPid = c.ProcessConfig.Process.Pid
406-
restoreCallback(&c.ProcessConfig, c.ContainerPid)
407-
}
408-
return nil
409-
})
410-
restoreOutputChan <- restoreOutput{exitCode, err}
411-
}()
340+
// FIXME: no idea if any of this is needed...
341+
if restoreCallback != nil {
342+
pid, err := p.Pid()
343+
if err != nil {
344+
p.Signal(os.Kill)
345+
p.Wait()
346+
return execdriver.ExitStatus{ExitCode: -1}, err
347+
}
348+
restoreCallback(&c.ProcessConfig, pid)
349+
}
412350

413-
select {
414-
case restoreOutput := <-restoreOutputChan:
415-
// there was an error
416-
return restoreOutput.exitCode, restoreOutput.err
417-
case <-waitForRestore:
418-
// container restored
419-
break
351+
oom := notifyOnOOM(cont)
352+
waitF := p.Wait
353+
if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
354+
// we need this hack to track processes with inherited fds,
355+
// because cmd.Wait() waits for all streams to be copied
356+
waitF = waitInPIDHost(p, cont)
357+
}
358+
ps, err := waitF()
359+
if err != nil {
360+
execErr, ok := err.(*exec.ExitError)
361+
if !ok {
362+
return execdriver.ExitStatus{ExitCode: -1}, err
363+
}
364+
ps = execErr.ProcessState
420365
}
421366

422-
// Wait for the container to exit.
423-
restoreOutput := <-restoreOutputChan
424-
return restoreOutput.exitCode, restoreOutput.err
367+
cont.Destroy()
368+
_, oomKill := <-oom
369+
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
425370
}
426371

427372
func (d *driver) Terminate(c *execdriver.Command) error {

0 commit comments

Comments
 (0)