Skip to content

Commit 75826ad

Browse files
committed
Update checkpoint/restore support to match docker/master
Docker-DCO-1.1-Signed-off-by: Ross Boucher <rboucher@gmail.com> (github: boucher)
1 parent 591feba commit 75826ad

File tree

4 files changed

+69
-143
lines changed

4 files changed

+69
-143
lines changed

daemon/execdriver/driver.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,8 @@ type Driver interface {
6060
Kill(c *Command, sig int) error
6161
Pause(c *Command) error
6262
Unpause(c *Command) error
63-
Checkpoint(c *Command) error
64-
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error)
63+
Checkpoint(c *Command, opts *libcontainer.CriuOpts) error
64+
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (ExitStatus, error)
6565
Name() string // Driver name
6666
Info(id string) Info // "temporary" hack (until we move state from core to plugins)
6767
GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.

daemon/execdriver/lxc/driver.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -547,12 +547,12 @@ func (d *driver) Unpause(c *execdriver.Command) error {
547547
return err
548548
}
549549

550-
func (d *driver) Checkpoint(c *execdriver.Command) error {
550+
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
551551
return fmt.Errorf("Checkpointing lxc containers not supported yet\n")
552552
}
553553

554-
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
555-
return 0, fmt.Errorf("Restoring lxc containers not supported yet\n")
554+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
555+
return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n")
556556
}
557557

558558
func (d *driver) Terminate(c *execdriver.Command) error {

daemon/execdriver/native/create.go

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@ package native
44

55
import (
66
"errors"
7-
"encoding/json"
87
"fmt"
98
"net"
109
"strings"
@@ -89,24 +88,6 @@ func generateIfaceName() (string, error) {
8988
return "", errors.New("Failed to find name for new interface")
9089
}
9190

92-
// Re-create the container type from the image that was saved during checkpoint.
93-
func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) {
94-
// Read the container.json.
95-
f1, err := os.Open(filepath.Join(imageDir, "container.json"))
96-
if err != nil {
97-
return nil, err
98-
}
99-
defer f1.Close()
100-
101-
var container *libcontainer.Config
102-
err = json.NewDecoder(f1).Decode(&container)
103-
if err != nil {
104-
return nil, err
105-
}
106-
107-
return container, nil
108-
}
109-
11091
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
11192
if c.Network == nil {
11293
return nil

daemon/execdriver/native/driver.go

Lines changed: 64 additions & 119 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ import (
1919
"github.com/docker/docker/pkg/reexec"
2020
sysinfo "github.com/docker/docker/pkg/system"
2121
"github.com/docker/docker/pkg/term"
22-
"github.com/docker/docker/utils"
2322
"github.com/docker/libcontainer"
2423
"github.com/docker/libcontainer/apparmor"
2524
"github.com/docker/libcontainer/cgroups/systemd"
@@ -279,153 +278,99 @@ func (d *driver) Unpause(c *execdriver.Command) error {
279278
return active.Resume()
280279
}
281280

282-
// XXX Where is the right place for the following
283-
// const and getCheckpointImageDir() function?
284-
const (
285-
containersDir = "/var/lib/docker/containers"
286-
criuImgDir = "criu_img"
287-
)
288-
289-
func getCheckpointImageDir(containerId string) string {
290-
return filepath.Join(containersDir, containerId, criuImgDir)
291-
}
292-
293-
func (d *driver) Checkpoint(c *execdriver.Command) error {
281+
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
294282
active := d.activeContainers[c.ID]
295283
if active == nil {
296284
return fmt.Errorf("active container for %s does not exist", c.ID)
297285
}
298-
container := active.container
299-
300-
// Create an image directory for this container (which
301-
// may already exist from a previous checkpoint).
302-
imageDir := getCheckpointImageDir(c.ID)
303-
err := os.MkdirAll(imageDir, 0700)
304-
if err != nil && !os.IsExist(err) {
305-
return err
306-
}
307-
308-
// Copy container.json and state.json files to the CRIU
309-
// image directory for later use during restore. Do this
310-
// before checkpointing because after checkpoint the container
311-
// will exit and these files will be removed.
312-
log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir)
313-
srcFiles := []string{"container.json", "state.json"}
314-
for _, f := range srcFiles {
315-
srcFile := filepath.Join(d.root, c.ID, f)
316-
dstFile := filepath.Join(imageDir, f)
317-
if _, err := utils.CopyFile(srcFile, dstFile); err != nil {
318-
return err
319-
}
320-
}
321286

322287
d.Lock()
323288
defer d.Unlock()
324-
err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid)
289+
err := active.Checkpoint(opts)
325290
if err != nil {
326291
return err
327292
}
328293

329294
return nil
330295
}
331296

332-
type restoreOutput struct {
333-
exitCode int
334-
err error
335-
}
297+
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
298+
var (
299+
cont libcontainer.Container
300+
err error
301+
)
336302

337-
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
338-
imageDir := getCheckpointImageDir(c.ID)
339-
container, err := d.createRestoreContainer(c, imageDir)
303+
cont, err = d.factory.Load(c.ID)
340304
if err != nil {
341-
return 1, err
305+
if forceRestore {
306+
var config *configs.Config
307+
config, err = d.createContainer(c)
308+
if err != nil {
309+
return execdriver.ExitStatus{ExitCode: -1}, err
310+
}
311+
cont, err = d.factory.Create(c.ID, config)
312+
if err != nil {
313+
return execdriver.ExitStatus{ExitCode: -1}, err
314+
}
315+
} else {
316+
return execdriver.ExitStatus{ExitCode: -1}, err
317+
}
342318
}
343319

344-
var term execdriver.Terminal
345-
346-
if c.ProcessConfig.Tty {
347-
term, err = NewTtyConsole(&c.ProcessConfig, pipes)
348-
} else {
349-
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
320+
p := &libcontainer.Process{
321+
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
322+
Env: c.ProcessConfig.Env,
323+
Cwd: c.WorkingDir,
324+
User: c.ProcessConfig.User,
350325
}
351-
if err != nil {
352-
return -1, err
326+
327+
config := cont.Config()
328+
if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil {
329+
return execdriver.ExitStatus{ExitCode: -1}, err
353330
}
354-
c.ProcessConfig.Terminal = term
355331

356332
d.Lock()
357-
d.activeContainers[c.ID] = &activeContainer{
358-
container: container,
359-
cmd: &c.ProcessConfig.Cmd,
360-
}
333+
d.activeContainers[c.ID] = cont
361334
d.Unlock()
362-
defer d.cleanContainer(c.ID)
335+
defer func() {
336+
cont.Destroy()
337+
d.cleanContainer(c.ID)
338+
}()
363339

364-
// Since the CRIU binary exits after restoring the container, we
365-
// need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
366-
// so that it'll be owned by this process (Docker daemon) after restore.
367-
//
368-
// XXX This really belongs to where the Docker daemon starts.
369-
if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 {
370-
return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr)
340+
if err := cont.Restore(p, opts); err != nil {
341+
return execdriver.ExitStatus{ExitCode: -1}, err
371342
}
372343

373-
restoreOutputChan := make(chan restoreOutput, 1)
374-
waitForRestore := make(chan struct{})
375-
376-
go func() {
377-
exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir,
378-
func(child *os.File, args []string) *exec.Cmd {
379-
cmd := new(exec.Cmd)
380-
cmd.Path = d.initPath
381-
cmd.Args = append([]string{
382-
DriverName,
383-
"-restore",
384-
"-pipe", "3",
385-
"--",
386-
}, args...)
387-
cmd.ExtraFiles = []*os.File{child}
388-
return cmd
389-
},
390-
func(restorePid int) error {
391-
log.CRDbg("restorePid=%d", restorePid)
392-
if restorePid == 0 {
393-
restoreCallback(&c.ProcessConfig, 0)
394-
return nil
395-
}
396-
397-
// The container.json file should be written *after* the container
398-
// has started because its StdFds cannot be initialized before.
399-
//
400-
// XXX How do we handle error here?
401-
d.writeContainerFile(container, c.ID)
402-
close(waitForRestore)
403-
if restoreCallback != nil {
404-
c.ProcessConfig.Process, err = os.FindProcess(restorePid)
405-
if err != nil {
406-
log.Debugf("cannot find restored process %d", restorePid)
407-
return err
408-
}
409-
c.ContainerPid = c.ProcessConfig.Process.Pid
410-
restoreCallback(&c.ProcessConfig, c.ContainerPid)
411-
}
412-
return nil
413-
})
414-
restoreOutputChan <- restoreOutput{exitCode, err}
415-
}()
344+
// FIXME: no idea if any of this is needed...
345+
if restoreCallback != nil {
346+
pid, err := p.Pid()
347+
if err != nil {
348+
p.Signal(os.Kill)
349+
p.Wait()
350+
return execdriver.ExitStatus{ExitCode: -1}, err
351+
}
352+
restoreCallback(&c.ProcessConfig, pid)
353+
}
416354

417-
select {
418-
case restoreOutput := <-restoreOutputChan:
419-
// there was an error
420-
return restoreOutput.exitCode, restoreOutput.err
421-
case <-waitForRestore:
422-
// container restored
423-
break
355+
oom := notifyOnOOM(cont)
356+
waitF := p.Wait
357+
if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
358+
// we need such hack for tracking processes with inherited fds,
359+
// because cmd.Wait() waiting for all streams to be copied
360+
waitF = waitInPIDHost(p, cont)
361+
}
362+
ps, err := waitF()
363+
if err != nil {
364+
execErr, ok := err.(*exec.ExitError)
365+
if !ok {
366+
return execdriver.ExitStatus{ExitCode: -1}, err
367+
}
368+
ps = execErr.ProcessState
424369
}
425370

426-
// Wait for the container to exit.
427-
restoreOutput := <-restoreOutputChan
428-
return restoreOutput.exitCode, restoreOutput.err
371+
cont.Destroy()
372+
_, oomKill := <-oom
373+
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
429374
}
430375

431376
func (d *driver) Terminate(c *execdriver.Command) error {

0 commit comments

Comments
 (0)