Skip to content

Commit

Permalink
docker: fix bug in waiting for container to exit (#24081)
Browse files Browse the repository at this point in the history
In #23966 when we switched to using the official Docker SDK client, we had more
contexts to add because most of the library methods take one. But for some APIs
like waiting for a container to exit after we've started it, we never want to
close this context, because the operation can outlive the Nomad agent itself.
  • Loading branch information
tgross authored Sep 30, 2024
1 parent 242de8a commit 154aeb7
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions drivers/docker/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ func (h *taskHandle) run() {

h.startCpusetFixer()

ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

var werr error
var exitCode containerapi.WaitResponse
exitCodeC, errC := h.infinityClient.ContainerWait(ctx, h.containerID, containerapi.WaitConditionNotRunning)
// this needs to use the background context because the container can
// outlive Nomad itself
exitCodeC, errC := h.infinityClient.ContainerWait(
context.Background(), h.containerID, containerapi.WaitConditionNotRunning)

select {
case exitCode = <-exitCodeC:
Expand All @@ -308,6 +308,9 @@ func (h *taskHandle) run() {
h.logger.Error("failed to wait for container; already terminated")
}

ctx, inspectCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer inspectCancel()

container, ierr := h.dockerClient.ContainerInspect(ctx, h.containerID)
oom := false
if ierr != nil {
Expand All @@ -331,7 +334,10 @@ func (h *taskHandle) run() {
close(h.doneCh)

// Stop the container just incase the docker daemon's wait returned
// incorrectly.
// incorrectly. Container should have exited by now so kill_timeout can be
// ignored.
ctx, stopCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer stopCancel()
if err := h.dockerClient.ContainerStop(ctx, h.containerID, containerapi.StopOptions{
Timeout: pointer.Of(0),
}); err != nil {
Expand Down

0 comments on commit 154aeb7

Please sign in to comment.