Skip to content

Commit

Permalink
docker: fix bug in waiting for container to exit
Browse files Browse the repository at this point in the history
In #23966 when we switched to using the official Docker SDK client, we had more
contexts to add because most of the library methods take one. But for some APIs
like waiting for a container to exit after we've started it, we never want to
close this context, because the operation can outlive the Nomad agent itself.
  • Loading branch information
tgross committed Sep 27, 2024
1 parent 5f92ccb commit fd67c34
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions drivers/docker/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,12 +292,12 @@ func (h *taskHandle) run() {

h.startCpusetFixer()

ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout)
defer cancel()

var werr error
var exitCode containerapi.WaitResponse
exitCodeC, errC := h.infinityClient.ContainerWait(ctx, h.containerID, containerapi.WaitConditionNotRunning)
// this needs to use the background context because the container can
// outlive Nomad itself
exitCodeC, errC := h.infinityClient.ContainerWait(
context.Background(), h.containerID, containerapi.WaitConditionNotRunning)

select {
case exitCode = <-exitCodeC:
Expand All @@ -308,6 +308,9 @@ func (h *taskHandle) run() {
h.logger.Error("failed to wait for container; already terminated")
}

ctx, inspectCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer inspectCancel()

container, ierr := h.dockerClient.ContainerInspect(ctx, h.containerID)
oom := false
if ierr != nil {
Expand All @@ -331,7 +334,10 @@ func (h *taskHandle) run() {
close(h.doneCh)

// Stop the container just incase the docker daemon's wait returned
// incorrectly.
// incorrectly. Container should have exited by now so kill_timeout can be
// ignored.
ctx, stopCancel := context.WithTimeout(context.Background(), 10*time.Second)
defer stopCancel()
if err := h.dockerClient.ContainerStop(ctx, h.containerID, containerapi.StopOptions{
Timeout: pointer.Of(0),
}); err != nil {
Expand Down

0 comments on commit fd67c34

Please sign in to comment.