diff --git a/drivers/docker/handle.go b/drivers/docker/handle.go index 34d8e305d5c..50042d0bb8f 100644 --- a/drivers/docker/handle.go +++ b/drivers/docker/handle.go @@ -292,12 +292,12 @@ func (h *taskHandle) run() { h.startCpusetFixer() - ctx, cancel := context.WithTimeout(context.Background(), dockerTimeout) - defer cancel() - var werr error var exitCode containerapi.WaitResponse - exitCodeC, errC := h.infinityClient.ContainerWait(ctx, h.containerID, containerapi.WaitConditionNotRunning) + // this needs to use the background context because the container can + // outlive Nomad itself + exitCodeC, errC := h.infinityClient.ContainerWait( + context.Background(), h.containerID, containerapi.WaitConditionNotRunning) select { case exitCode = <-exitCodeC: @@ -308,6 +308,9 @@ func (h *taskHandle) run() { h.logger.Error("failed to wait for container; already terminated") } + ctx, inspectCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer inspectCancel() + container, ierr := h.dockerClient.ContainerInspect(ctx, h.containerID) oom := false if ierr != nil { @@ -331,7 +334,10 @@ func (h *taskHandle) run() { close(h.doneCh) // Stop the container just incase the docker daemon's wait returned - // incorrectly. + // incorrectly. Container should have exited by now so kill_timeout can be + // ignored. + ctx, stopCancel := context.WithTimeout(context.Background(), 10*time.Second) + defer stopCancel() if err := h.dockerClient.ContainerStop(ctx, h.containerID, containerapi.StopOptions{ Timeout: pointer.Of(0), }); err != nil {