Skip to content

Commit

Permalink
kernel: do not protect TaskSet.liveTasks with TaskSet.mu
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 686665800
  • Loading branch information
nixprime authored and gvisor-bot committed Oct 26, 2024
1 parent 0b2cae1 commit fc070c2
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 25 deletions.
25 changes: 20 additions & 5 deletions pkg/sentry/kernel/kernel.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"errors"
"fmt"
"io"
"math"
"path/filepath"
"time"

Expand Down Expand Up @@ -1445,12 +1446,26 @@ func (k *Kernel) decRunningTasks() {
// WaitExited blocks until all tasks in k have exited. No tasks can be created
// after WaitExited returns.
func (k *Kernel) WaitExited() {
k.tasks.mu.Lock()
defer k.tasks.mu.Unlock()
k.tasks.noNewTasksIfZeroLive = true
for k.tasks.liveTasks != 0 {
k.tasks.zeroLiveTasksCond.Wait()
// Ensure that the most significant bit of k.tasks.liveTasks is unset,
// preventing k.tasks.newTask() from transitioning k.tasks.liveTasks out of
// 0.
for {
liveTasks := k.tasks.liveTasks.Load()
if liveTasks == 0 {
return
}
if liveTasks > 0 {
break
}
newLiveTasks := liveTasks &^ math.MinInt64
if k.tasks.liveTasks.CompareAndSwap(liveTasks, newLiveTasks) {
if newLiveTasks == 0 {
close(k.tasks.zeroLiveTasksC)
}
break
}
}
<-k.tasks.zeroLiveTasksC
}

// Kill requests that all tasks in k immediately exit as if group exiting with
Expand Down
16 changes: 15 additions & 1 deletion pkg/sentry/kernel/kernel_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,27 @@ package kernel

import (
"context"
"math"

"gvisor.dev/gvisor/pkg/tcpip"
)

// saveLiveTasks is invoked by stateify.
func (ts *TaskSet) saveLiveTasks() int64 {
// The MSB, which is cleared by Kernel.WaitExited(), is never saved and is
// always set again after restore, since whether Kernel.WaitExited() was
// called before checkpointing is not intended to apply after restore.
return ts.liveTasks.Load() &^ math.MinInt64
}

// loadLiveTasks is invoked by stateify.
func (ts *TaskSet) loadLiveTasks(_ context.Context, liveTasks int64) {
ts.liveTasks.Store(liveTasks | math.MinInt64)
}

// afterLoad is invoked by stateify.
func (ts *TaskSet) afterLoad(_ context.Context) {
ts.zeroLiveTasksCond.L = &ts.mu
ts.zeroLiveTasksC = make(chan struct{}, 0)
}

// saveDanglingEndpoints is invoked by stateify.
Expand Down
7 changes: 2 additions & 5 deletions pkg/sentry/kernel/task_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,9 @@ func (t *Task) run(threadID uintptr) {
t.p.Release()

ts := t.tg.pidns.owner
ts.mu.Lock()
ts.liveTasks--
if ts.liveTasks == 0 {
ts.zeroLiveTasksCond.Broadcast()
if ts.liveTasks.Add(-1) == 0 {
close(ts.zeroLiveTasksC)
}
ts.mu.Unlock()
ts.runningGoroutines.Done()

// Deferring this store triggers a false positive in the race
Expand Down
10 changes: 7 additions & 3 deletions pkg/sentry/kernel/task_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,20 +226,24 @@ func (ts *TaskSet) newTask(ctx context.Context, cfg *TaskConfig) (*Task, error)
// we're in uncharted territory and can return whatever we want.
return nil, linuxerr.EINTR
}
if ts.liveTasks == 0 && ts.noNewTasksIfZeroLive {
if ts.liveTasks.Add(1) == 1 {
// ts.mu is locked, so this can't race with concurrent calls to
// ts.newTask().
ts.liveTasks.Add(-1)
// Since liveTasks == 0, our caller cannot be a task goroutine invoking
// a syscall, so it's safe to return a non-errno error that is more
// explanatory.
return nil, fmt.Errorf("task creation disabled after Kernel.WaitExited() may have returned")
}
if err := ts.assignTIDsLocked(t); err != nil {
if ts.liveTasks.Add(-1) == 0 {
close(ts.zeroLiveTasksC)
}
return nil, err
}
// Below this point, newTask is expected not to fail (there is no rollback
// of assignTIDsLocked or any of the following).

ts.liveTasks++

// Logging on t's behalf will panic if t.logPrefix hasn't been
// initialized. This is the earliest point at which we can do so
// (since t now has thread IDs).
Expand Down
25 changes: 14 additions & 11 deletions pkg/sentry/kernel/threads.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package kernel

import (
"fmt"
"math"

"gvisor.dev/gvisor/pkg/atomicbitops"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
Expand Down Expand Up @@ -77,16 +78,12 @@ type TaskSet struct {
stopCount int32 `state:"nosave"`

// liveTasks is the number of tasks in the TaskSet whose goroutines have
// not exited. liveTasks is protected by mu.
liveTasks uint32
// not exited.
liveTasks atomicbitops.Int64 `state:".(int64)"`

// If noNewTasksIfZeroLive is true and liveTasks is zero, calls to
// Kernel.NewTask() will fail. noNewTasksIfZeroLive is protected by mu.
noNewTasksIfZeroLive bool

// zeroLiveTasksCond is broadcast when liveTasks transitions from non-zero
// to zero.
zeroLiveTasksCond sync.Cond `state:"nosave"`
// zeroLiveTasksC is closed when liveTasks transitions from non-zero to
// zero.
zeroLiveTasksC chan struct{} `state:"manual"`

// runningGoroutines is the number of running task goroutines in the
// TaskSet.
Expand All @@ -106,8 +103,14 @@ type TaskSet struct {

// newTaskSet returns a new, empty TaskSet.
func newTaskSet(pidns *PIDNamespace) *TaskSet {
ts := &TaskSet{Root: pidns}
ts.zeroLiveTasksCond.L = &ts.mu
ts := &TaskSet{
Root: pidns,
zeroLiveTasksC: make(chan struct{}, 0),
}
// Set the most significant bit of ts.liveTasks to make it non-zero, and
// therefore allow TaskSet.newTask() to proceed even if there are no live
// tasks; it will be cleared by Kernel.WaitExited().
ts.liveTasks.Store(math.MinInt64)
pidns.owner = ts
return ts
}
Expand Down

0 comments on commit fc070c2

Please sign in to comment.