Merge pull request juju#18781 from SimonRichardson/fix-data-races

There is a potential for data races around the context of the API remote caller. Getting the context cancellation right is tricky because of the semantics we want for handling changes.

The solution is simple: move the cancellation into the goroutine so there is no possibility of a race. Everything is sequenced.
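Below is a minimal, self-contained sketch of that pattern with hypothetical names (changes, done, dial): the CancelCauseFunc is created, replaced and invoked only inside the single goroutine that owns it, so every cancellation is sequenced with the change handling and no extra synchronisation is needed.

package main

import (
	"context"
	"fmt"
	"time"
)

// dial stands in for the real connection attempt; here it simply waits
// until its context is cancelled.
func dial(ctx context.Context, addr string) {
	<-ctx.Done()
	fmt.Printf("dial to %s stopped: %v\n", addr, context.Cause(ctx))
}

func main() {
	changes := make(chan string)
	done := make(chan struct{})

	go func() {
		// The canceler lives entirely inside this goroutine, so it is
		// never read or written concurrently.
		var cancel context.CancelCauseFunc
		defer func() {
			if cancel != nil {
				cancel(context.Canceled)
			}
		}()

		for {
			select {
			case <-done:
				return
			case addr := <-changes:
				// Cancel the previous dial attempt before starting a new one.
				if cancel != nil {
					cancel(context.Canceled)
				}
				var ctx context.Context
				ctx, cancel = context.WithCancelCause(context.Background())
				go dial(ctx, addr)
			}
		}
	}()

	changes <- "10.0.0.1"
	changes <- "10.0.0.2" // cancels the dial to 10.0.0.1
	time.Sleep(100 * time.Millisecond)
	close(done)
}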
jujubot authored Feb 3, 2025
2 parents 149fe7a + 611549c commit 9af5799
Showing 2 changed files with 33 additions and 18 deletions.
41 changes: 28 additions & 13 deletions internal/worker/apiremotecaller/remote.go
@@ -133,27 +133,33 @@ func (w *remoteServer) loop() error {
ctx, cancel := w.scopedContext()
defer cancel()

// When we receive a new change, we want to be able to cancel the current
// connection attempt. The current setup is that it will dial indefinitely
// until it has connected. The cancelling of the connection should not
// affect the current connection, it should always remain in the same
// state.
var canceler context.CancelCauseFunc
defer func() {
if canceler != nil {
canceler(nil)
}
}()

requests := make(chan request)
w.tomb.Go(func() error {
// When we receive a new change, we want to be able to cancel the current
// connection attempt. The current setup is that it will dial indefinitely
// until it has connected. The cancelling of the connection should not
// affect the current connection, it should always remain in the same
// state.
var canceler context.CancelCauseFunc
defer func() {
// Clean up the canceler if it exists.
if canceler != nil {
canceler(context.Canceled)
}
}()

// If the worker is dying, we need to cancel the current connection
// attempt.
// Note: do not use context.Done() here, as it will cause the worker
// to die for the wrong cause (context.Canceled).
for {
select {
case <-w.tomb.Dying():
// There is no guarantee that we'll have a context to cancel,
// to avoid panics, we need to check if the canceler is nil.
if canceler != nil {
canceler(context.Canceled)
}
return tomb.ErrDying
case addresses := <-w.changes:
// Cancel the current connection attempt and then proxy the
@@ -170,6 +176,8 @@ func (w *remoteServer) loop() error {
// period of time, to avoid sending too many changes at once.
select {
case <-w.tomb.Dying():
// We'll always have a canceler if we're dying, so we can
// safely call it here.
canceler(context.Canceled)
return tomb.ErrDying
case requests <- request{
@@ -209,7 +217,14 @@ func (w *remoteServer) loop() error {
if errors.Is(err, newChangeRequestError) {
continue
} else if err != nil {
return err
// If we're dying we don't want to return an error, we just want
// to die.
select {
case <-w.tomb.Dying():
return tomb.ErrDying
default:
return err
}
}

w.logger.Debugf("connected to %s with addresses: %v", w.controllerID, addresses)
10 changes: 5 additions & 5 deletions internal/worker/httpclient/worker_test.go
@@ -153,6 +153,11 @@ func (s *workerSuite) TestGetHTTPClientConcurrently(c *gc.C) {

s.expectClock()

s.newHTTPClient = func() *internalhttp.Client {
atomic.AddInt64(&s.called, 1)
return internalhttp.NewClient()
}

w := s.newWorker(c)
defer workertest.CleanKill(c, w)

@@ -175,11 +180,6 @@ func (s *workerSuite) TestGetHTTPClientConcurrently(c *gc.C) {

name := fmt.Sprintf("anything-%d", i)

s.newHTTPClient = func() *internalhttp.Client {
atomic.AddInt64(&s.called, 1)
return internalhttp.NewClient()
}

_, err := worker.GetHTTPClient(context.Background(), corehttp.Purpose(name))
c.Assert(err, jc.ErrorIsNil)
}(i)
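The test change above follows the same rule; a minimal sketch with hypothetical names of what it avoids: a hook field read by concurrently running goroutines must be assigned before those readers start, because assigning it inside the loop that spawns them is a write racing with the worker's reads.

package main

import (
	"fmt"
	"sync"
)

type suite struct {
	newHTTPClient func() string
}

func main() {
	s := &suite{}

	// Safe: the hook is assigned once, before any concurrent reader exists.
	// Assigning it inside the goroutines below would be a data race.
	s.newHTTPClient = func() string { return "client" }

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Concurrent reads are fine once the field is no longer written.
			_ = s.newHTTPClient()
		}()
	}
	wg.Wait()
	fmt.Println("done")
}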
