From 93649b2b309571fa899c39ddd5a31f77b2af3cc2 Mon Sep 17 00:00:00 2001 From: Johan Brandhorst-Satzkorn Date: Fri, 16 Aug 2024 16:43:30 -0700 Subject: [PATCH 1/4] internal/event: simplify new ctx creation If the input context is done, we construct a new context with a new timeout. Make use of context.WithoutCancel to simplify the logic for adding existing context values into this new context. --- internal/event/context.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/internal/event/context.go b/internal/event/context.go index bef6c3668a..796dea9c6a 100644 --- a/internal/event/context.go +++ b/internal/event/context.go @@ -368,15 +368,8 @@ func newSendCtx(ctx context.Context) (context.Context, context.CancelFunc) { switch { case ctx == nil: return context.Background(), nil - case ctx.Err() == context.Canceled || ctx.Err() == context.DeadlineExceeded: - sendCtx, sendCancel = context.WithTimeout(context.Background(), cancelledSendTimeout) - info, ok := RequestInfoFromContext(ctx) - if ok { - reqCtx, err := NewRequestInfoContext(sendCtx, info) - if err == nil { - sendCtx = reqCtx - } - } + case ctx.Err() != nil: + sendCtx, sendCancel = context.WithTimeout(context.WithoutCancel(ctx), cancelledSendTimeout) default: sendCtx = ctx } From db928d3f3f752fe3828997c0aa49a5e6f4c960be Mon Sep 17 00:00:00 2001 From: Johan Brandhorst-Satzkorn Date: Fri, 16 Aug 2024 16:45:31 -0700 Subject: [PATCH 2/4] internal/errors: add ctx cancel cause Adds the context cancel cause to the error, if set. This can provide useful error information to debugging efforts when a context cancellation error happens. --- internal/errors/error.go | 10 ++++++++++ internal/errors/error_test.go | 25 ++++++++++++++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/internal/errors/error.go b/internal/errors/error.go index 9de9402387..1a7725c512 100644 --- a/internal/errors/error.go +++ b/internal/errors/error.go @@ -38,6 +38,10 @@ type Err struct { // Wrapped is the error which this Err wraps and will be nil if there's no // error to wrap. Wrapped error + + // ContextCause will be set if the context provided to the error creation + // was Done. + ContextCause error } // E creates a new Err with provided code and supports the options of: @@ -79,6 +83,9 @@ func E(ctx context.Context, opt ...Option) error { Wrapped: opts.withErrWrapped, Msg: fmt.Sprintf(opts.withErrMsg, opts.withErrMsgArgs...), } + if ctx != nil { + err.ContextCause = context.Cause(ctx) + } if opts.withoutEvent { return err } @@ -228,6 +235,9 @@ func (e *Err) Error() string { if e.Msg != "" { join(&s, ": ", e.Msg) } + if e.ContextCause != nil { + join(&s, " ", "("+e.ContextCause.Error()+")") + } var skipInfo bool var wrapped *Err diff --git a/internal/errors/error_test.go b/internal/errors/error_test.go index a227a9ad92..de0915663e 100644 --- a/internal/errors/error_test.go +++ b/internal/errors/error_test.go @@ -4,6 +4,7 @@ package errors_test import ( + "cmp" "context" stderrors "errors" "fmt" @@ -19,10 +20,13 @@ import ( func Test_ErrorE(t *testing.T) { t.Parallel() ctx := context.Background() + canceledCtx, cancel := context.WithCancelCause(ctx) + cancel(stderrors.New("this context was canceled")) errRecordNotFound := errors.E(context.TODO(), errors.WithoutEvent(), errors.WithCode(errors.RecordNotFound)) tests := []struct { name string opt []errors.Option + ctx context.Context want error }{ { @@ -77,15 +81,19 @@ func Test_ErrorE(t *testing.T) { Wrapped: errRecordNotFound, }, }, + { + name: "context-with-cancel-cause", + ctx: canceledCtx, + want: &errors.Err{ + ContextCause: stderrors.New("this context was canceled"), + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { assert, require := assert.New(t), require.New(t) - err := errors.E(ctx, tt.opt...) - require.Error(err) - assert.Equal(tt.want, err) - - err = errors.E(context.TODO(), tt.opt...) + testCtx := cmp.Or(tt.ctx, ctx) + err := errors.E(testCtx, tt.opt...) require.Error(err) assert.Equal(tt.want, err) }) @@ -310,6 +318,8 @@ func TestError_Info(t *testing.T) { func TestError_Error(t *testing.T) { t.Parallel() ctx := context.Background() + canceledCtx, cancel := context.WithCancelCause(ctx) + cancel(stderrors.New("this context was canceled")) tests := []struct { name string err error @@ -350,6 +360,11 @@ func TestError_Error(t *testing.T) { err: errors.E(context.TODO(), errors.WithoutEvent(), errors.WithCode(errors.CheckConstraint), errors.WithWrap(errors.E(ctx, errors.WithCode(errors.CheckConstraint), errors.WithMsg("wrapped msg"))), errors.WithMsg("test msg")), want: "test msg: wrapped msg: integrity violation: error #1000", }, + { + name: "error-with-cancel-cause", + err: errors.E(canceledCtx, errors.WithoutEvent(), errors.WithMsg("test msg")), + want: "test msg (this context was canceled)", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 55a1d770068f60afe7ecaf315a66668c9ff8a391 Mon Sep 17 00:00:00 2001 From: Johan Brandhorst-Satzkorn Date: Fri, 16 Aug 2024 17:35:02 -0700 Subject: [PATCH 3/4] internal/event: include context cancel cause in event The context cancel cause can be a great way to understand why a context was canceled. --- internal/event/context.go | 2 +- internal/event/event_error.go | 21 +++++++++++++-------- internal/event/event_error_test.go | 23 ++++++++++++++++++++++- internal/event/eventer_retry_test.go | 2 +- internal/event/eventer_test.go | 2 +- 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/internal/event/context.go b/internal/event/context.go index 796dea9c6a..77334aa93e 100644 --- a/internal/event/context.go +++ b/internal/event/context.go @@ -196,7 +196,7 @@ func WriteError(ctx context.Context, caller Op, e error, opt ...Option) { return } } - ev, err := newError(caller, e, opt...) + ev, err := newError(ctx, caller, e, opt...) if err != nil { eventer.logger.Error(fmt.Sprintf("%s: %v", op, err)) eventer.logger.Error(fmt.Sprintf("%s: unable to create new error to write error: %v", op, e)) diff --git a/internal/event/event_error.go b/internal/event/event_error.go index 6c6533122a..b0463e6e82 100644 --- a/internal/event/event_error.go +++ b/internal/event/event_error.go @@ -4,6 +4,7 @@ package event import ( + "context" "errors" "fmt" "reflect" @@ -13,16 +14,17 @@ import ( const errorVersion = "v0.1" type err struct { - Error string `json:"error"` - ErrorFields error `json:"error_fields"` - Id Id `json:"id,omitempty"` - Version string `json:"version"` - Op Op `json:"op,omitempty"` - RequestInfo *RequestInfo `json:"request_info,omitempty"` - Info map[string]any `json:"info,omitempty"` + Error string `json:"error"` + ErrorFields error `json:"error_fields"` + Id Id `json:"id,omitempty"` + Version string `json:"version"` + Op Op `json:"op,omitempty"` + RequestInfo *RequestInfo `json:"request_info,omitempty"` + Info map[string]any `json:"info,omitempty"` + ContextCause error `json:"cause,omitempty"` } -func newError(fromOperation Op, e error, opt ...Option) (*err, error) { +func newError(ctx context.Context, fromOperation Op, e error, opt ...Option) (*err, error) { const op = "event.newError" if fromOperation == "" { return nil, fmt.Errorf("%s: missing operation: %w", op, ErrInvalidParameter) @@ -53,6 +55,9 @@ func newError(fromOperation Op, e error, opt ...Option) (*err, error) { Error: e.Error(), ErrorFields: e, } + if ctx != nil { + newErr.ContextCause = context.Cause(ctx) + } if err := newErr.validate(); err != nil { return nil, fmt.Errorf("%s: %w", op, err) } diff --git a/internal/event/event_error_test.go b/internal/event/event_error_test.go index 37a38d3f1c..4625cacbed 100644 --- a/internal/event/event_error_test.go +++ b/internal/event/event_error_test.go @@ -4,6 +4,9 @@ package event import ( + "cmp" + "context" + "errors" "fmt" "testing" @@ -14,12 +17,16 @@ import ( func Test_newError(t *testing.T) { t.Parallel() + ctx := context.Background() + canceledCtx, cancel := context.WithCancelCause(ctx) + cancel(errors.New("test cancel")) tests := []struct { name string fromOp Op e error opts []Option + ctx context.Context want *err wantErrIs error wantErrContains string @@ -87,11 +94,25 @@ func Test_newError(t *testing.T) { Info: map[string]any{"msg": "hello"}, }, }, + { + name: "context-cause", + ctx: canceledCtx, + fromOp: Op("context-cause"), + e: fmt.Errorf("%s: valid: %w", "context-cause", ErrInvalidParameter), + want: &err{ + ErrorFields: fmt.Errorf("%s: valid: %w", "context-cause", ErrInvalidParameter), + Error: "context-cause: valid: invalid parameter", + Version: errorVersion, + Op: Op("context-cause"), + ContextCause: errors.New("test cancel"), + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { assert, require := assert.New(t), require.New(t) - got, err := newError(tt.fromOp, tt.e, tt.opts...) + testCtx := cmp.Or(tt.ctx, ctx) + got, err := newError(testCtx, tt.fromOp, tt.e, tt.opts...) if tt.wantErrIs != nil { require.Error(err) assert.Nil(got) diff --git a/internal/event/eventer_retry_test.go b/internal/event/eventer_retry_test.go index e9b6153459..a7f1e39a2f 100644 --- a/internal/event/eventer_retry_test.go +++ b/internal/event/eventer_retry_test.go @@ -30,7 +30,7 @@ func TestEventer_retrySend(t *testing.T) { require.NoError(t, err) testError := fmt.Errorf("%s: missing operation: %w", "missing operation", ErrInvalidParameter) - testEvent, err := newError("TestEventer_retrySend", testError, WithId("test-error")) + testEvent, err := newError(ctx, "TestEventer_retrySend", testError, WithId("test-error")) require.NoError(t, err) tests := []struct { diff --git a/internal/event/eventer_test.go b/internal/event/eventer_test.go index e1a50fd145..0953e2d8f2 100644 --- a/internal/event/eventer_test.go +++ b/internal/event/eventer_test.go @@ -304,7 +304,7 @@ func TestEventer_writeError(t *testing.T) { eventer, er := NewEventer(testLogger, testLock, "TestEventer_writeError", testSetup.EventerConfig) require.NoError(t, er) - testError, er := newError("TestEventer_writeError", fmt.Errorf("%s: no msg: test", ErrIo)) + testError, er := newError(ctx, "TestEventer_writeError", fmt.Errorf("%s: no msg: test", ErrIo)) require.NoError(t, er) tests := []struct { From 4c9e1e8e9585eec4ca76abe52e47614114d7308c Mon Sep 17 00:00:00 2001 From: Johan Brandhorst-Satzkorn Date: Fri, 23 Aug 2024 16:20:55 -0700 Subject: [PATCH 4/4] all: add cancellation causes to timeouts The new WithTimeoutCause and WithDeadlineCause functions allow us to decorate contexts with metadata surrounding a specific timeout or deadline. Combined with the automatic discovery of the context cause in the errors and event packages, we should get much more information about context cancellations. --- api/client.go | 10 +++++++++- api/proxy/proxy.go | 13 +++++++++++-- .../clientcache/cmd/cache/wrapper_register.go | 7 ++++++- internal/clientcache/internal/cache/refresh.go | 6 +++++- internal/clientcache/internal/daemon/server.go | 8 ++++++-- internal/cmd/commands/server/server.go | 16 ++++++++++++++-- internal/cmd/ops/server.go | 13 +++++++++---- internal/daemon/controller/handler.go | 6 +++++- .../handlers/targets/target_service.go | 8 ++++++-- internal/daemon/controller/interceptor.go | 6 +++++- internal/daemon/controller/listeners.go | 9 +++++++-- internal/daemon/controller/testing.go | 6 +++++- internal/daemon/worker/handler.go | 2 +- internal/daemon/worker/listeners.go | 7 ++++++- internal/daemon/worker/session/session.go | 6 +++++- internal/daemon/worker/status.go | 18 +++++++++++++++--- internal/daemon/worker/worker.go | 12 ++++++++++-- internal/event/context.go | 7 ++++++- internal/kms/kms.go | 6 +++++- internal/tests/helper/testing_helper.go | 12 ++++++++++-- 20 files changed, 146 insertions(+), 32 deletions(-) diff --git a/api/client.go b/api/client.go index 3a71fd57ee..3625ad3f43 100644 --- a/api/client.go +++ b/api/client.go @@ -736,6 +736,7 @@ func (c *Client) NewRequest(ctx context.Context, method, requestPath string, bod // Do takes a properly configured request and applies client configuration to // it, returning the response. func (c *Client) Do(r *retryablehttp.Request, opt ...Option) (*Response, error) { + const op = "api.(Client).Do" opts := getOpts(opt...) c.modifyLock.RLock() limiter := c.config.Limiter @@ -772,7 +773,11 @@ func (c *Client) Do(r *retryablehttp.Request, opt ...Option) (*Response, error) if timeout != 0 { var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, timeout) + ctx, cancel = context.WithTimeoutCause( + ctx, + timeout, + fmt.Errorf("%s: client configured timeout exceeded", op), + ) // This dance is just to ignore vet warnings; we don't want to cancel // this as it will make reading the response body impossible _ = cancel @@ -841,6 +846,9 @@ func (c *Client) Do(r *retryablehttp.Request, opt ...Option) (*Response, error) } if err != nil { + if ctxCause := context.Cause(ctx); ctxCause != nil { + return nil, fmt.Errorf("%w (%w)", err, ctxCause) + } if strings.Contains(err.Error(), "tls: oversized") { err = fmt.Errorf( "%w\n\n"+ diff --git a/api/proxy/proxy.go b/api/proxy/proxy.go index 68e9f7347e..75637db9f0 100644 --- a/api/proxy/proxy.go +++ b/api/proxy/proxy.go @@ -80,6 +80,7 @@ type ClientProxy struct { // EXPERIMENTAL: While this API is not expected to change, it is new and // feedback from users may necessitate changes. func New(ctx context.Context, authzToken string, opt ...Option) (*ClientProxy, error) { + const op = "proxy.New" opts, err := getOpts(opt...) if err != nil { return nil, fmt.Errorf("could not parse options: %w", err) @@ -142,7 +143,7 @@ func New(ctx context.Context, authzToken string, opt ...Option) (*ClientProxy, e // We don't _rely_ on client-side timeout verification but this prevents us // seeming to be ready for a connection that will immediately fail when we // try to actually make it - p.ctx, p.cancel = context.WithDeadline(ctx, p.expiration) + p.ctx, p.cancel = context.WithDeadlineCause(ctx, p.expiration, fmt.Errorf("%s: session expiration exceeded", op)) transport := cleanhttp.DefaultTransport() transport.DisableKeepAlives = false @@ -173,6 +174,7 @@ func New(ctx context.Context, authzToken string, opt ...Option) (*ClientProxy, e // EXPERIMENTAL: While this API is not expected to change, it is new and // feedback from users may necessitate changes. func (p *ClientProxy) Start(opt ...Option) (retErr error) { + const op = "proxy.(ClientProxy).Start" opts, err := getOpts(opt...) if err != nil { return fmt.Errorf("could not parse options: %w", err) @@ -350,9 +352,16 @@ func (p *ClientProxy) Start(opt ...Option) (retErr error) { return nil } - ctx, cancel := context.WithTimeout(context.Background(), opts.withSessionTeardownTimeout) + ctx, cancel := context.WithTimeoutCause( + context.Background(), + opts.withSessionTeardownTimeout, + fmt.Errorf("%s: session teardown timeout exceeded", op), + ) defer cancel() if err := p.sendSessionTeardown(ctx); err != nil { + if ctxCause := ctx.Err(); ctxCause != nil { + return fmt.Errorf("error sending session teardown request to worker: %w (%w)", err, ctxCause) + } return fmt.Errorf("error sending session teardown request to worker: %w", err) } diff --git a/internal/clientcache/cmd/cache/wrapper_register.go b/internal/clientcache/cmd/cache/wrapper_register.go index 9831a60fbf..90a7c56b1a 100644 --- a/internal/clientcache/cmd/cache/wrapper_register.go +++ b/internal/clientcache/cmd/cache/wrapper_register.go @@ -80,6 +80,7 @@ func silentUi() *cli.BasicUi { // addTokenToCache runs AddTokenCommand with the token used in, or retrieved by // the wrapped command. func addTokenToCache(ctx context.Context, baseCmd *base.Command, token string) bool { + const op = "cache.addTokenToCache" com := AddTokenCommand{Command: base.NewCommand(baseCmd.UI)} client, err := baseCmd.Client() if err != nil { @@ -95,7 +96,11 @@ func addTokenToCache(ctx context.Context, baseCmd *base.Command, token string) b // Since the daemon might have just started, we need to wait until it can // respond to our requests - waitCtx, cancel := context.WithTimeout(ctx, 3*time.Second) + waitCtx, cancel := context.WithTimeoutCause( + ctx, + 3*time.Second, + fmt.Errorf("%s: daemon startup timeout exceeded", op), + ) defer cancel() if err := waitForDaemon(waitCtx); err != nil { // TODO: Print the result of this out into a log in the dot directory diff --git a/internal/clientcache/internal/cache/refresh.go b/internal/clientcache/internal/cache/refresh.go index b9a77a5372..579c3d47cb 100644 --- a/internal/clientcache/internal/cache/refresh.go +++ b/internal/clientcache/internal/cache/refresh.go @@ -154,7 +154,11 @@ func (r *RefreshService) RefreshForSearch(ctx context.Context, authTokenid strin const op = "cache.(RefreshService).RefreshForSearch" if r.maxSearchRefreshTimeout > 0 { var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, r.maxSearchRefreshTimeout) + ctx, cancel = context.WithTimeoutCause( + ctx, + r.maxSearchRefreshTimeout, + fmt.Errorf("%s: search refresh timeout exceeded", op), + ) defer cancel() } at, err := r.repo.LookupToken(ctx, authTokenid) diff --git a/internal/clientcache/internal/daemon/server.go b/internal/clientcache/internal/daemon/server.go index 79a80e67a8..c7713fda26 100644 --- a/internal/clientcache/internal/daemon/server.go +++ b/internal/clientcache/internal/daemon/server.go @@ -139,11 +139,15 @@ func (s *CacheServer) Shutdown(ctx context.Context) error { if s.conf.ContextCancel != nil { s.conf.ContextCancel() } - srvCtx, srvCancel := context.WithTimeout(context.Background(), 5*time.Second) + srvCtx, srvCancel := context.WithTimeoutCause( + context.Background(), + 5*time.Second, + fmt.Errorf("%s: http server shutdown timeout exceeded", op), + ) defer srvCancel() err := s.httpSrv.Shutdown(srvCtx) if err != nil { - shutdownErr = fmt.Errorf("error shutting down server: %w", err) + shutdownErr = errors.Wrap(ctx, err, op, errors.WithMsg("error shutting down server"), errors.WithoutEvent()) return } s.tickerWg.Wait() diff --git a/internal/cmd/commands/server/server.go b/internal/cmd/commands/server/server.go index 2d6cc2cd38..07f34f3afe 100644 --- a/internal/cmd/commands/server/server.go +++ b/internal/cmd/commands/server/server.go @@ -172,6 +172,7 @@ func (c *Command) AutocompleteFlags() complete.Flags { } func (c *Command) Run(args []string) int { + const op = "server.(Command).Run" c.CombineLogs = c.flagCombineLogs defer func() { @@ -479,12 +480,23 @@ func (c *Command) Run(args []string) int { // 1 second is chosen so the shutdown is still responsive and this is a mostly // non critical step since the lock should be released when the session with the // database is closed. - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + ctx, cancel := context.WithTimeoutCause( + context.Background(), + 1*time.Second, + fmt.Errorf("%s: database lock release timeout exceeded", op), + ) defer cancel() err := c.schemaManager.Close(ctx) if err != nil { - c.UI.Error(fmt.Errorf("Unable to release shared lock to the database: %w", err).Error()) + // Use errors.E to capture the context cause if there is one + c.UI.Error(errors.Wrap( + ctx, + err, + op, + errors.WithMsg("Unable to release shared lock to the database"), + errors.WithoutEvent(), + ).Error()) } }() diff --git a/internal/cmd/ops/server.go b/internal/cmd/ops/server.go index 75a69e0448..85ed9797a5 100644 --- a/internal/cmd/ops/server.go +++ b/internal/cmd/ops/server.go @@ -7,7 +7,7 @@ package ops import ( "context" - "errors" + stderrors "errors" "fmt" "net" "net/http" @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/boundary/internal/cmd/base" "github.com/hashicorp/boundary/internal/daemon/controller" "github.com/hashicorp/boundary/internal/daemon/worker" + "github.com/hashicorp/boundary/internal/errors" "github.com/hashicorp/go-cleanhttp" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-secure-stdlib/listenerutil" @@ -94,18 +95,22 @@ func (s *Server) Shutdown() error { return fmt.Errorf("%s: missing bundle, listener or its fields", op) } - ctx, cancel := context.WithTimeout(context.Background(), b.ln.Config.MaxRequestDuration) + ctx, cancel := context.WithTimeoutCause( + context.Background(), + b.ln.Config.MaxRequestDuration, + fmt.Errorf("%s: max request duration exceeded", op), + ) defer cancel() err := b.ln.HTTPServer.Shutdown(ctx) if err != nil { - errors.Join(closeErrors, fmt.Errorf("%s: failed to shutdown http server: %w", op, err)) + closeErrors = stderrors.Join(closeErrors, errors.Wrap(ctx, err, op, errors.WithMsg("failed to shutdown http server"))) } err = b.ln.OpsListener.Close() err = listenerCloseErrorCheck(b.ln.Config.Type, err) if err != nil { - errors.Join(closeErrors, fmt.Errorf("%s: failed to close listener mux: %w", op, err)) + closeErrors = stderrors.Join(closeErrors, fmt.Errorf("%s: failed to close listener mux: %w", op, err)) } } diff --git a/internal/daemon/controller/handler.go b/internal/daemon/controller/handler.go index b220fc3079..54b5664a16 100644 --- a/internal/daemon/controller/handler.go +++ b/internal/daemon/controller/handler.go @@ -476,7 +476,11 @@ func wrapHandlerWithCommonFuncs(h http.Handler, c *Controller, props HandlerProp w.Header().Set("Cache-Control", "no-store") // Start with the request context and our timeout - ctx, cancelFunc := context.WithTimeout(r.Context(), maxRequestDuration) + ctx, cancelFunc := context.WithTimeoutCause( + r.Context(), + maxRequestDuration, + fmt.Errorf("%s: max request duration exceeded", op), + ) defer cancelFunc() // Add a size limiter if desired diff --git a/internal/daemon/controller/handlers/targets/target_service.go b/internal/daemon/controller/handlers/targets/target_service.go index 48e5be7358..1cbb30cdab 100644 --- a/internal/daemon/controller/handlers/targets/target_service.go +++ b/internal/daemon/controller/handlers/targets/target_service.go @@ -1064,7 +1064,7 @@ func (s Service) AuthorizeSession(ctx context.Context, req *pbs.AuthorizeSession if retErr != nil { // Delete created session in case of errors. // Use new context for deletion in case error is because of context cancellation. - deleteCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + deleteCtx, cancel := context.WithTimeoutCause(context.Background(), 5*time.Second, stderrors.New("session deletion timeout exceeded")) defer cancel() _, err := sessionRepo.DeleteSession(deleteCtx, sess.PublicId) retErr = stderrors.Join(retErr, err) @@ -1095,7 +1095,11 @@ func (s Service) AuthorizeSession(ctx context.Context, req *pbs.AuthorizeSession if retErr != nil { // Revoke issued credentials in case of errors. // Use new context for deletion in case error is because of context cancellation. - deleteCtx, cancel := context.WithTimeout(context.Background(), time.Minute) + deleteCtx, cancel := context.WithTimeoutCause( + context.Background(), + time.Minute, + fmt.Errorf("%s: credential revocation timeout exceeded", op), + ) defer cancel() err := credRepo.Revoke(deleteCtx, sess.PublicId) retErr = stderrors.Join(retErr, err) diff --git a/internal/daemon/controller/interceptor.go b/internal/daemon/controller/interceptor.go index 5a62ab1290..3d65cecdb7 100644 --- a/internal/daemon/controller/interceptor.go +++ b/internal/daemon/controller/interceptor.go @@ -495,7 +495,11 @@ func eventsResponseInterceptor( func requestMaxDurationInterceptor(_ context.Context, maxRequestDuration time.Duration) grpc.UnaryServerInterceptor { const op = "controller.requestMaxDurationInterceptor" return func(interceptorCtx context.Context, req any, _ *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { - withTimeout, cancel := context.WithTimeout(interceptorCtx, maxRequestDuration) + withTimeout, cancel := context.WithTimeoutCause( + interceptorCtx, + maxRequestDuration, + fmt.Errorf("%s: max request duration exceeded", op), + ) defer cancel() return handler(withTimeout, req) } diff --git a/internal/daemon/controller/listeners.go b/internal/daemon/controller/listeners.go index d6982ffa35..9099099005 100644 --- a/internal/daemon/controller/listeners.go +++ b/internal/daemon/controller/listeners.go @@ -299,6 +299,7 @@ func (c *Controller) stopClusterGrpcServerAndListener() error { } func (c *Controller) stopHttpServersAndListeners() error { + const op = "controller.Controller.stopHttpServersAndListeners" var closeErrors error for i := range c.apiListeners { ln := c.apiListeners[i] @@ -306,8 +307,12 @@ func (c *Controller) stopHttpServersAndListeners() error { continue } - ctx, cancel := context.WithTimeout(c.baseContext, ln.Config.MaxRequestDuration) - ln.HTTPServer.Shutdown(ctx) + ctx, cancel := context.WithTimeoutCause( + c.baseContext, + ln.Config.MaxRequestDuration, + fmt.Errorf("%s: max request duration exceeded", op), + ) + _ = ln.HTTPServer.Shutdown(ctx) cancel() err := ln.ApiListener.Close() // The HTTP Shutdown call should close this, but just in case. diff --git a/internal/daemon/controller/testing.go b/internal/daemon/controller/testing.go index 41dc57d2ba..4124763a65 100644 --- a/internal/daemon/controller/testing.go +++ b/internal/daemon/controller/testing.go @@ -888,7 +888,11 @@ func (tc *TestController) WaitForNextWorkerStatusUpdate(workerStatusName string) ctx := context.TODO() event.WriteSysEvent(ctx, op, "waiting for next status report from worker", "worker", workerStatusName) waitStatusStart := time.Now() - ctx, cancel := context.WithTimeout(tc.ctx, time.Duration(tc.c.workerStatusGracePeriod.Load())) + ctx, cancel := context.WithTimeoutCause( + tc.ctx, + time.Duration(tc.c.workerStatusGracePeriod.Load()), + fmt.Errorf("%s: worker status grace period exceeded", op), + ) defer cancel() var err error for { diff --git a/internal/daemon/worker/handler.go b/internal/daemon/worker/handler.go index 90e743e396..63d2dc3e32 100644 --- a/internal/daemon/worker/handler.go +++ b/internal/daemon/worker/handler.go @@ -136,7 +136,7 @@ func (w *Worker) handleProxy(listenerCfg *listenerutil.ListenerConfig, sessionMa // Later calls will cause this to noop if they return a different status defer conn.Close(websocket.StatusNormalClosure, "done") - connCtx, connCancel := context.WithDeadline(ctx, sess.GetExpiration()) + connCtx, connCancel := context.WithDeadlineCause(ctx, sess.GetExpiration(), fmt.Errorf("%s: session expiration exceeded", op)) defer connCancel() var handshake proxy.ClientHandshake diff --git a/internal/daemon/worker/listeners.go b/internal/daemon/worker/listeners.go index 9b46520206..8162a55fca 100644 --- a/internal/daemon/worker/listeners.go +++ b/internal/daemon/worker/listeners.go @@ -321,6 +321,7 @@ func (w *Worker) stopServersAndListeners() error { } func (w *Worker) stopHttpServer() error { + const op = "worker.stopHttpServer" if w.proxyListener == nil { return nil } @@ -329,7 +330,11 @@ func (w *Worker) stopHttpServer() error { return nil } - ctx, cancel := context.WithTimeout(w.baseContext, w.proxyListener.Config.MaxRequestDuration) + ctx, cancel := context.WithTimeoutCause( + w.baseContext, + w.proxyListener.Config.MaxRequestDuration, + fmt.Errorf("%s: max request duration exceeded", op), + ) w.proxyListener.HTTPServer.Shutdown(ctx) cancel() diff --git a/internal/daemon/worker/session/session.go b/internal/daemon/worker/session/session.go index e684ce0a56..c54dc86ce5 100644 --- a/internal/daemon/worker/session/session.go +++ b/internal/daemon/worker/session/session.go @@ -503,7 +503,11 @@ func closeConnections(ctx context.Context, sessClient pbs.SessionServiceClient, // bit of formalization in terms of how we handle timeouts. For now, this // just ensures consistency with the same status call in that it times out // within an adequate period of time. - closeConnCtx, closeConnCancel := context.WithTimeout(ctx, time.Duration(CloseCallTimeout.Load())) + closeConnCtx, closeConnCancel := context.WithTimeoutCause( + ctx, + time.Duration(CloseCallTimeout.Load()), + fmt.Errorf("%s: close call timeout exceeded", op), + ) defer closeConnCancel() response, err := closeConnection(closeConnCtx, sessClient, makeCloseConnectionRequest(closeInfo)) if err != nil { diff --git a/internal/daemon/worker/status.go b/internal/daemon/worker/status.go index dbddbc8b2b..3d391201bf 100644 --- a/internal/daemon/worker/status.go +++ b/internal/daemon/worker/status.go @@ -93,7 +93,11 @@ func (w *Worker) LastStatusSuccess() *LastStatusInformation { func (w *Worker) WaitForNextSuccessfulStatusUpdate() error { const op = "worker.(Worker).WaitForNextSuccessfulStatusUpdate" waitStatusStart := time.Now() - ctx, cancel := context.WithTimeout(w.baseContext, time.Duration(w.successfulStatusGracePeriod.Load())) + ctx, cancel := context.WithTimeoutCause( + w.baseContext, + time.Duration(w.successfulStatusGracePeriod.Load()), + fmt.Errorf("%s: status grace period exceeded", op), + ) defer cancel() event.WriteSysEvent(ctx, op, "waiting for next status report to controller") for { @@ -189,7 +193,11 @@ func (w *Worker) sendWorkerStatus(cancelCtx context.Context, sessionManager sess if w.updateTags.Load() { tags = w.tags.Load().([]*pb.TagPair) } - statusCtx, statusCancel := context.WithTimeout(cancelCtx, time.Duration(w.statusCallTimeoutDuration.Load())) + statusCtx, statusCancel := context.WithTimeoutCause( + cancelCtx, + time.Duration(w.statusCallTimeoutDuration.Load()), + fmt.Errorf("%s: status call timeout exceeded", op), + ) defer statusCancel() keyId := w.WorkerAuthCurrentKeyId.Load() @@ -312,7 +320,11 @@ func (w *Worker) sendWorkerStatus(cancelCtx context.Context, sessionManager sess } } else if checkHCPBUpstreams != nil && checkHCPBUpstreams(w) { // This is a worker that is one hop away from managed workers, so attempt to get that list - hcpbWorkersCtx, hcpbWorkersCancel := context.WithTimeout(cancelCtx, time.Duration(w.statusCallTimeoutDuration.Load())) + hcpbWorkersCtx, hcpbWorkersCancel := context.WithTimeoutCause( + cancelCtx, + time.Duration(w.statusCallTimeoutDuration.Load()), + fmt.Errorf("%s: status call timeout exceeded", op), + ) defer hcpbWorkersCancel() workersResp, err := client.ListHcpbWorkers(hcpbWorkersCtx, &pbs.ListHcpbWorkersRequest{}) if err != nil { diff --git a/internal/daemon/worker/worker.go b/internal/daemon/worker/worker.go index a403188aeb..89957c4089 100644 --- a/internal/daemon/worker/worker.go +++ b/internal/daemon/worker/worker.go @@ -725,7 +725,11 @@ func (w *Worker) Shutdown() error { // at our default liveness value, which is also our default status grace // period timeout waitStatusStart := time.Now() - nextStatusCtx, nextStatusCancel := context.WithTimeout(w.baseContext, server.DefaultLiveness) + nextStatusCtx, nextStatusCancel := context.WithTimeoutCause( + w.baseContext, + server.DefaultLiveness, + fmt.Errorf("%s: liveness timeout exceeded", op), + ) defer nextStatusCancel() for { if err := nextStatusCtx.Err(); err != nil { @@ -813,7 +817,11 @@ func (w *Worker) getSessionTls(sessionManager session.Manager) func(hello *tls.C return nil, fmt.Errorf("no last status information found at session acceptance time") } - timeoutContext, cancel := context.WithTimeout(w.baseContext, session.ValidateSessionTimeout) + timeoutContext, cancel := context.WithTimeoutCause( + w.baseContext, + session.ValidateSessionTimeout, + fmt.Errorf("%s: session validation timeout exceeded", op), + ) defer cancel() sess, err := sessionManager.LoadLocalSession(timeoutContext, sessionId, lastSuccess.GetWorkerId()) if err != nil { diff --git a/internal/event/context.go b/internal/event/context.go index 77334aa93e..370650ea2b 100644 --- a/internal/event/context.go +++ b/internal/event/context.go @@ -363,13 +363,18 @@ func WriteSysEvent(ctx context.Context, caller Op, msg string, args ...any) { } func newSendCtx(ctx context.Context) (context.Context, context.CancelFunc) { + const op = "event.newSendCtx" var sendCtx context.Context var sendCancel context.CancelFunc switch { case ctx == nil: return context.Background(), nil case ctx.Err() != nil: - sendCtx, sendCancel = context.WithTimeout(context.WithoutCancel(ctx), cancelledSendTimeout) + sendCtx, sendCancel = context.WithTimeoutCause( + context.WithoutCancel(ctx), + cancelledSendTimeout, + fmt.Errorf("%s: cancelled send timeout exceeded", op), + ) default: sendCtx = ctx } diff --git a/internal/kms/kms.go b/internal/kms/kms.go index fe6f83d253..c5a2c638ac 100644 --- a/internal/kms/kms.go +++ b/internal/kms/kms.go @@ -499,7 +499,11 @@ func (k *Kms) MonitorTableRewrappingRuns(ctx context.Context, tableName string, if retErr != nil { // Create new context in case we failed because the context was canceled var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second) + ctx, cancel = context.WithTimeoutCause( + context.Background(), + 5*time.Second, + fmt.Errorf("%s: exec timeout exceeded", op), + ) defer cancel() } // Update the progress of this run diff --git a/internal/tests/helper/testing_helper.go b/internal/tests/helper/testing_helper.go index 0392eb3f62..84e2d10146 100644 --- a/internal/tests/helper/testing_helper.go +++ b/internal/tests/helper/testing_helper.go @@ -122,7 +122,11 @@ func (s *TestSession) ExpectConnectionStateOnController( require := require.New(t) assert := assert.New(t) - ctx, cancel := context.WithTimeout(ctx, expectConnectionStateOnControllerTimeout) + ctx, cancel := context.WithTimeoutCause( + ctx, + expectConnectionStateOnControllerTimeout, + errors.New("connection state on controller timeout exceeded"), + ) defer cancel() // This is just for initialization of the actual state set. @@ -188,7 +192,11 @@ func (s *TestSession) ExpectConnectionStateOnWorker( require := require.New(t) assert := assert.New(t) - ctx, cancel := context.WithTimeout(ctx, expectConnectionStateOnWorkerTimeout) + ctx, cancel := context.WithTimeoutCause( + ctx, + expectConnectionStateOnWorkerTimeout, + errors.New("connection state on worker timeout exceeded"), + ) defer cancel() // This is just for initialization of the actual state set.