Skip to content

Commit

Permalink
WFE: Correct Error Handling for Nonce Redemption RPCs with Unknown Pr…
Browse files Browse the repository at this point in the history
…efixes (#7004)

Fix an issue related to the custom gRPC Picker implementation introduced
in #6618. When a nonce contained a prefix not associated with a known
backend, the Picker would continuously rebuild, re-resolve DNS, and
eventually throw a 500 "Server Error" at RPC timeout. The Picker now
promptly returns a 400 "Bad Nonce" error as expected, in response the
requesting client should retry their request with a fresh nonce.

Additionally:
- WFE unit tests use derived nonces when `"BOULDER_CONFIG_DIR" ==
"test/config-next"`.
- `Balancer.Build()` in "noncebalancer" forces a rebuild until non-zero
backends are available. This matches the
[balancer/roundrobin](https://github.com/grpc/grpc-go/blob/d524b409462c601ef3f05a7e1fba19755a337c77/balancer/roundrobin/roundrobin.go#L49-L53)
implementation.
- Nonces with no matching backend increment "jose_errors" with label
`"type": "JWSInvalidNonce"` and "nonce_no_backend_found".
- Nonces of incorrect length are now rejected at the WFE and increment
"jose_errors" with label `"type": "JWSMalformedNonce"` instead of
`"type": "JWSInvalidNonce"`.
- Nonces not encoded as base64url are now rejected at the WFE and
increment "jose_errors" with label `"type": "JWSMalformedNonce"` instead
of `"type": "JWSInvalidNonce"`.

Fixes #6969
Part of #6974
  • Loading branch information
beautifulentropy authored Jul 28, 2023
1 parent 4da9853 commit b141fa7
Show file tree
Hide file tree
Showing 9 changed files with 366 additions and 32 deletions.
27 changes: 21 additions & 6 deletions grpc/noncebalancer/noncebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ import (
"errors"

"github.com/letsencrypt/boulder/nonce"

"google.golang.org/grpc/balancer"
"google.golang.org/grpc/balancer/base"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

const (
Expand All @@ -20,6 +23,15 @@ const (
SRVResolverScheme = "nonce-srv"
)

// ErrNoBackendsMatchPrefix indicates that no backends were found which match
// the nonce prefix provided in the RPC context. This can happen when the
// provided nonce is stale, valid but the backend has since been removed from
// the balancer, or valid but the backend has not yet been added to the
// balancer.
//
// In any case, when the WFE receives this error it will return a badNonce error
// to the ACME client.
var ErrNoBackendsMatchPrefix = status.New(codes.Unavailable, "no backends match the nonce prefix")
var errMissingPrefixCtxKey = errors.New("nonce.PrefixCtxKey value required in RPC context")
var errMissingHMACKeyCtxKey = errors.New("nonce.HMACKeyCtxKey value required in RPC context")
var errInvalidPrefixCtxKeyType = errors.New("nonce.PrefixCtxKey value in RPC context must be a string")
Expand All @@ -35,9 +47,12 @@ var _ base.PickerBuilder = (*Balancer)(nil)

// Build implements the base.PickerBuilder interface. It is called by the gRPC
// runtime when the balancer is first initialized and when the set of backend
// (SubConn) addresses changes. It is responsible for initializing the Picker's
// backends map and returning a balancer.Picker.
// (SubConn) addresses changes.
func (b *Balancer) Build(buildInfo base.PickerBuildInfo) balancer.Picker {
if len(buildInfo.ReadySCs) == 0 {
// The Picker must be rebuilt if there are no backends available.
return base.NewErrPicker(balancer.ErrNoSubConnAvailable)
}
return &Picker{
backends: buildInfo.ReadySCs,
}
Expand All @@ -58,7 +73,8 @@ var _ balancer.Picker = (*Picker)(nil)
// (SubConn) based on the context of each RPC message.
func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
if len(p.backends) == 0 {
// The Picker must be rebuilt if there are no backends available.
// This should never happen, the Picker should only be built when there
// are backends available.
return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
}

Expand All @@ -75,8 +91,7 @@ func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
}

if p.prefixToBackend == nil {
// Iterate over the backends and build a map of the derived prefix for
// each backend SubConn.
// First call to Pick with a new Picker.
prefixToBackend := make(map[string]balancer.SubConn)
for sc, scInfo := range p.backends {
scPrefix := nonce.DerivePrefix(scInfo.Address.Addr, hmacKey)
Expand All @@ -100,7 +115,7 @@ func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
sc, ok := p.prefixToBackend[destPrefix]
if !ok {
// No backend SubConn was found for the destination prefix.
return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
return balancer.PickResult{}, ErrNoBackendsMatchPrefix.Err()
}
return balancer.PickResult{SubConn: sc}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion grpc/noncebalancer/noncebalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func TestPickerNoMatchingSubConnAvailable(t *testing.T) {
info := balancer.PickInfo{Ctx: testCtx}

gotPick, err := p.Pick(info)
test.AssertErrorIs(t, err, balancer.ErrNoSubConnAvailable)
test.AssertErrorIs(t, err, ErrNoBackendsMatchPrefix.Err())
test.AssertNil(t, gotPick.SubConn, "subConn should be nil")
}

Expand Down
11 changes: 7 additions & 4 deletions nonce/nonce.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,16 @@ import (
const (
// PrefixLen is the character length of a nonce prefix.
PrefixLen = 8

// DeprecatedPrefixLen is the character length of a nonce prefix.
//
// DEPRECATED: Use PrefixLen instead.
// TODO(#6610): Remove once we've moved to derivable prefixes by default.
DeprecatedPrefixLen = 4
defaultMaxUsed = 65536
nonceLen = 32

// NonceLen is the character length of a nonce, excluding the prefix.
NonceLen = 32
defaultMaxUsed = 65536
)

var errInvalidNonceLength = errors.New("invalid nonce length")
Expand Down Expand Up @@ -203,7 +206,7 @@ func (ns *NonceService) encrypt(counter int64) (string, error) {
copy(pt[pad:], ctr.Bytes())

// Encrypt
ret := make([]byte, nonceLen)
ret := make([]byte, NonceLen)
ct := ns.gcm.Seal(nil, nonce, pt, nil)
copy(ret, nonce[4:])
copy(ret[8:], ct)
Expand All @@ -228,7 +231,7 @@ func (ns *NonceService) decrypt(nonce string) (int64, error) {
if err != nil {
return 0, err
}
if len(decoded) != nonceLen {
if len(decoded) != NonceLen {
return 0, errInvalidNonceLength
}

Expand Down
68 changes: 68 additions & 0 deletions test/integration/nonce_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//go:build integration

package integration

import (
"context"
"os"
"strings"
"testing"

"github.com/jmhodges/clock"

"github.com/letsencrypt/boulder/cmd"
bgrpc "github.com/letsencrypt/boulder/grpc"
nb "github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/nonce"
noncepb "github.com/letsencrypt/boulder/nonce/proto"
"github.com/letsencrypt/boulder/test"
"google.golang.org/grpc/status"
)

type Config struct {
NotWFE struct {
TLS cmd.TLSConfig
GetNonceService *cmd.GRPCClientConfig
RedeemNonceService *cmd.GRPCClientConfig
NoncePrefixKey cmd.PasswordConfig
}
}

func TestNonceBalancer_NoBackendMatchingPrefix(t *testing.T) {
t.Parallel()

if !strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
t.Skip("Derived nonce prefixes are only configured in config-next")
}

// We're going to use a minimal nonce service client called "notwfe" which
// masquerades as a wfe for the purpose of redeeming nonces.

// Load the test config.
var c Config
err := cmd.ReadConfigFile("test/integration/testdata/nonce-client.json", &c)
test.AssertNotError(t, err, "Could not read config file")

tlsConfig, err := c.NotWFE.TLS.Load(metrics.NoopRegisterer)
test.AssertNotError(t, err, "Could not load TLS config")

rncKey, err := c.NotWFE.NoncePrefixKey.Pass()
test.AssertNotError(t, err, "Failed to load noncePrefixKey")

clk := clock.New()

redeemNonceConn, err := bgrpc.ClientSetup(c.NotWFE.RedeemNonceService, tlsConfig, metrics.NoopRegisterer, clk)
test.AssertNotError(t, err, "Failed to load credentials and create gRPC connection to redeem nonce service")
rnc := nonce.NewRedeemer(redeemNonceConn)

// Attempt to redeem a nonce with a prefix that doesn't match any backends.
ctx := context.WithValue(context.Background(), nonce.PrefixCtxKey{}, "12345678")
ctx = context.WithValue(ctx, nonce.HMACKeyCtxKey{}, rncKey)
_, err = rnc.Redeem(ctx, &noncepb.NonceMessage{Nonce: "0123456789"})

// We expect to get a specific gRPC status error with code NotFound.
gotRPCStatus, ok := status.FromError(err)
test.Assert(t, ok, "Failed to convert error to status")
test.AssertEquals(t, gotRPCStatus, nb.ErrNoBackendsMatchPrefix)
}
39 changes: 39 additions & 0 deletions test/integration/testdata/nonce-client.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"notwfe": {
"tls": {
"caCertFile": "test/grpc-creds/minica.pem",
"certFile": "test/grpc-creds/wfe.boulder/cert.pem",
"keyFile": "test/grpc-creds/wfe.boulder/key.pem"
},
"getNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookup": {
"service": "nonce",
"domain": "service.consul"
},
"timeout": "15s",
"noWaitForReady": true,
"hostOverride": "nonce.boulder"
},
"redeemNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookups": [
{
"service": "nonce1",
"domain": "service.consul"
},
{
"service": "nonce2",
"domain": "service.consul"
}
],
"srvResolver": "nonce-srv",
"timeout": "15s",
"noWaitForReady": true,
"hostOverride": "nonce.boulder"
},
"noncePrefixKey": {
"passwordFile": "test/secrets/nonce_prefix_key"
}
}
}
20 changes: 16 additions & 4 deletions wfe2/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ type wfe2Stats struct {
// improperECFieldLengths counts the number of ACME account EC JWKs we see
// with improper X and Y lengths for their curve
improperECFieldLengths prometheus.Counter
// nonceNoMatchingBackendCount counts the number of times we've received a nonce
// with a prefix that doesn't match a known backend.
nonceNoMatchingBackendCount prometheus.Counter
}

func initStats(stats prometheus.Registerer) wfe2Stats {
Expand Down Expand Up @@ -53,10 +56,19 @@ func initStats(stats prometheus.Registerer) wfe2Stats {
)
stats.MustRegister(improperECFieldLengths)

nonceNoBackendCount := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "nonce_no_backend_found",
Help: "Number of times we've received a nonce with a prefix that doesn't match a known backend",
},
)
stats.MustRegister(nonceNoBackendCount)

return wfe2Stats{
httpErrorCount: httpErrorCount,
joseErrorCount: joseErrorCount,
csrSignatureAlgs: csrSignatureAlgs,
improperECFieldLengths: improperECFieldLengths,
httpErrorCount: httpErrorCount,
joseErrorCount: joseErrorCount,
csrSignatureAlgs: csrSignatureAlgs,
improperECFieldLengths: improperECFieldLengths,
nonceNoMatchingBackendCount: nonceNoBackendCount,
}
}
48 changes: 47 additions & 1 deletion wfe2/verify.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"crypto/ecdsa"
"crypto/rsa"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
Expand All @@ -14,12 +15,14 @@ import (
"strings"

"github.com/prometheus/client_golang/prometheus"
"google.golang.org/grpc/status"
"gopkg.in/go-jose/go-jose.v2"

"github.com/letsencrypt/boulder/core"
berrors "github.com/letsencrypt/boulder/errors"
"github.com/letsencrypt/boulder/goodkey"
"github.com/letsencrypt/boulder/grpc"
nb "github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/nonce"
noncepb "github.com/letsencrypt/boulder/nonce/proto"
"github.com/letsencrypt/boulder/probs"
Expand Down Expand Up @@ -183,6 +186,23 @@ func (wfe *WebFrontEndImpl) validPOSTRequest(request *http.Request) *probs.Probl
return nil
}

// nonceWellFormed checks a JWS' Nonce header to ensure it is well-formed,
// otherwise a bad nonce problem is returned. This avoids unnecessary RPCs to
// the nonce redemption service.
func nonceWellFormed(nonceHeader string, prefixLen int) *probs.ProblemDetails {
errBadNonce := probs.BadNonce(fmt.Sprintf("JWS has an invalid anti-replay nonce: %q", nonceHeader))
body, err := base64.RawURLEncoding.DecodeString(nonceHeader[prefixLen:])
if err != nil {
// Nonce was not valid base64url.
return errBadNonce
}
if len(body) != nonce.NonceLen {
// Nonce was an unexpected length.
return errBadNonce
}
return nil
}

// validNonce checks a JWS' Nonce header to ensure it is one that the
// nonceService knows about, otherwise a bad nonce problem is returned.
// NOTE: this function assumes the JWS has already been verified with the
Expand All @@ -196,17 +216,43 @@ func (wfe *WebFrontEndImpl) validNonce(ctx context.Context, header jose.Header)
var err error
if wfe.noncePrefixMap == nil {
// Dispatch nonce redemption RPCs dynamically.
prob := nonceWellFormed(header.Nonce, nonce.PrefixLen)
if prob != nil {
wfe.stats.joseErrorCount.With(prometheus.Labels{"type": "JWSMalformedNonce"}).Inc()
return prob
}

// Populate the context with the nonce prefix and HMAC key. These are
// used by a custom gRPC balancer, known as "noncebalancer", to route
// redemption RPCs to the backend that originally issued the nonce.
ctx = context.WithValue(ctx, nonce.PrefixCtxKey{}, header.Nonce[:nonce.PrefixLen])
ctx = context.WithValue(ctx, nonce.HMACKeyCtxKey{}, wfe.rncKey)

resp, err := wfe.rnc.Redeem(ctx, &noncepb.NonceMessage{Nonce: header.Nonce})
if err != nil {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
rpcStatus, ok := status.FromError(err)
if !ok || rpcStatus != nb.ErrNoBackendsMatchPrefix {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
}

// ErrNoBackendsMatchPrefix suggests that the nonce backend, which
// issued this nonce, is presently unreachable or unrecognized by
// this WFE. As this is a transient failure, the client should retry
// their request with a fresh nonce.
resp = &noncepb.ValidMessage{Valid: false}
wfe.stats.nonceNoMatchingBackendCount.Inc()
}
valid = resp.Valid
} else {
// Dispatch nonce redpemption RPCs using a static mapping.
//
// TODO(#6610) Remove code below and the `npm` mapping.
prob := nonceWellFormed(header.Nonce, nonce.DeprecatedPrefixLen)
if prob != nil {
wfe.stats.joseErrorCount.With(prometheus.Labels{"type": "JWSMalformedNonce"}).Inc()
return prob
}

valid, err = nonce.RemoteRedeem(ctx, wfe.noncePrefixMap, header.Nonce)
if err != nil {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
Expand Down
Loading

0 comments on commit b141fa7

Please sign in to comment.