Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WFE: Correct Error Handling for Nonce Redemption RPCs with Unknown Prefixes #7004

Merged
merged 8 commits into from
Jul 28, 2023
Merged
27 changes: 21 additions & 6 deletions grpc/noncebalancer/noncebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ import (
"errors"

"github.com/letsencrypt/boulder/nonce"

"google.golang.org/grpc/balancer"
"google.golang.org/grpc/balancer/base"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

const (
Expand All @@ -20,6 +23,15 @@ const (
SRVResolverScheme = "nonce-srv"
)

// ErrNoBackendsMatchPrefix indicates that no backends were found which match
// the nonce prefix provided in the RPC context. This can happen when the
// provided nonce is stale, valid but the backend has since been removed from
// the balancer, or valid but the backend has not yet been added to the
// balancer.
//
// In any case, when the WFE receives this error it will return a badNonce error
// to the ACME client.
var ErrNoBackendsMatchPrefix = status.New(codes.Unavailable, "no backends match the nonce prefix")
var errMissingPrefixCtxKey = errors.New("nonce.PrefixCtxKey value required in RPC context")
var errMissingHMACKeyCtxKey = errors.New("nonce.HMACKeyCtxKey value required in RPC context")
var errInvalidPrefixCtxKeyType = errors.New("nonce.PrefixCtxKey value in RPC context must be a string")
Expand All @@ -35,9 +47,12 @@ var _ base.PickerBuilder = (*Balancer)(nil)

// Build implements the base.PickerBuilder interface. It is called by the gRPC
// runtime when the balancer is first initialized and when the set of backend
// (SubConn) addresses changes. It is responsible for initializing the Picker's
// backends map and returning a balancer.Picker.
// (SubConn) addresses changes.
func (b *Balancer) Build(buildInfo base.PickerBuildInfo) balancer.Picker {
if len(buildInfo.ReadySCs) == 0 {
// The Picker must be rebuilt if there are no backends available.
return base.NewErrPicker(balancer.ErrNoSubConnAvailable)
}
return &Picker{
backends: buildInfo.ReadySCs,
}
Expand All @@ -58,7 +73,8 @@ var _ balancer.Picker = (*Picker)(nil)
// (SubConn) based on the context of each RPC message.
func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
if len(p.backends) == 0 {
// The Picker must be rebuilt if there are no backends available.
// This should never happen, the Picker should only be built when there
// are backends available.
return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
}

Expand All @@ -75,8 +91,7 @@ func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
}

if p.prefixToBackend == nil {
// Iterate over the backends and build a map of the derived prefix for
// each backend SubConn.
// First call to Pick with a new Picker.
prefixToBackend := make(map[string]balancer.SubConn)
for sc, scInfo := range p.backends {
scPrefix := nonce.DerivePrefix(scInfo.Address.Addr, hmacKey)
Expand All @@ -100,7 +115,7 @@ func (p *Picker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
sc, ok := p.prefixToBackend[destPrefix]
if !ok {
// No backend SubConn was found for the destination prefix.
return balancer.PickResult{}, balancer.ErrNoSubConnAvailable
return balancer.PickResult{}, ErrNoBackendsMatchPrefix.Err()
}
return balancer.PickResult{SubConn: sc}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion grpc/noncebalancer/noncebalancer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func TestPickerNoMatchingSubConnAvailable(t *testing.T) {
info := balancer.PickInfo{Ctx: testCtx}

gotPick, err := p.Pick(info)
test.AssertErrorIs(t, err, balancer.ErrNoSubConnAvailable)
test.AssertErrorIs(t, err, ErrNoBackendsMatchPrefix.Err())
test.AssertNil(t, gotPick.SubConn, "subConn should be nil")
}

Expand Down
11 changes: 7 additions & 4 deletions nonce/nonce.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,16 @@ import (
const (
// PrefixLen is the character length of a nonce prefix.
PrefixLen = 8

// DeprecatedPrefixLen is the character length of a nonce prefix.
//
// DEPRECATED: Use PrefixLen instead.
// TODO(#6610): Remove once we've moved to derivable prefixes by default.
DeprecatedPrefixLen = 4
defaultMaxUsed = 65536
nonceLen = 32

// NonceLen is the character length of a nonce, excluding the prefix.
NonceLen = 32
defaultMaxUsed = 65536
)

var errInvalidNonceLength = errors.New("invalid nonce length")
Expand Down Expand Up @@ -203,7 +206,7 @@ func (ns *NonceService) encrypt(counter int64) (string, error) {
copy(pt[pad:], ctr.Bytes())

// Encrypt
ret := make([]byte, nonceLen)
ret := make([]byte, NonceLen)
ct := ns.gcm.Seal(nil, nonce, pt, nil)
copy(ret, nonce[4:])
copy(ret[8:], ct)
Expand All @@ -228,7 +231,7 @@ func (ns *NonceService) decrypt(nonce string) (int64, error) {
if err != nil {
return 0, err
}
if len(decoded) != nonceLen {
if len(decoded) != NonceLen {
return 0, errInvalidNonceLength
}

Expand Down
68 changes: 68 additions & 0 deletions test/integration/nonce_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//go:build integration

package integration

import (
"context"
"os"
"strings"
"testing"

"github.com/jmhodges/clock"

"github.com/letsencrypt/boulder/cmd"
bgrpc "github.com/letsencrypt/boulder/grpc"
nb "github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/nonce"
noncepb "github.com/letsencrypt/boulder/nonce/proto"
"github.com/letsencrypt/boulder/test"
"google.golang.org/grpc/status"
)

type Config struct {
NotWFE struct {
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved
TLS cmd.TLSConfig
GetNonceService *cmd.GRPCClientConfig
RedeemNonceService *cmd.GRPCClientConfig
NoncePrefixKey cmd.PasswordConfig
}
}

func TestNonceBalancer_NoBackendMatchingPrefix(t *testing.T) {
t.Parallel()

if !strings.Contains(os.Getenv("BOULDER_CONFIG_DIR"), "test/config-next") {
t.Skip("Derived nonce prefixes are only configured in config-next")
beautifulentropy marked this conversation as resolved.
Show resolved Hide resolved
}

// We're going to use a minimal nonce service client called "notwfe" which
// masquerades as a wfe for the purpose of redeeming nonces.

// Load the test config.
var c Config
err := cmd.ReadConfigFile("test/integration/testdata/nonce-client.json", &c)
test.AssertNotError(t, err, "Could not read config file")

tlsConfig, err := c.NotWFE.TLS.Load(metrics.NoopRegisterer)
test.AssertNotError(t, err, "Could not load TLS config")

rncKey, err := c.NotWFE.NoncePrefixKey.Pass()
test.AssertNotError(t, err, "Failed to load noncePrefixKey")

clk := clock.New()

redeemNonceConn, err := bgrpc.ClientSetup(c.NotWFE.RedeemNonceService, tlsConfig, metrics.NoopRegisterer, clk)
test.AssertNotError(t, err, "Failed to load credentials and create gRPC connection to redeem nonce service")
rnc := nonce.NewRedeemer(redeemNonceConn)

// Attempt to redeem a nonce with a prefix that doesn't match any backends.
ctx := context.WithValue(context.Background(), nonce.PrefixCtxKey{}, "12345678")
ctx = context.WithValue(ctx, nonce.HMACKeyCtxKey{}, rncKey)
_, err = rnc.Redeem(ctx, &noncepb.NonceMessage{Nonce: "0123456789"})

// We expect to get a specific gRPC status error with code NotFound.
gotRPCStatus, ok := status.FromError(err)
test.Assert(t, ok, "Failed to convert error to status")
test.AssertEquals(t, gotRPCStatus, nb.ErrNoBackendsMatchPrefix)
}
39 changes: 39 additions & 0 deletions test/integration/testdata/nonce-client.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"notwfe": {
"tls": {
"caCertFile": "test/grpc-creds/minica.pem",
"certFile": "test/grpc-creds/wfe.boulder/cert.pem",
"keyFile": "test/grpc-creds/wfe.boulder/key.pem"
},
"getNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookup": {
"service": "nonce",
"domain": "service.consul"
},
"timeout": "15s",
"noWaitForReady": true,
"hostOverride": "nonce.boulder"
},
"redeemNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookups": [
{
"service": "nonce1",
"domain": "service.consul"
},
{
"service": "nonce2",
"domain": "service.consul"
}
],
"srvResolver": "nonce-srv",
"timeout": "15s",
"noWaitForReady": true,
"hostOverride": "nonce.boulder"
},
"noncePrefixKey": {
"passwordFile": "test/secrets/nonce_prefix_key"
}
}
}
20 changes: 16 additions & 4 deletions wfe2/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ type wfe2Stats struct {
// improperECFieldLengths counts the number of ACME account EC JWKs we see
// with improper X and Y lengths for their curve
improperECFieldLengths prometheus.Counter
// nonceNoMatchingBackendCount counts the number of times we've received a nonce
// with a prefix that doesn't match a known backend.
nonceNoMatchingBackendCount prometheus.Counter
}

func initStats(stats prometheus.Registerer) wfe2Stats {
Expand Down Expand Up @@ -53,10 +56,19 @@ func initStats(stats prometheus.Registerer) wfe2Stats {
)
stats.MustRegister(improperECFieldLengths)

nonceNoBackendCount := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "nonce_no_backend_found",
Help: "Number of times we've received a nonce with a prefix that doesn't match a known backend",
},
)
stats.MustRegister(nonceNoBackendCount)

return wfe2Stats{
httpErrorCount: httpErrorCount,
joseErrorCount: joseErrorCount,
csrSignatureAlgs: csrSignatureAlgs,
improperECFieldLengths: improperECFieldLengths,
httpErrorCount: httpErrorCount,
joseErrorCount: joseErrorCount,
csrSignatureAlgs: csrSignatureAlgs,
improperECFieldLengths: improperECFieldLengths,
nonceNoMatchingBackendCount: nonceNoBackendCount,
}
}
48 changes: 47 additions & 1 deletion wfe2/verify.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"crypto/ecdsa"
"crypto/rsa"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
Expand All @@ -14,12 +15,14 @@ import (
"strings"

"github.com/prometheus/client_golang/prometheus"
"google.golang.org/grpc/status"
"gopkg.in/go-jose/go-jose.v2"

"github.com/letsencrypt/boulder/core"
berrors "github.com/letsencrypt/boulder/errors"
"github.com/letsencrypt/boulder/goodkey"
"github.com/letsencrypt/boulder/grpc"
nb "github.com/letsencrypt/boulder/grpc/noncebalancer"
"github.com/letsencrypt/boulder/nonce"
noncepb "github.com/letsencrypt/boulder/nonce/proto"
"github.com/letsencrypt/boulder/probs"
Expand Down Expand Up @@ -183,6 +186,23 @@ func (wfe *WebFrontEndImpl) validPOSTRequest(request *http.Request) *probs.Probl
return nil
}

// nonceWellFormed checks a JWS' Nonce header to ensure it is well-formed,
// otherwise a bad nonce problem is returned. This avoids unnecessary RPCs to
// the nonce redemption service.
func nonceWellFormed(nonceHeader string, prefixLen int) *probs.ProblemDetails {
errBadNonce := probs.BadNonce(fmt.Sprintf("JWS has an invalid anti-replay nonce: %q", nonceHeader))
body, err := base64.RawURLEncoding.DecodeString(nonceHeader[prefixLen:])
if err != nil {
// Nonce was not valid base64url.
return errBadNonce
}
if len(body) != nonce.NonceLen {
// Nonce was an unexpected length.
return errBadNonce
}
return nil
}

// validNonce checks a JWS' Nonce header to ensure it is one that the
// nonceService knows about, otherwise a bad nonce problem is returned.
// NOTE: this function assumes the JWS has already been verified with the
Expand All @@ -196,17 +216,43 @@ func (wfe *WebFrontEndImpl) validNonce(ctx context.Context, header jose.Header)
var err error
if wfe.noncePrefixMap == nil {
// Dispatch nonce redemption RPCs dynamically.
prob := nonceWellFormed(header.Nonce, nonce.PrefixLen)
if prob != nil {
wfe.stats.joseErrorCount.With(prometheus.Labels{"type": "JWSMalformedNonce"}).Inc()
return prob
}

// Populate the context with the nonce prefix and HMAC key. These are
// used by a custom gRPC balancer, known as "noncebalancer", to route
// redemption RPCs to the backend that originally issued the nonce.
ctx = context.WithValue(ctx, nonce.PrefixCtxKey{}, header.Nonce[:nonce.PrefixLen])
ctx = context.WithValue(ctx, nonce.HMACKeyCtxKey{}, wfe.rncKey)

resp, err := wfe.rnc.Redeem(ctx, &noncepb.NonceMessage{Nonce: header.Nonce})
if err != nil {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
rpcStatus, ok := status.FromError(err)
if !ok || rpcStatus != nb.ErrNoBackendsMatchPrefix {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
}

// ErrNoBackendsMatchPrefix suggests that the nonce backend, which
// issued this nonce, is presently unreachable or unrecognized by
// this WFE. As this is a transient failure, the client should retry
// their request with a fresh nonce.
resp = &noncepb.ValidMessage{Valid: false}
wfe.stats.nonceNoMatchingBackendCount.Inc()
}
valid = resp.Valid
} else {
// Dispatch nonce redpemption RPCs using a static mapping.
//
// TODO(#6610) Remove code below and the `npm` mapping.
prob := nonceWellFormed(header.Nonce, nonce.DeprecatedPrefixLen)
if prob != nil {
wfe.stats.joseErrorCount.With(prometheus.Labels{"type": "JWSMalformedNonce"}).Inc()
return prob
}

valid, err = nonce.RemoteRedeem(ctx, wfe.noncePrefixMap, header.Nonce)
if err != nil {
return web.ProblemDetailsForError(err, "failed to redeem nonce")
Expand Down
Loading