diff --git a/.changelog/23977.txt b/.changelog/23977.txt new file mode 100644 index 00000000000..88ecfcd9bd3 --- /dev/null +++ b/.changelog/23977.txt @@ -0,0 +1,3 @@ +```release-note:improvement +keyring: Stored wrapped data encryption keys in Raft +``` diff --git a/helper/backoff.go b/helper/backoff.go index e25e1f1c2b9..2d9510f2fc4 100644 --- a/helper/backoff.go +++ b/helper/backoff.go @@ -4,6 +4,8 @@ package helper import ( + "context" + "fmt" "time" ) @@ -29,3 +31,34 @@ func Backoff(backoffBase time.Duration, backoffLimit time.Duration, attempt uint return deadline } + +// WithBackoffFunc is a helper that runs a function with geometric backoff + a +// small jitter to a maximum backoff. It returns once the context closes, with +// the error wrapping over the error from the function. +func WithBackoffFunc(ctx context.Context, minBackoff, maxBackoff time.Duration, fn func() error) error { + var err error + backoff := minBackoff + t, stop := NewSafeTimer(0) + defer stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("operation cancelled: %w", err) + case <-t.C: + } + + err = fn() + if err == nil { + return nil + } + + if backoff < maxBackoff { + backoff = backoff*2 + RandomStagger(minBackoff/10) + if backoff > maxBackoff { + backoff = maxBackoff + } + } + + t.Reset(backoff) + } +} diff --git a/helper/raftutil/msgtypes.go b/helper/raftutil/msgtypes.go index 9112eb4a616..615881173c9 100644 --- a/helper/raftutil/msgtypes.go +++ b/helper/raftutil/msgtypes.go @@ -55,8 +55,7 @@ var msgTypeNames = map[structs.MessageType]string{ structs.ServiceRegistrationDeleteByIDRequestType: "ServiceRegistrationDeleteByIDRequestType", structs.ServiceRegistrationDeleteByNodeIDRequestType: "ServiceRegistrationDeleteByNodeIDRequestType", structs.VarApplyStateRequestType: "VarApplyStateRequestType", - structs.RootKeyMetaUpsertRequestType: "RootKeyMetaUpsertRequestType", - structs.RootKeyMetaDeleteRequestType: "RootKeyMetaDeleteRequestType", + structs.WrappedRootKeysDeleteRequestType: "WrappedRootKeysDeleteRequestType", structs.ACLRolesUpsertRequestType: "ACLRolesUpsertRequestType", structs.ACLRolesDeleteByIDRequestType: "ACLRolesDeleteByIDRequestType", structs.ACLAuthMethodsUpsertRequestType: "ACLAuthMethodsUpsertRequestType", @@ -65,6 +64,8 @@ var msgTypeNames = map[structs.MessageType]string{ structs.ACLBindingRulesDeleteRequestType: "ACLBindingRulesDeleteRequestType", structs.NodePoolUpsertRequestType: "NodePoolUpsertRequestType", structs.NodePoolDeleteRequestType: "NodePoolDeleteRequestType", + structs.JobVersionTagRequestType: "JobVersionTagRequestType", + structs.WrappedRootKeysUpsertRequestType: "WrappedRootKeysUpsertRequestType", structs.NamespaceUpsertRequestType: "NamespaceUpsertRequestType", structs.NamespaceDeleteRequestType: "NamespaceDeleteRequestType", } diff --git a/nomad/acl_test.go b/nomad/acl_test.go index e7dec93e8a4..0c8e0ff00e8 100644 --- a/nomad/acl_test.go +++ b/nomad/acl_test.go @@ -63,8 +63,8 @@ func TestAuthenticate_mTLS(t *testing.T) { testutil.WaitForLeader(t, leader.RPC) testutil.Wait(t, func() (bool, error) { - keyset, err := follower.encrypter.activeKeySet() - return keyset != nil, err + cs, err := follower.encrypter.activeCipherSet() + return cs != nil, err }) rootToken := uuid.Generate() diff --git a/nomad/alloc_endpoint_test.go b/nomad/alloc_endpoint_test.go index 6e4a2daf960..715f311150d 100644 --- a/nomad/alloc_endpoint_test.go +++ b/nomad/alloc_endpoint_test.go @@ -1690,7 +1690,7 @@ func TestAlloc_SignIdentities_Bad(t *testing.T) { s1, cleanupS1 := TestServer(t, 
nil) t.Cleanup(cleanupS1) codec := rpcClient(t, s1) - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForKeyring(t, s1.RPC, s1.Region()) node := mock.Node() must.NoError(t, s1.fsm.State().UpsertNode(structs.MsgTypeTestSetup, 100, node)) diff --git a/nomad/core_sched.go b/nomad/core_sched.go index fc2105b64ef..83999d064eb 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -899,14 +899,27 @@ func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool) // rootKeyRotateOrGC is used to rotate or garbage collect root keys func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error { + // migration sends updates to the leader so our view of state is no longer + // valid. we ack this core job and will pick up against at the next + // interval. + // + // COMPAT(1.12.0): remove this block in 1.12.0 LTS + stateChanged, err := c.rootKeyMigrate(eval) + if err != nil { + return err + } + if stateChanged { + return nil + } + // a rotation will be sent to the leader so our view of state // is no longer valid. we ack this core job and will pick up // the GC work on the next interval - wasRotated, err := c.rootKeyRotate(eval, time.Now()) + stateChanged, err = c.rootKeyRotate(eval, time.Now()) if err != nil { return err } - if wasRotated { + if stateChanged { return nil } return c.rootKeyGC(eval, time.Now()) @@ -915,7 +928,7 @@ func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error { func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error { ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.RootKeys(ws) if err != nil { return err } @@ -931,21 +944,21 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) - if !keyMeta.IsInactive() { + rootKey := raw.(*structs.RootKey) + if !rootKey.IsInactive() { continue // never GC keys we're still using } c.logger.Trace("checking inactive key eligibility for gc", - "create_time", keyMeta.CreateTime, "threshold", rotationThreshold.UnixNano()) + "create_time", rootKey.CreateTime, "threshold", rotationThreshold.UnixNano()) - if keyMeta.CreateTime > rotationThreshold.UnixNano() { + if rootKey.CreateTime > rotationThreshold.UnixNano() { continue // don't GC keys with potentially live Workload Identities } // don't GC keys used to encrypt Variables or sign legacy non-expiring // Workload Identities - inUse, err := c.snap.IsRootKeyInUse(keyMeta.KeyID) + inUse, err := c.snap.IsRootKeyInUse(rootKey.KeyID) if err != nil { return err } @@ -954,7 +967,7 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error } req := &structs.KeyringDeleteRootKeyRequest{ - KeyID: keyMeta.KeyID, + KeyID: rootKey.KeyID, WriteRequest: structs.WriteRequest{ Region: c.srv.config.Region, AuthToken: eval.LeaderACL, @@ -970,24 +983,69 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error return nil } -// rootKeyRotate checks if the active key is old enough that we need to kick off -// a rotation. It prepublishes a key first and only promotes that prepublished -// key to active once the rotation threshold has expired -func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) { +// rootKeyMigrate checks if the cluster is fully upgraded and migrates all the +// legacy root key material to the new wrapped key format. 
It returns true if +// any of the keys were migrated, because the caller should now treat the +// snapshot as invalid. +// +// COMPAT(1.12.0): remove this function in 1.12.0 LTS +func (c *CoreScheduler) rootKeyMigrate(eval *structs.Evaluation) (bool, error) { + if !ServersMeetMinimumVersion( + c.srv.serf.Members(), c.srv.Region(), minVersionKeyringInRaft, true) { + return false, nil + } ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.RootKeys(ws) if err != nil { return false, err } + stateChanged := false + for raw := iter.Next(); raw != nil; raw = iter.Next() { + wrappedKeys := raw.(*structs.RootKey) + if len(wrappedKeys.WrappedKeys) > 0 { + continue // already migrated + } + rootKey, err := c.srv.encrypter.GetKey(wrappedKeys.KeyID) + if err != nil { + return stateChanged, err + } + req := &structs.KeyringUpdateRootKeyRequest{ + RootKey: rootKey, + WriteRequest: structs.WriteRequest{ + Region: c.srv.config.Region, + AuthToken: eval.LeaderACL, + }, + } + + if err := c.srv.RPC("Keyring.Update", + req, &structs.KeyringUpdateRootKeyResponse{}); err != nil { + c.logger.Error("migrating legacy key material failed", + "error", err, "key_id", wrappedKeys.KeyID) + return false, err + } + stateChanged = true + } + return stateChanged, nil +} + +// rootKeyRotate checks if the active key is old enough that we need to kick off +// a rotation. It prepublishes a key first and only promotes that prepublished +// key to active once the rotation threshold has expired +func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) { var ( - activeKey *structs.RootKeyMeta - prepublishedKey *structs.RootKeyMeta + activeKey *structs.RootKey + prepublishedKey *structs.RootKey ) + ws := memdb.NewWatchSet() + iter, err := c.snap.RootKeys(ws) + if err != nil { + return false, err + } for raw := iter.Next(); raw != nil; raw = iter.Next() { - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.RootKey) switch key.State { case structs.RootKeyStateActive: activeKey = key @@ -1083,7 +1141,7 @@ func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) ( func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.RootKeys(ws) if err != nil { return err } @@ -1093,11 +1151,11 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) - if !keyMeta.IsRekeying() { + wrappedKeys := raw.(*structs.RootKey) + if !wrappedKeys.IsRekeying() { continue } - varIter, err := c.snap.GetVariablesByKeyID(ws, keyMeta.KeyID) + varIter, err := c.snap.GetVariablesByKeyID(ws, wrappedKeys.KeyID) if err != nil { return err } @@ -1106,7 +1164,7 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { return err } - rootKey, err := c.srv.encrypter.GetKey(keyMeta.KeyID) + rootKey, err := c.srv.encrypter.GetKey(wrappedKeys.KeyID) if err != nil { return fmt.Errorf("rotated key does not exist in keyring: %w", err) } diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 76a04a67de9..b47d0ebdac9 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2623,7 +2623,7 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { // active key, will never be GC'd store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be 
bootstapped")) must.NoError(t, err) @@ -2648,11 +2648,11 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { must.NoError(t, err) must.True(t, rotated, must.Sprint("key should rotate")) - var key1 *structs.RootKeyMeta - iter, err := store.RootKeyMetas(nil) + var key1 *structs.RootKey + iter, err := store.RootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.RootKey) if k.KeyID == key0.KeyID { must.True(t, k.IsActive(), must.Sprint("expected original key to be active")) } else { @@ -2675,10 +2675,10 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { c.snap, _ = store.Snapshot() rotated, err = c.rootKeyRotate(eval, now) - iter, err = store.RootKeyMetas(nil) + iter, err = store.RootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.RootKey) switch k.KeyID { case key0.KeyID: must.True(t, k.IsActive(), must.Sprint("original key should still be active")) @@ -2694,10 +2694,10 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { now = time.Unix(0, key1.PublishTime+(time.Minute*10).Nanoseconds()) rotated, err = c.rootKeyRotate(eval, now) - iter, err = store.RootKeyMetas(nil) + iter, err = store.RootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.RootKey) switch k.KeyID { case key0.KeyID: must.True(t, k.IsInactive(), must.Sprint("original key should be inactive")) @@ -2725,7 +2725,7 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { // active key, will never be GC'd store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be bootstapped")) must.NoError(t, err) @@ -2733,14 +2733,14 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { yesterday := now - (24 * time.Hour).Nanoseconds() // insert an "old" inactive key - key1 := structs.NewRootKeyMeta().MakeInactive() + key1 := structs.NewRootKey(structs.NewRootKeyMeta()).MakeInactive() key1.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(600, key1, false)) + must.NoError(t, store.UpsertRootKey(600, key1, false)) // insert an "old" and inactive key with a variable that's using it - key2 := structs.NewRootKeyMeta().MakeInactive() + key2 := structs.NewRootKey(structs.NewRootKeyMeta()).MakeInactive() key2.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(700, key2, false)) + must.NoError(t, store.UpsertRootKey(700, key2, false)) variable := mock.VariableEncrypted() variable.KeyID = key2.KeyID @@ -2752,9 +2752,9 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { must.NoError(t, setResp.Error) // insert an "old" key that's inactive but being used by an alloc - key3 := structs.NewRootKeyMeta().MakeInactive() + key3 := structs.NewRootKey(structs.NewRootKeyMeta()).MakeInactive() key3.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(800, key3, false)) + must.NoError(t, store.UpsertRootKey(800, key3, false)) // insert the allocation using key3 alloc := mock.Alloc() @@ -2764,9 +2764,9 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { structs.MsgTypeTestSetup, 850, []*structs.Allocation{alloc})) // insert an "old" key that's inactive but being used by an alloc - key4 := structs.NewRootKeyMeta().MakeInactive() + key4 := structs.NewRootKey(structs.NewRootKeyMeta()).MakeInactive() key4.CreateTime = yesterday - must.NoError(t, 
store.UpsertRootKeyMeta(900, key4, false)) + must.NoError(t, store.UpsertRootKey(900, key4, false)) // insert the dead allocation using key4 alloc2 := mock.Alloc() @@ -2777,14 +2777,14 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { structs.MsgTypeTestSetup, 950, []*structs.Allocation{alloc2})) // insert an inactive key older than RootKeyGCThreshold but not RootKeyRotationThreshold - key5 := structs.NewRootKeyMeta().MakeInactive() + key5 := structs.NewRootKey(structs.NewRootKeyMeta()).MakeInactive() key5.CreateTime = now - (15 * time.Minute).Nanoseconds() - must.NoError(t, store.UpsertRootKeyMeta(1500, key5, false)) + must.NoError(t, store.UpsertRootKey(1500, key5, false)) // prepublishing key should never be GC'd no matter how old - key6 := structs.NewRootKeyMeta().MakePrepublished(yesterday) + key6 := structs.NewRootKey(structs.NewRootKeyMeta()).MakePrepublished(yesterday) key6.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(1600, key6, false)) + must.NoError(t, store.UpsertRootKey(1600, key6, false)) // run the core job snap, err := store.Snapshot() @@ -2795,31 +2795,31 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { must.NoError(t, c.rootKeyGC(eval, time.Now())) ws := memdb.NewWatchSet() - key, err := store.RootKeyMetaByID(ws, key0.KeyID) + key, err := store.RootKeyByID(ws, key0.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("active key should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key1.KeyID) + key, err = store.RootKeyByID(ws, key1.KeyID) must.NoError(t, err) must.Nil(t, key, must.Sprint("old and unused inactive key should have been GCd")) - key, err = store.RootKeyMetaByID(ws, key2.KeyID) + key, err = store.RootKeyByID(ws, key2.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("old key should not have been GCd if still in use")) - key, err = store.RootKeyMetaByID(ws, key3.KeyID) + key, err = store.RootKeyByID(ws, key3.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("old key used to sign a live alloc should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key4.KeyID) + key, err = store.RootKeyByID(ws, key4.KeyID) must.NoError(t, err) must.Nil(t, key, must.Sprint("old key used to sign a terminal alloc should have been GCd")) - key, err = store.RootKeyMetaByID(ws, key5.KeyID) + key, err = store.RootKeyByID(ws, key5.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("key newer than GC+rotation threshold should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key6.KeyID) + key, err = store.RootKeyByID(ws, key6.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("prepublishing key should not have been GCd")) } @@ -2835,7 +2835,7 @@ func TestCoreScheduler_VariablesRekey(t *testing.T) { testutil.WaitForKeyring(t, srv.RPC, "global") store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be bootstapped")) must.NoError(t, err) @@ -2883,7 +2883,7 @@ func TestCoreScheduler_VariablesRekey(t *testing.T) { } } - originalKey, _ := store.RootKeyMetaByID(nil, key0.KeyID) + originalKey, _ := store.RootKeyByID(nil, key0.KeyID) return originalKey.IsInactive() }), ), must.Sprint("variable rekey should be complete")) diff --git a/nomad/encrypter.go b/nomad/encrypter.go index 95f0604c594..ffa7c7a467b 100644 --- a/nomad/encrypter.go +++ b/nomad/encrypter.go @@ -29,6 +29,7 @@ import ( "github.com/hashicorp/go-kms-wrapping/wrappers/azurekeyvault/v2" 
"github.com/hashicorp/go-kms-wrapping/wrappers/gcpckms/v2" "github.com/hashicorp/go-kms-wrapping/wrappers/transit/v2" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/crypto" "github.com/hashicorp/nomad/helper/joseutil" @@ -57,33 +58,35 @@ type Encrypter struct { // issuer is the OIDC Issuer to use for workload identities if configured issuer string - keyring map[string]*keyset - lock sync.RWMutex + keyring map[string]*cipherSet + decryptTasks map[string]context.CancelFunc + lock sync.RWMutex } -// keyset contains the key material for variable encryption and workload -// identity signing. As keysets are rotated they are identified by the RootKey -// KeyID although the public key IDs are published with a type prefix to +// cipherSet contains the key material for variable encryption and workload +// identity signing. As cipherSets are rotated they are identified by the +// RootKey KeyID although the public key IDs are published with a type prefix to // disambiguate which signing algorithm to use. -type keyset struct { - rootKey *structs.RootKey +type cipherSet struct { + rootKey *structs.UnwrappedRootKey cipher cipher.AEAD eddsaPrivateKey ed25519.PrivateKey rsaPrivateKey *rsa.PrivateKey rsaPKCS1PublicKey []byte // PKCS #1 DER encoded public key for JWKS } -// NewEncrypter loads or creates a new local keystore and returns an -// encryption keyring with the keys it finds. +// NewEncrypter loads or creates a new local keystore and returns an encryption +// keyring with the keys it finds. func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) { encrypter := &Encrypter{ srv: srv, log: srv.logger.Named("keyring"), keystorePath: keystorePath, - keyring: make(map[string]*keyset), + keyring: make(map[string]*cipherSet), issuer: srv.GetConfig().OIDCIssuer, providerConfigs: map[string]*structs.KEKProviderConfig{}, + decryptTasks: map[string]context.CancelFunc{}, } providerConfigs, err := getProviderConfigs(srv) @@ -187,39 +190,60 @@ func (e *Encrypter) loadKeystore() error { }) } -// Encrypt encrypts the clear data with the cipher for the current -// root key, and returns the cipher text (including the nonce), and -// the key ID used to encrypt it +// IsReady blocks until all decrypt tasks are complete, or the context expires. 
+func (e *Encrypter) IsReady(ctx context.Context) error { + err := helper.WithBackoffFunc(ctx, time.Millisecond*100, time.Second, func() error { + e.lock.RLock() + defer e.lock.RUnlock() + if len(e.decryptTasks) != 0 { + keyIDs := []string{} + for keyID := range e.decryptTasks { + keyIDs = append(keyIDs, keyID) + } + return fmt.Errorf("keyring is not ready - waiting for keys %s", + strings.Join(keyIDs, ", ")) + } + return nil + }) + if err != nil { + return err + } + return nil +} + +// Encrypt encrypts the clear data with the cipher for the active root key, and +// returns the cipher text (including the nonce), and the key ID used to encrypt +// it func (e *Encrypter) Encrypt(cleartext []byte) ([]byte, string, error) { - keyset, err := e.activeKeySet() + cs, err := e.activeCipherSet() if err != nil { return nil, "", err } - nonce, err := crypto.Bytes(keyset.cipher.NonceSize()) + nonce, err := crypto.Bytes(cs.cipher.NonceSize()) if err != nil { return nil, "", fmt.Errorf("failed to generate key wrapper nonce: %v", err) } - keyID := keyset.rootKey.Meta.KeyID + keyID := cs.rootKey.Meta.KeyID additional := []byte(keyID) // include the keyID in the signature inputs - // we use the nonce as the dst buffer so that the ciphertext is - // appended to that buffer and we always keep the nonce and - // ciphertext together, and so that we're not tempted to reuse - // the cleartext buffer which the caller still owns - ciphertext := keyset.cipher.Seal(nonce, nonce, cleartext, additional) + // we use the nonce as the dst buffer so that the ciphertext is appended to + // that buffer and we always keep the nonce and ciphertext together, and so + // that we're not tempted to reuse the cleartext buffer which the caller + // still owns + ciphertext := cs.cipher.Seal(nonce, nonce, cleartext, additional) return ciphertext, keyID, nil } // Decrypt takes an encrypted buffer and then root key ID. It extracts // the nonce, decrypts the content, and returns the cleartext data. func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) { - e.lock.RLock() - defer e.lock.RUnlock() - ks, err := e.keysetByIDLocked(keyID) + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, time.Second) + defer cancel() + ks, err := e.waitForKey(ctx, keyID) if err != nil { return nil, err } @@ -247,25 +271,9 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string, return "", "", errors.New("cannot sign empty claims") } - // If a key is rotated immediately following a leader election, plans that - // are in-flight may get signed before the new leader has the key. 
Allow for - // a short timeout-and-retry to avoid rejecting plans - ks, err := e.activeKeySet() + cs, err := e.activeCipherSet() if err != nil { - ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 5*time.Second) - defer cancel() - for { - select { - case <-ctx.Done(): - return "", "", err - default: - time.Sleep(50 * time.Millisecond) - ks, err = e.activeKeySet() - if ks != nil { - break - } - } - } + return "", "", err } // Add Issuer claim from server configuration @@ -273,18 +281,18 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string, claims.Issuer = e.issuer } - opts := (&jose.SignerOptions{}).WithHeader("kid", ks.rootKey.Meta.KeyID).WithType("JWT") + opts := (&jose.SignerOptions{}).WithHeader("kid", cs.rootKey.Meta.KeyID).WithType("JWT") var sig jose.Signer - if ks.rsaPrivateKey != nil { + if cs.rsaPrivateKey != nil { // If an RSA key has been created prefer it as it is more widely compatible - sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.RS256, Key: ks.rsaPrivateKey}, opts) + sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.RS256, Key: cs.rsaPrivateKey}, opts) if err != nil { return "", "", err } } else { // No RSA key has been created, fallback to ed25519 which always exists - sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.EdDSA, Key: ks.eddsaPrivateKey}, opts) + sig, err = jose.NewSigner(jose.SigningKey{Algorithm: jose.EdDSA, Key: cs.eddsaPrivateKey}, opts) if err != nil { return "", "", err } @@ -295,7 +303,7 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string, return "", "", err } - return raw, ks.rootKey.Meta.KeyID, nil + return raw, cs.rootKey.Meta.KeyID, nil } // VerifyClaim accepts a previously-signed encoded claim and validates @@ -314,7 +322,7 @@ func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, er } // Find the key material - pubKey, err := e.GetPublicKey(keyID) + pubKey, err := e.waitForPublicKey(keyID) if err != nil { return nil, err } @@ -341,23 +349,175 @@ func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, er return claims, nil } -// AddKey stores the key in the keystore and creates a new cipher for it. -func (e *Encrypter) AddKey(rootKey *structs.RootKey) error { +// AddUnwrappedKey stores the key in the keystore and creates a new cipher for +// it. This is called in the RPC handlers on the leader and from the legacy +// KeyringReplicator. +func (e *Encrypter) AddUnwrappedKey(rootKey *structs.UnwrappedRootKey, isUpgraded bool) (*structs.RootKey, error) { // note: we don't lock the keyring here but inside addCipher // instead, so that we're not holding the lock while performing // local disk writes if err := e.addCipher(rootKey); err != nil { + return nil, err + } + return e.wrapRootKey(rootKey, isUpgraded) +} + +// AddWrappedKey creates decryption tasks for keys we've previously stored in +// Raft. It's only called as a goroutine by the FSM Apply for WrappedRootKeys, +// but it returns an error for ease of testing. 
+func (e *Encrypter) AddWrappedKey(ctx context.Context, wrappedKeys *structs.RootKey) error { + + logger := e.log.With("key_id", wrappedKeys.KeyID) + + e.lock.Lock() + + _, err := e.cipherSetByIDLocked(wrappedKeys.KeyID) + if err == nil { + + // key material for each key ID is immutable so nothing to do, but we + // can cancel and remove any running decrypt tasks + if cancel, ok := e.decryptTasks[wrappedKeys.KeyID]; ok { + cancel() + delete(e.decryptTasks, wrappedKeys.KeyID) + } + e.lock.Unlock() + return nil + } + + if cancel, ok := e.decryptTasks[wrappedKeys.KeyID]; ok { + // stop any previous tasks for this same key ID under the assumption + // they're broken or being superceded, but don't remove the CancelFunc + // from the map yet so that other callers don't think we can continue + cancel() + } + + e.lock.Unlock() + + completeCtx, cancel := context.WithCancel(ctx) + + var mErr *multierror.Error + + decryptTasks := 0 + for _, wrappedKey := range wrappedKeys.WrappedKeys { + providerID := wrappedKey.ProviderID + if providerID == "" { + providerID = string(structs.KEKProviderAEAD) + } + + provider, ok := e.providerConfigs[providerID] + if !ok { + err := fmt.Errorf("no such KMS provider %q configured", providerID) + mErr = multierror.Append(mErr, err) + continue + } + + wrapper, err := e.newKMSWrapper(provider, wrappedKeys.KeyID, wrappedKey.KeyEncryptionKey) + if err != nil { + // the errors that bubble up from this library can be a bit opaque, so + // make sure we wrap them with as much context as possible + err := fmt.Errorf("unable to create KMS wrapper for provider %q: %w", providerID, err) + mErr = multierror.Append(mErr, err) + continue + } + + // fan-out decryption tasks for HA in Nomad Enterprise. we can use the + // key whenever any one provider returns a successful decryption + go e.decryptWrappedKeyTask(completeCtx, cancel, wrapper, provider, wrappedKeys.Meta(), wrappedKey) + decryptTasks++ + } + + e.lock.Lock() + defer e.lock.Unlock() + + e.decryptTasks[wrappedKeys.KeyID] = cancel + + err = mErr.ErrorOrNil() + if err != nil { + if decryptTasks == 0 { + cancel() + } + + logger.Error("root key cannot be decrypted", "error", err) + return err + } + + return nil +} + +// decryptWrappedKeyTask attempts to decrypt a wrapped key. It blocks until +// successful or until the context is canceled (another task completes or the +// server shuts down). The error returned is only for testing and diagnostics. +func (e *Encrypter) decryptWrappedKeyTask(ctx context.Context, cancel context.CancelFunc, wrapper kms.Wrapper, provider *structs.KEKProviderConfig, meta *structs.RootKeyMeta, wrappedKey *structs.WrappedKey) error { + + var key []byte + var rsaKey []byte + + minBackoff := time.Second + maxBackoff := time.Second * 5 + + err := helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + wrappedDEK := wrappedKey.WrappedDataEncryptionKey + var err error + key, err = wrapper.Decrypt(e.srv.shutdownCtx, wrappedDEK) + if err != nil { + err := fmt.Errorf("%w (root key): %w", ErrDecryptFailed, err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + return err + } + return nil + }) + if err != nil { return err } - if err := e.saveKeyToStore(rootKey); err != nil { + + err = helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + var err error + + // Decrypt RSAKey for Workload Identity JWT signing if one exists. Prior to + // 1.7 an ed25519 key derived from the root key was used instead of an RSA + // key. 
+ if wrappedKey.WrappedRSAKey != nil { + rsaKey, err = wrapper.Decrypt(e.srv.shutdownCtx, wrappedKey.WrappedRSAKey) + if err != nil { + err := fmt.Errorf("%w (rsa key): %w", ErrDecryptFailed, err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + } + } + return nil + }) + if err != nil { return err } + + rootKey := &structs.UnwrappedRootKey{ + Meta: meta, + Key: key, + RSAKey: rsaKey, + } + + err = helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + err := e.addCipher(rootKey) + if err != nil { + err := fmt.Errorf("could not add cipher: %w", err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + return err + } + return nil + }) + if err != nil { + return err + } + + e.lock.Lock() + defer e.lock.Unlock() + cancel() + delete(e.decryptTasks, meta.KeyID) return nil } -// addCipher stores the key in the keyring and creates a new cipher for it. -func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { +// addCipher creates a new cipherSet for the key and stores them in the keyring +func (e *Encrypter) addCipher(rootKey *structs.UnwrappedRootKey) error { if rootKey == nil || rootKey.Meta == nil { return fmt.Errorf("missing metadata") @@ -380,7 +540,7 @@ func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { ed25519Key := ed25519.NewKeyFromSeed(rootKey.Key) - ks := keyset{ + cs := cipherSet{ rootKey: rootKey, cipher: aead, eddsaPrivateKey: ed25519Key, @@ -394,53 +554,86 @@ func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { return fmt.Errorf("error parsing rsa key: %w", err) } - ks.rsaPrivateKey = rsaKey - ks.rsaPKCS1PublicKey = x509.MarshalPKCS1PublicKey(&rsaKey.PublicKey) + cs.rsaPrivateKey = rsaKey + cs.rsaPKCS1PublicKey = x509.MarshalPKCS1PublicKey(&rsaKey.PublicKey) } e.lock.Lock() defer e.lock.Unlock() - e.keyring[rootKey.Meta.KeyID] = &ks + e.keyring[rootKey.Meta.KeyID] = &cs return nil } +// waitForKey retrieves the key material by ID from the keyring, retrying with +// geometric backoff until the context expires. +func (e *Encrypter) waitForKey(ctx context.Context, keyID string) (*cipherSet, error) { + var ks *cipherSet + + err := helper.WithBackoffFunc(ctx, 50*time.Millisecond, 100*time.Millisecond, + func() error { + e.lock.RLock() + defer e.lock.RUnlock() + var err error + ks, err = e.cipherSetByIDLocked(keyID) + if err != nil { + return err + } + return nil + }) + if err != nil { + return nil, err + } + if ks == nil { + return nil, fmt.Errorf("no such key") + } + return ks, nil +} + // GetKey retrieves the key material by ID from the keyring. -func (e *Encrypter) GetKey(keyID string) (*structs.RootKey, error) { - e.lock.RLock() - defer e.lock.RUnlock() +func (e *Encrypter) GetKey(keyID string) (*structs.UnwrappedRootKey, error) { + e.lock.Lock() + defer e.lock.Unlock() - keyset, err := e.keysetByIDLocked(keyID) + ks, err := e.cipherSetByIDLocked(keyID) if err != nil { return nil, err } - return keyset.rootKey, nil + if ks == nil { + return nil, fmt.Errorf("no such key") + } + + return ks.rootKey, nil } -// activeKeySetLocked returns the keyset that belongs to the key marked as -// active in the state store (so that it's consistent with raft). The -// called must read-lock the keyring -func (e *Encrypter) activeKeySet() (*keyset, error) { +// activeCipherSetLocked returns the cipherSet that belongs to the key marked as +// active in the state store (so that it's consistent with raft). +// +// If a key is rotated immediately following a leader election, plans that are +// in-flight may get signed before the new leader has decrypted the key. 
Allow +// for a short timeout-and-retry to avoid rejecting plans +func (e *Encrypter) activeCipherSet() (*cipherSet, error) { store := e.srv.fsm.State() - keyMeta, err := store.GetActiveRootKeyMeta(nil) + key, err := store.GetActiveRootKey(nil) if err != nil { return nil, err } - if keyMeta == nil { + if key == nil { return nil, fmt.Errorf("keyring has not been initialized yet") } - e.lock.RLock() - defer e.lock.RUnlock() - return e.keysetByIDLocked(keyMeta.KeyID) + + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, time.Second) + defer cancel() + return e.waitForKey(ctx, key.KeyID) } -// keysetByIDLocked returns the keyset for the specified keyID. The +// cipherSetByIDLocked returns the cipherSet for the specified keyID. The // caller must read-lock the keyring -func (e *Encrypter) keysetByIDLocked(keyID string) (*keyset, error) { - keyset, ok := e.keyring[keyID] +func (e *Encrypter) cipherSetByIDLocked(keyID string) (*cipherSet, error) { + cipherSet, ok := e.keyring[keyID] if !ok { return nil, fmt.Errorf("no such key %q in keyring", keyID) } - return keyset, nil + return cipherSet, nil } // RemoveKey removes a key by ID from the keyring @@ -451,7 +644,49 @@ func (e *Encrypter) RemoveKey(keyID string) error { return nil } -func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKProviderConfig) (*structs.KeyEncryptionKeyWrapper, error) { +// wrapRootKey encrypts the key for every KEK provider and returns a RootKey +// with wrapped keys. On legacy clusters, this also serializes the wrapped key +// to the on-disk keystore. +func (e *Encrypter) wrapRootKey(rootKey *structs.UnwrappedRootKey, isUpgraded bool) (*structs.RootKey, error) { + + wrappedKeys := structs.NewRootKey(rootKey.Meta) + + for _, provider := range e.providerConfigs { + if !provider.Active { + continue + } + wrappedKey, err := e.encryptDEK(rootKey, provider) + if err != nil { + return nil, err + } + + switch { + case isUpgraded && provider.Provider == string(structs.KEKProviderAEAD): + // nothing to do but don't want to hit next case + + case isUpgraded: + wrappedKey.KeyEncryptionKey = nil + + case provider.Provider == string(structs.KEKProviderAEAD): // !isUpgraded + kek := wrappedKey.KeyEncryptionKey + wrappedKey.KeyEncryptionKey = nil + e.writeKeyToDisk(rootKey.Meta, provider, wrappedKey, kek) + + default: // !isUpgraded + wrappedKey.KeyEncryptionKey = nil + e.writeKeyToDisk(rootKey.Meta, provider, wrappedKey, nil) + } + + wrappedKeys.WrappedKeys = append(wrappedKeys.WrappedKeys, wrappedKey) + + } + return wrappedKeys, nil +} + +// encryptDEK encrypts the DEKs (one for encryption and one for signing) with +// the KMS provider and returns a WrappedKey built from the provider's +// kms.BlobInfo. This includes the cleartext KEK for the AEAD provider. +func (e *Encrypter) encryptDEK(rootKey *structs.UnwrappedRootKey, provider *structs.KEKProviderConfig) (*structs.WrappedKey, error) { if provider == nil { panic("can't encrypt DEK without a provider") } @@ -472,15 +707,16 @@ func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKPr if err != nil { return nil, fmt.Errorf("failed to encrypt root key: %w", err) } - kekWrapper := &structs.KeyEncryptionKeyWrapper{ - Meta: rootKey.Meta, - KeyEncryptionKey: kek, + + kekWrapper := &structs.WrappedKey{ Provider: provider.Provider, ProviderID: provider.ID(), WrappedDataEncryptionKey: rootBlob, + WrappedRSAKey: &kms.BlobInfo{}, + KeyEncryptionKey: kek, } - // Only keysets created after 1.7.0 will contain an RSA key. 
+ // Only cipherSets created after 1.7.0 will contain an RSA key. if len(rootKey.RSAKey) > 0 { rsaBlob, err := wrapper.Encrypt(e.srv.shutdownCtx, rootKey.RSAKey) if err != nil { @@ -492,37 +728,38 @@ func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKPr return kekWrapper, nil } -// saveKeyToStore serializes a root key to the on-disk keystore. -func (e *Encrypter) saveKeyToStore(rootKey *structs.RootKey) error { +func (e *Encrypter) writeKeyToDisk( + meta *structs.RootKeyMeta, provider *structs.KEKProviderConfig, + wrappedKey *structs.WrappedKey, kek []byte) error { - for _, provider := range e.providerConfigs { - if !provider.Active { - continue - } - kekWrapper, err := e.encryptDEK(rootKey, provider) - if err != nil { - return err - } - - buf, err := json.Marshal(kekWrapper) - if err != nil { - return err - } + // the on-disk keystore flattens the keys wrapped for the individual + // KMS providers out to their own files + diskWrapper := &structs.KeyEncryptionKeyWrapper{ + Meta: meta, + Provider: provider.Name, + ProviderID: provider.ID(), + WrappedDataEncryptionKey: wrappedKey.WrappedDataEncryptionKey, + WrappedRSAKey: wrappedKey.WrappedRSAKey, + KeyEncryptionKey: kek, + } - filename := fmt.Sprintf("%s.%s%s", - rootKey.Meta.KeyID, provider.ID(), nomadKeystoreExtension) - path := filepath.Join(e.keystorePath, filename) - err = os.WriteFile(path, buf, 0o600) - if err != nil { - return err - } + buf, err := json.Marshal(diskWrapper) + if err != nil { + return err } + filename := fmt.Sprintf("%s.%s%s", + meta.KeyID, provider.ID(), nomadKeystoreExtension) + path := filepath.Join(e.keystorePath, filename) + err = os.WriteFile(path, buf, 0o600) + if err != nil { + return err + } return nil } // loadKeyFromStore deserializes a root key from disk. -func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { +func (e *Encrypter) loadKeyFromStore(path string) (*structs.UnwrappedRootKey, error) { raw, err := os.ReadFile(path) if err != nil { @@ -583,7 +820,7 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { } } - return &structs.RootKey{ + return &structs.UnwrappedRootKey{ Meta: meta, Key: key, RSAKey: rsaKey, @@ -592,13 +829,43 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { var ErrDecryptFailed = errors.New("unable to decrypt wrapped key") +// waitForPublicKey returns the public signing key for the requested key id or +// an error if the key could not be found. It blocks up to 1s for key material +// to be decrypted so that Workload Identities signed by a brand-new key can be +// verified for stale RPCs made to followers that might not have yet decrypted +// the key received via Raft +func (e *Encrypter) waitForPublicKey(keyID string) (*structs.KeyringPublicKey, error) { + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 1*time.Second) + defer cancel() + ks, err := e.waitForKey(ctx, keyID) + if err != nil { + return nil, err + } + + pubKey := &structs.KeyringPublicKey{ + KeyID: keyID, + Use: structs.PubKeyUseSig, + CreateTime: ks.rootKey.Meta.CreateTime, + } + + if ks.rsaPrivateKey != nil { + pubKey.PublicKey = ks.rsaPKCS1PublicKey + pubKey.Algorithm = structs.PubKeyAlgRS256 + } else { + pubKey.PublicKey = ks.eddsaPrivateKey.Public().(ed25519.PublicKey) + pubKey.Algorithm = structs.PubKeyAlgEdDSA + } + + return pubKey, nil +} + // GetPublicKey returns the public signing key for the requested key id or an // error if the key could not be found. 
func (e *Encrypter) GetPublicKey(keyID string) (*structs.KeyringPublicKey, error) { e.lock.RLock() defer e.lock.RUnlock() - ks, err := e.keysetByIDLocked(keyID) + ks, err := e.cipherSetByIDLocked(keyID) if err != nil { return nil, err } @@ -663,6 +930,10 @@ func (e *Encrypter) newKMSWrapper(provider *structs.KEKProviderConfig, keyID str return wrapper, nil } +// KeyringReplicator supports the legacy (pre-1.9.0) keyring management where +// wrapped keys were stored outside of Raft. +// +// COMPAT(1.12.0) - remove in 1.12.0 LTS type KeyringReplicator struct { srv *Server encrypter *Encrypter @@ -708,7 +979,7 @@ func (krr *KeyringReplicator) run(ctx context.Context) { } store := krr.srv.fsm.State() - iter, err := store.RootKeyMetas(nil) + iter, err := store.RootKeys(nil) if err != nil { krr.logger.Error("failed to fetch keyring", "error", err) continue @@ -719,22 +990,23 @@ func (krr *KeyringReplicator) run(ctx context.Context) { break } - keyMeta := raw.(*structs.RootKeyMeta) - if key, err := krr.encrypter.GetKey(keyMeta.KeyID); err == nil && len(key.Key) > 0 { + wrappedKeys := raw.(*structs.RootKey) + if key, err := krr.encrypter.GetKey(wrappedKeys.KeyID); err == nil && len(key.Key) > 0 { // the key material is immutable so if we've already got it // we can move on to the next key continue } - err := krr.replicateKey(ctx, keyMeta) + err := krr.replicateKey(ctx, wrappedKeys) if err != nil { // don't break the loop on an error, as we want to make sure // we've replicated any keys we can. the rate limiter will // prevent this case from sending excessive RPCs - krr.logger.Error(err.Error(), "key", keyMeta.KeyID) + krr.logger.Error(err.Error(), "key", wrappedKeys.KeyID) } } + } } @@ -743,8 +1015,8 @@ func (krr *KeyringReplicator) run(ctx context.Context) { // replicateKey replicates a single key from peer servers that was present in // the state store but missing from the keyring. Returns an error only if no // peers have this key. 
-func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs.RootKeyMeta) error { - keyID := keyMeta.KeyID +func (krr *KeyringReplicator) replicateKey(ctx context.Context, wrappedKeys *structs.RootKey) error { + keyID := wrappedKeys.KeyID krr.logger.Debug("replicating new key", "id", keyID) var err error @@ -752,7 +1024,7 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs KeyID: keyID, QueryOptions: structs.QueryOptions{ Region: krr.srv.config.Region, - MinQueryIndex: keyMeta.ModifyIndex - 1, + MinQueryIndex: wrappedKeys.ModifyIndex - 1, }, } getResp := &structs.KeyringGetRootKeyResponse{} @@ -795,7 +1067,12 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs return fmt.Errorf("failed to fetch key from any peer: %v", err) } - err = krr.encrypter.AddKey(getResp.Key) + isClusterUpgraded := ServersMeetMinimumVersion( + krr.srv.serf.Members(), krr.srv.Region(), minVersionKeyringInRaft, true) + + // In the legacy replication, we toss out the wrapped key because it's + // always persisted to disk + _, err = krr.srv.encrypter.AddUnwrappedKey(getResp.Key, isClusterUpgraded) if err != nil { return fmt.Errorf("failed to add key to keyring: %v", err) } diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index 9477f80efb6..1f65ffb98e5 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -10,6 +10,7 @@ import ( "encoding/json" "fmt" "maps" + "net/rpc" "os" "path/filepath" "testing" @@ -17,11 +18,6 @@ import ( "github.com/go-jose/go-jose/v3/jwt" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2" - "github.com/shoenig/test" - "github.com/shoenig/test/must" - "github.com/shoenig/test/wait" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" @@ -30,6 +26,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" + "github.com/stretchr/testify/require" ) var ( @@ -70,10 +70,12 @@ func TestEncrypter_LoadSave(t *testing.T) { for _, algo := range algos { t.Run(string(algo), func(t *testing.T) { - key, err := structs.NewRootKey(algo) + key, err := structs.NewUnwrappedRootKey(algo) must.Greater(t, 0, len(key.RSAKey)) must.NoError(t, err) - must.NoError(t, encrypter.saveKeyToStore(key)) + + _, err = encrypter.wrapRootKey(key, false) + must.NoError(t, err) // startup code path gotKey, err := encrypter.loadKeyFromStore( @@ -81,28 +83,29 @@ func TestEncrypter_LoadSave(t *testing.T) { must.NoError(t, err) must.NoError(t, encrypter.addCipher(gotKey)) must.Greater(t, 0, len(gotKey.RSAKey)) - must.NoError(t, encrypter.saveKeyToStore(key)) + _, err = encrypter.wrapRootKey(key, false) + must.NoError(t, err) - active, err := encrypter.keysetByIDLocked(key.Meta.KeyID) + active, err := encrypter.cipherSetByIDLocked(key.Meta.KeyID) must.NoError(t, err) must.Greater(t, 0, len(active.rootKey.RSAKey)) }) } t.Run("legacy aead wrapper", func(t *testing.T) { - key, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + key, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) must.NoError(t, err) // create a wrapper file identical to those before we had external KMS - kekWrapper, err := encrypter.encryptDEK(key, &structs.KEKProviderConfig{}) - kekWrapper.Provider = "" - kekWrapper.ProviderID = "" - 
kekWrapper.EncryptedDataEncryptionKey = kekWrapper.WrappedDataEncryptionKey.Ciphertext - kekWrapper.EncryptedRSAKey = kekWrapper.WrappedRSAKey.Ciphertext - kekWrapper.WrappedDataEncryptionKey = nil - kekWrapper.WrappedRSAKey = nil - - buf, err := json.Marshal(kekWrapper) + wrappedKey, err := encrypter.encryptDEK(key, &structs.KEKProviderConfig{}) + diskWrapper := &structs.KeyEncryptionKeyWrapper{ + Meta: key.Meta, + KeyEncryptionKey: wrappedKey.KeyEncryptionKey, + EncryptedDataEncryptionKey: wrappedKey.WrappedDataEncryptionKey.Ciphertext, + EncryptedRSAKey: wrappedKey.WrappedRSAKey.Ciphertext, + } + + buf, err := json.Marshal(diskWrapper) must.NoError(t, err) path := filepath.Join(tmpDir, key.Meta.KeyID+".nks.json") @@ -223,8 +226,9 @@ func TestEncrypter_Restore(t *testing.T) { } } -// TestEncrypter_KeyringReplication exercises key replication between servers -func TestEncrypter_KeyringReplication(t *testing.T) { +// TestEncrypter_KeyringBootstrapping exercises key decryption tasks as new +// servers come online and leaders are elected. +func TestEncrypter_KeyringBootstrapping(t *testing.T) { ci.Parallel(t) @@ -283,20 +287,35 @@ func TestEncrypter_KeyringReplication(t *testing.T) { keyID1 := listResp.Keys[0].KeyID - keyPath := filepath.Join(leader.GetConfig().DataDir, "keystore", - keyID1+".aead.nks.json") - _, err := os.Stat(keyPath) - must.NoError(t, err, must.Sprint("expected key to be found in leader keystore")) + // Helper function for checking that a specific key is in the keyring for a + // specific server + checkPublicKeyFn := func(codec rpc.ClientCodec, keyID string) bool { + listPublicReq := &structs.GenericRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + AllowStale: true, + }, + } + var listPublicResp structs.KeyringListPublicResponse + msgpackrpc.CallWithCodec(codec, "Keyring.ListPublic", listPublicReq, &listPublicResp) + for _, key := range listPublicResp.PublicKeys { + if key.KeyID == keyID && len(key.PublicKey) > 0 { + return true + } + } + return false + } + + // leader's key should already be available by the time its elected the + // leader + must.True(t, checkPublicKeyFn(codec, keyID1)) // Helper function for checking that a specific key has been - // replicated to followers - + // replicated to all followers checkReplicationFn := func(keyID string) func() bool { return func() bool { for _, srv := range servers { - keyPath := filepath.Join(srv.GetConfig().DataDir, "keystore", - keyID+".aead.nks.json") - if _, err := os.Stat(keyPath); err != nil { + if !checkPublicKeyFn(rpcClient(t, srv), keyID) { return false } } @@ -317,7 +336,7 @@ func TestEncrypter_KeyringReplication(t *testing.T) { }, } var rotateResp structs.KeyringRotateRootKeyResponse - err = msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp) + err := msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp) must.NoError(t, err) keyID2 := rotateResp.Key.KeyID @@ -332,10 +351,8 @@ func TestEncrypter_KeyringReplication(t *testing.T) { must.NoError(t, err) must.NotNil(t, getResp.Key, must.Sprint("expected key to be found on leader")) - keyPath = filepath.Join(leader.GetConfig().DataDir, "keystore", - keyID2+".aead.nks.json") - _, err = os.Stat(keyPath) - must.NoError(t, err, must.Sprint("expected key to be found in leader keystore")) + must.True(t, checkPublicKeyFn(codec, keyID1), + must.Sprint("expected key to be found in leader keystore")) must.Wait(t, wait.InitialSuccess( wait.BoolFunc(checkReplicationFn(keyID2)), @@ -526,7 +543,7 @@ func 
TestEncrypter_SignVerify_AlgNone(t *testing.T) { e := srv.encrypter - keyset, err := e.activeKeySet() + keyset, err := e.activeCipherSet() must.NoError(t, err) keyID := keyset.rootKey.Meta.KeyID @@ -576,8 +593,25 @@ func TestEncrypter_Upgrade17(t *testing.T) { testutil.WaitForKeyring(t, srv.RPC, "global") codec := rpcClient(t, srv) + initKey, err := srv.State().GetActiveRootKey(nil) + must.NoError(t, err) + + wr := structs.WriteRequest{ + Namespace: "default", + Region: "global", + } + + // Delete the initialization key because it's a newer WrappedRootKey from + // 1.9, which isn't under test here. + _, _, err = srv.raftApply( + structs.WrappedRootKeysDeleteRequestType, structs.KeyringDeleteRootKeyRequest{ + KeyID: initKey.KeyID, + WriteRequest: wr, + }) + must.NoError(t, err) + // Fake life as a 1.6 server by writing only ed25519 keys - oldRootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + oldRootKey, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) must.NoError(t, err) oldRootKey = oldRootKey.MakeActive() @@ -586,13 +620,10 @@ func TestEncrypter_Upgrade17(t *testing.T) { oldRootKey.RSAKey = nil // Add to keyring - must.NoError(t, srv.encrypter.AddKey(oldRootKey)) + _, err = srv.encrypter.AddUnwrappedKey(oldRootKey, false) + must.NoError(t, err) - // Write metadata to Raft - wr := structs.WriteRequest{ - Namespace: "default", - Region: "global", - } + // Write a legacy key metadata to Raft req := structs.KeyringUpdateRootKeyMetaRequest{ RootKeyMeta: oldRootKey.Meta, WriteRequest: wr, diff --git a/nomad/fsm.go b/nomad/fsm.go index 3e7d99d7c86..325075f0e14 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -66,6 +66,7 @@ const ( ACLBindingRuleSnapshot SnapshotType = 27 NodePoolSnapshot SnapshotType = 28 JobSubmissionSnapshot SnapshotType = 29 + RootKeySnapshot SnapshotType = 30 // Namespace appliers were moved from enterprise and therefore start at 64 NamespaceSnapshot SnapshotType = 64 @@ -102,6 +103,7 @@ var snapshotTypeStrings = map[SnapshotType]string{ ACLBindingRuleSnapshot: "ACLBindingRule", NodePoolSnapshot: "NodePool", JobSubmissionSnapshot: "JobSubmission", + RootKeySnapshot: "WrappedRootKeys", NamespaceSnapshot: "Namespace", } @@ -126,6 +128,7 @@ type nomadFSM struct { evalBroker *EvalBroker blockedEvals *BlockedEvals periodicDispatcher *PeriodicDispatch + encrypter *Encrypter logger hclog.Logger state *state.StateStore timetable *TimeTable @@ -171,6 +174,9 @@ type FSMConfig struct { // be added to. 
Blocked *BlockedEvals + // Encrypter is the encrypter where new WrappedRootKeys should be added + Encrypter *Encrypter + // Logger is the logger used by the FSM Logger hclog.Logger @@ -207,6 +213,7 @@ func NewFSM(config *FSMConfig) (*nomadFSM, error) { evalBroker: config.EvalBroker, periodicDispatcher: config.Periodic, blockedEvals: config.Blocked, + encrypter: config.Encrypter, logger: config.Logger.Named("fsm"), config: config, state: state, @@ -371,8 +378,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyVariableOperation(msgType, buf[1:], log.Index) case structs.RootKeyMetaUpsertRequestType: return n.applyRootKeyMetaUpsert(msgType, buf[1:], log.Index) - case structs.RootKeyMetaDeleteRequestType: - return n.applyRootKeyMetaDelete(msgType, buf[1:], log.Index) + case structs.WrappedRootKeysDeleteRequestType: + return n.applyWrappedRootKeysDelete(msgType, buf[1:], log.Index) case structs.ACLRolesUpsertRequestType: return n.applyACLRolesUpsert(msgType, buf[1:], log.Index) case structs.ACLRolesDeleteByIDRequestType: @@ -385,6 +392,9 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyACLBindingRulesUpsert(buf[1:], log.Index) case structs.ACLBindingRulesDeleteRequestType: return n.applyACLBindingRulesDelete(buf[1:], log.Index) + case structs.WrappedRootKeysUpsertRequestType: + return n.applyWrappedRootKeysUpsert(msgType, buf[1:], log.Index) + } // Check enterprise only message types. @@ -1827,9 +1837,33 @@ func (n *nomadFSM) restoreImpl(old io.ReadCloser, filter *FSMFilter) error { return err } - if err := restore.RootKeyMetaRestore(keyMeta); err != nil { + wrappedKeys := structs.NewRootKey(keyMeta) + if err := restore.RootKeyRestore(wrappedKeys); err != nil { + return err + } + + if n.encrypter != nil { + // only decrypt the key if we're running in a real server and + // not the 'operator snapshot' command context + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, wrappedKeys) + } + + case RootKeySnapshot: + wrappedKeys := new(structs.RootKey) + if err := dec.Decode(wrappedKeys); err != nil { + return err + } + + if err := restore.RootKeyRestore(wrappedKeys); err != nil { return err } + + if n.encrypter != nil { + // only decrypt the key if we're running in a real server and + // not the 'operator snapshot' command context + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, wrappedKeys) + } + case ACLRoleSnapshot: // Create a new ACLRole object, so we can decode the message into @@ -2303,27 +2337,52 @@ func (n *nomadFSM) applyRootKeyMetaUpsert(msgType structs.MessageType, buf []byt panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpsertRootKeyMeta(index, req.RootKeyMeta, req.Rekey); err != nil { - n.logger.Error("UpsertRootKeyMeta failed", "error", err) + wrappedRootKeys := structs.NewRootKey(req.RootKeyMeta) + + if err := n.state.UpsertRootKey(index, wrappedRootKeys, req.Rekey); err != nil { + n.logger.Error("UpsertWrappedRootKeys failed", "error", err) return err } + // start a task to decrypt the key material + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, wrappedRootKeys) + + return nil +} + +func (n *nomadFSM) applyWrappedRootKeysUpsert(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_wrapped_root_key_upsert"}, time.Now()) + + var req structs.KeyringUpsertWrappedRootKeyRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := 
n.state.UpsertRootKey(index, req.WrappedRootKeys, req.Rekey); err != nil { + n.logger.Error("UpsertWrappedRootKeys failed", "error", err) + return err + } + + // start a task to decrypt the key material + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, req.WrappedRootKeys) + return nil } -func (n *nomadFSM) applyRootKeyMetaDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { - defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_root_key_meta_delete"}, time.Now()) +func (n *nomadFSM) applyWrappedRootKeysDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_wrapped_root_key_delete"}, time.Now()) var req structs.KeyringDeleteRootKeyRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.DeleteRootKeyMeta(index, req.KeyID); err != nil { - n.logger.Error("DeleteRootKeyMeta failed", "error", err) + if err := n.state.DeleteRootKey(index, req.KeyID); err != nil { + n.logger.Error("DeleteWrappedRootKeys failed", "error", err) return err } + n.encrypter.RemoveKey(req.KeyID) return nil } @@ -2447,7 +2506,7 @@ func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { sink.Cancel() return err } - if err := s.persistRootKeyMeta(sink, encoder); err != nil { + if err := s.persistWrappedRootKeys(sink, encoder); err != nil { sink.Cancel() return err } @@ -3092,11 +3151,11 @@ func (s *nomadSnapshot) persistVariablesQuotas(sink raft.SnapshotSink, return nil } -func (s *nomadSnapshot) persistRootKeyMeta(sink raft.SnapshotSink, +func (s *nomadSnapshot) persistWrappedRootKeys(sink raft.SnapshotSink, encoder *codec.Encoder) error { ws := memdb.NewWatchSet() - keys, err := s.snap.RootKeyMetas(ws) + keys, err := s.snap.RootKeys(ws) if err != nil { return err } @@ -3106,8 +3165,8 @@ func (s *nomadSnapshot) persistRootKeyMeta(sink raft.SnapshotSink, if raw == nil { break } - key := raw.(*structs.RootKeyMeta) - sink.Write([]byte{byte(RootKeyMetaSnapshot)}) + key := raw.(*structs.RootKey) + sink.Write([]byte{byte(RootKeySnapshot)}) if err := encoder.Encode(key); err != nil { return err } diff --git a/nomad/keyring_endpoint.go b/nomad/keyring_endpoint.go index 9a383b1c5cb..692cb84ed37 100644 --- a/nomad/keyring_endpoint.go +++ b/nomad/keyring_endpoint.go @@ -54,36 +54,49 @@ func (k *Keyring) Rotate(args *structs.KeyringRotateRootKeyRequest, reply *struc return fmt.Errorf("keyring cannot be prepublished and full rotated at the same time") } - rootKey, err := structs.NewRootKey(args.Algorithm) + unwrappedKey, err := structs.NewUnwrappedRootKey(args.Algorithm) if err != nil { return err } if args.PublishTime != 0 { - rootKey.Meta = rootKey.Meta.MakePrepublished(args.PublishTime) + unwrappedKey.Meta.State = structs.RootKeyStatePrepublished + unwrappedKey.Meta.PublishTime = args.PublishTime } else { - rootKey.Meta = rootKey.Meta.MakeActive() + unwrappedKey.Meta.State = structs.RootKeyStateActive } - // make sure it's been added to the local keystore before we write - // it to raft, so that followers don't try to Get a key that - // hasn't yet been written to disk - err = k.encrypter.AddKey(rootKey) + isClusterUpgraded := ServersMeetMinimumVersion( + k.srv.serf.Members(), k.srv.Region(), minVersionKeyringInRaft, true) + + // wrap/encrypt the key before we write it to Raft + wrappedKey, err := k.encrypter.AddUnwrappedKey(unwrappedKey, isClusterUpgraded) if err != nil { return err } - // Update metadata via Raft so 
followers can retrieve this key - req := structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: rootKey.Meta, - Rekey: args.Full, - WriteRequest: args.WriteRequest, + var index uint64 + if isClusterUpgraded { + _, index, err = k.srv.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKey, + Rekey: args.Full, + WriteRequest: args.WriteRequest, + }) + } else { + // COMPAT(1.12.0): remove the version check and this code path + _, index, err = k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, + structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: wrappedKey.Meta(), + Rekey: args.Full, + WriteRequest: args.WriteRequest, + }) } - _, index, err := k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, req) if err != nil { return err } - reply.Key = rootKey.Meta + + reply.Key = unwrappedKey.Meta reply.Index = index if args.Full { @@ -129,29 +142,23 @@ func (k *Keyring) List(args *structs.KeyringListRootKeyMetaRequest, reply *struc opts := blockingOptions{ queryOpts: &args.QueryOptions, queryMeta: &reply.QueryMeta, - run: func(ws memdb.WatchSet, s *state.StateStore) error { - - // retrieve all the key metadata - snap, err := k.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.RootKeyMetas(ws) + run: func(ws memdb.WatchSet, store *state.StateStore) error { + iter, err := store.RootKeys(ws) if err != nil { return err } - keys := []*structs.RootKeyMeta{} for { raw := iter.Next() if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) - keys = append(keys, keyMeta) + rootKey := raw.(*structs.RootKey) + keys = append(keys, rootKey.Meta()) } + reply.Keys = keys - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + return k.srv.replySetIndex(state.TableRootKeys, &reply.QueryMeta) }, } return k.srv.blockingRPC(&opts) @@ -183,22 +190,35 @@ func (k *Keyring) Update(args *structs.KeyringUpdateRootKeyRequest, reply *struc return err } + isClusterUpgraded := ServersMeetMinimumVersion( + k.srv.serf.Members(), k.srv.Region(), minVersionKeyringInRaft, true) + // make sure it's been added to the local keystore before we write // it to raft, so that followers don't try to Get a key that // hasn't yet been written to disk - err = k.encrypter.AddKey(args.RootKey) + wrappedKey, err := k.encrypter.AddUnwrappedKey(args.RootKey, isClusterUpgraded) if err != nil { return err } - // unwrap the request to turn it into a meta update only - metaReq := &structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: args.RootKey.Meta, - WriteRequest: args.WriteRequest, - } + var index uint64 + if isClusterUpgraded { + _, index, err = k.srv.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKey, + WriteRequest: args.WriteRequest, + }) + } else { + // COMPAT(1.12.0): remove the version check and this code path + // unwrap the request to turn it into a meta update only + metaReq := &structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: args.RootKey.Meta, + WriteRequest: args.WriteRequest, + } - // update the metadata via Raft - _, index, err := k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, metaReq) + // update the metadata via Raft + _, index, err = k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, metaReq) + } if err != nil { return err } @@ -225,11 +245,11 @@ func (k *Keyring) validateUpdate(args *structs.KeyringUpdateRootKeyRequest) erro return err } ws := memdb.NewWatchSet() - keyMeta, err := 
snap.RootKeyMetaByID(ws, args.RootKey.Meta.KeyID) + rootKey, err := snap.RootKeyByID(ws, args.RootKey.Meta.KeyID) if err != nil { return err } - if keyMeta != nil && keyMeta.Algorithm != args.RootKey.Meta.Algorithm { + if rootKey != nil && rootKey.Algorithm != args.RootKey.Meta.Algorithm { return fmt.Errorf("root key algorithm cannot be changed after a key is created") } @@ -261,39 +281,29 @@ func (k *Keyring) Get(args *structs.KeyringGetRootKeyRequest, reply *structs.Key queryMeta: &reply.QueryMeta, run: func(ws memdb.WatchSet, s *state.StateStore) error { - // retrieve the key metadata snap, err := k.srv.fsm.State().Snapshot() if err != nil { return err } - keyMeta, err := snap.RootKeyMetaByID(ws, args.KeyID) + wrappedKey, err := snap.RootKeyByID(ws, args.KeyID) if err != nil { return err } - if keyMeta == nil { - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + if wrappedKey == nil { + return k.srv.replySetIndex(state.TableRootKeys, &reply.QueryMeta) } // retrieve the key material from the keyring - rootKey, err := k.encrypter.GetKey(keyMeta.KeyID) + unwrappedKey, err := k.encrypter.GetKey(wrappedKey.KeyID) if err != nil { return err } - reply.Key = rootKey - - // Use the last index that affected the policy table - index, err := s.Index(state.TableRootKeyMeta) + reply.Key = unwrappedKey + err = k.srv.replySetIndex(state.TableRootKeys, &reply.QueryMeta) if err != nil { return err } - // Ensure we never set the index to zero, otherwise a blocking query - // cannot be used. We floor the index at one, since realistically - // the first write must have a higher index. - if index == 0 { - index = 1 - } - reply.Index = index return nil }, } @@ -324,24 +334,21 @@ func (k *Keyring) Delete(args *structs.KeyringDeleteRootKeyRequest, reply *struc } // lookup any existing key and validate the delete + var index uint64 snap, err := k.srv.fsm.State().Snapshot() if err != nil { return err } ws := memdb.NewWatchSet() - keyMeta, err := snap.RootKeyMetaByID(ws, args.KeyID) + rootKey, err := snap.RootKeyByID(ws, args.KeyID) if err != nil { return err } - if keyMeta == nil { - return nil // safe to bail out early - } - if keyMeta.IsActive() { + if rootKey != nil && rootKey.IsActive() { return fmt.Errorf("active root key cannot be deleted - call rotate first") } - // update via Raft - _, index, err := k.srv.raftApply(structs.RootKeyMetaDeleteRequestType, args) + _, index, err = k.srv.raftApply(structs.WrappedRootKeysDeleteRequestType, args) if err != nil { return err } @@ -377,40 +384,33 @@ func (k *Keyring) ListPublic(args *structs.GenericRequest, reply *structs.Keyrin opts := blockingOptions{ queryOpts: &args.QueryOptions, queryMeta: &reply.QueryMeta, - run: func(ws memdb.WatchSet, s *state.StateStore) error { - - // retrieve all the key metadata - snap, err := k.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.RootKeyMetas(ws) + run: func(ws memdb.WatchSet, store *state.StateStore) error { + iter, err := store.RootKeys(ws) if err != nil { return err } - pubKeys := []*structs.KeyringPublicKey{} for { raw := iter.Next() if raw == nil { break } - - keyMeta := raw.(*structs.RootKeyMeta) - if keyMeta.State == structs.RootKeyStateDeprecated { + wrappedKeys := raw.(*structs.RootKey) + if wrappedKeys.State == structs.RootKeyStateDeprecated { // Only include valid keys continue } - pubKey, err := k.encrypter.GetPublicKey(keyMeta.KeyID) + pubKey, err := k.encrypter.GetPublicKey(wrappedKeys.KeyID) if err != nil { return err } pubKeys = append(pubKeys, pubKey) + 
} reply.PublicKeys = pubKeys - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + return k.srv.replySetIndex(state.TableRootKeys, &reply.QueryMeta) }, } return k.srv.blockingRPC(&opts) diff --git a/nomad/keyring_endpoint_test.go b/nomad/keyring_endpoint_test.go index 7aa999e97f9..071d17686b0 100644 --- a/nomad/keyring_endpoint_test.go +++ b/nomad/keyring_endpoint_test.go @@ -29,7 +29,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { // Upsert a new key - key, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + key, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) require.NoError(t, err) id := key.Meta.KeyID key = key.MakeActive() @@ -106,7 +106,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { require.EqualError(t, err, "active root key cannot be deleted - call rotate first") // set inactive - updateReq.RootKey.Meta = updateReq.RootKey.Meta.MakeInactive() + updateReq.RootKey = updateReq.RootKey.MakeInactive() err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp) require.NoError(t, err) @@ -139,7 +139,7 @@ func TestKeyringEndpoint_InvalidUpdates(t *testing.T) { codec := rpcClient(t, srv) // Setup an existing key - key, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + key, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) require.NoError(t, err) id := key.Meta.KeyID key = key.MakeActive() @@ -156,30 +156,30 @@ func TestKeyringEndpoint_InvalidUpdates(t *testing.T) { require.NoError(t, err) testCases := []struct { - key *structs.RootKey + key *structs.UnwrappedRootKey expectedErrMsg string }{ { - key: &structs.RootKey{}, + key: &structs.UnwrappedRootKey{}, expectedErrMsg: "root key metadata is required", }, { - key: &structs.RootKey{Meta: &structs.RootKeyMeta{}}, + key: &structs.UnwrappedRootKey{Meta: &structs.RootKeyMeta{}}, expectedErrMsg: "root key UUID is required", }, { - key: &structs.RootKey{Meta: &structs.RootKeyMeta{KeyID: "invalid"}}, + key: &structs.UnwrappedRootKey{Meta: &structs.RootKeyMeta{KeyID: "invalid"}}, expectedErrMsg: "root key UUID is required", }, { - key: &structs.RootKey{Meta: &structs.RootKeyMeta{ + key: &structs.UnwrappedRootKey{Meta: &structs.RootKeyMeta{ KeyID: id, Algorithm: structs.EncryptionAlgorithmAES256GCM, }}, expectedErrMsg: "root key state \"\" is invalid", }, { - key: &structs.RootKey{Meta: &structs.RootKeyMeta{ + key: &structs.UnwrappedRootKey{Meta: &structs.RootKeyMeta{ KeyID: id, Algorithm: structs.EncryptionAlgorithmAES256GCM, State: structs.RootKeyStateActive, @@ -188,7 +188,7 @@ func TestKeyringEndpoint_InvalidUpdates(t *testing.T) { }, { - key: &structs.RootKey{ + key: &structs.UnwrappedRootKey{ Key: []byte{0x01}, Meta: &structs.RootKeyMeta{ KeyID: id, @@ -229,11 +229,11 @@ func TestKeyringEndpoint_Rotate(t *testing.T) { codec := rpcClient(t, srv) store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NoError(t, err) // Setup an existing key - key, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + key, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) must.NoError(t, err) key1 := key.Meta diff --git a/nomad/leader.go b/nomad/leader.go index d771a3cd160..e17cc74ef1a 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -2721,6 +2721,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { } var minVersionKeyring = version.Must(version.NewVersion("1.4.0")) +var minVersionKeyringInRaft = 
version.Must(version.NewVersion("1.9.0-dev")) // initializeKeyring creates the first root key if the leader doesn't // already have one. The metadata will be replicated via raft and then @@ -2731,12 +2732,12 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) { logger := s.logger.Named("keyring") store := s.fsm.State() - keyMeta, err := store.GetActiveRootKeyMeta(nil) + key, err := store.GetActiveRootKey(nil) if err != nil { logger.Error("failed to get active key: %v", err) return } - if keyMeta != nil { + if key != nil { return } @@ -2759,25 +2760,39 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) { logger.Trace("initializing keyring") - rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) + rootKey, err := structs.NewUnwrappedRootKey(structs.EncryptionAlgorithmAES256GCM) rootKey = rootKey.MakeActive() if err != nil { logger.Error("could not initialize keyring: %v", err) return } - err = s.encrypter.AddKey(rootKey) + isClusterUpgraded := ServersMeetMinimumVersion( + s.serf.Members(), s.Region(), minVersionKeyringInRaft, true) + + wrappedKeys, err := s.encrypter.AddUnwrappedKey(rootKey, isClusterUpgraded) if err != nil { logger.Error("could not add initial key to keyring", "error", err) return } - - if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType, - structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: rootKey.Meta, - }); err != nil { - logger.Error("could not initialize keyring", "error", err) - return + if isClusterUpgraded { + if _, _, err = s.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKeys, + }); err != nil { + logger.Error("could not initialize keyring", "error", err) + return + } + } else { + logger.Warn(fmt.Sprintf("not all servers are >=%q; initializing legacy keyring", + minVersionKeyringInRaft)) + if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType, + structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: rootKey.Meta, + }); err != nil { + logger.Error("could not initialize keyring", "error", err) + return + } } logger.Info("initialized keyring", "id", rootKey.Meta.KeyID) diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go index 25525d38c65..687d5511832 100644 --- a/nomad/plan_apply_test.go +++ b/nomad/plan_apply_test.go @@ -75,7 +75,7 @@ func TestPlanApply_applyPlan(t *testing.T) { s1, cleanupS1 := TestServer(t, nil) defer cleanupS1() - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForKeyring(t, s1.RPC, s1.Region()) // Register node node := mock.Node() @@ -251,7 +251,7 @@ func TestPlanApply_applyPlanWithNormalizedAllocs(t *testing.T) { c.Build = "1.4.0" }) defer cleanupS1() - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForKeyring(t, s1.RPC, s1.Region()) // Register node node := mock.Node() @@ -467,6 +467,54 @@ func TestPlanApply_signAllocIdentities(t *testing.T) { } } +// TestPlanApply_KeyringNotReady asserts we safely fail to apply a plan if the +// leader's keyring is not ready +func TestPlanApply_KeyringNotReady(t *testing.T) { + ci.Parallel(t) + + srv, cleanup := TestServer(t, func(c *Config) { + c.KEKProviderConfigs = []*structs.KEKProviderConfig{{ + Provider: "no-such-provider", + Active: true, + }} + }) + defer cleanup() + testutil.WaitForLeader(t, srv.RPC) // don't WaitForKeyring + + node := mock.Node() + alloc := mock.Alloc() + deploy := mock.Deployment() + dupdates := []*structs.DeploymentStatusUpdate{ + { + DeploymentID: uuid.Generate(), + Status: "foo", + StatusDescription: "bar", + }, + } + 
plan := &structs.Plan{ + Job: alloc.Job, + NodeAllocation: map[string][]*structs.Allocation{ + node.ID: {alloc}, + }, + Deployment: deploy, + DeploymentUpdates: dupdates, + } + + planRes := &structs.PlanResult{ + NodeAllocation: map[string][]*structs.Allocation{ + node.ID: {alloc}, + }, + NodeUpdate: map[string][]*structs.Allocation{}, + NodePreemptions: map[string][]*structs.Allocation{}, + Deployment: deploy, + DeploymentUpdates: dupdates, + } + snap, _ := srv.State().Snapshot() + + _, err := srv.applyPlan(plan, planRes, snap) + must.EqError(t, err, "keyring has not been initialized yet") +} + func TestPlanApply_EvalPlan_Simple(t *testing.T) { ci.Parallel(t) state := testStateStore(t) diff --git a/nomad/server.go b/nomad/server.go index a488481d997..27dbc0587e9 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -553,6 +553,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // exist before it can start. s.keyringReplicator = NewKeyringReplicator(s, encrypter) + // Block until keys are decrypted + s.encrypter.IsReady(s.shutdownCtx) + // Done return s, nil } @@ -1378,6 +1381,7 @@ func (s *Server) setupRaft() error { EvalBroker: s.evalBroker, Periodic: s.periodicDispatcher, Blocked: s.blockedEvals, + Encrypter: s.encrypter, Logger: s.logger, Region: s.Region(), EnableEventBroker: s.config.EnableEventBroker, diff --git a/nomad/state/schema.go b/nomad/state/schema.go index e27261bf111..7aaafd5cfef 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -20,7 +20,7 @@ const ( TableServiceRegistrations = "service_registrations" TableVariables = "variables" TableVariablesQuotas = "variables_quota" - TableRootKeyMeta = "root_key_meta" + TableRootKeys = "root_keys" TableACLRoles = "acl_roles" TableACLAuthMethods = "acl_auth_methods" TableACLBindingRules = "acl_binding_rules" @@ -93,7 +93,7 @@ func init() { serviceRegistrationsTableSchema, variablesTableSchema, variablesQuotasTableSchema, - variablesRootKeyMetaSchema, + wrappedRootKeySchema, aclRolesTableSchema, aclAuthMethodsTableSchema, bindingRulesTableSchema, @@ -1557,10 +1557,10 @@ func variablesQuotasTableSchema() *memdb.TableSchema { } } -// variablesRootKeyMetaSchema returns the MemDB schema for Nomad root keys -func variablesRootKeyMetaSchema() *memdb.TableSchema { +// wrappedRootKeySchema returns the MemDB schema for wrapped Nomad root keys +func wrappedRootKeySchema() *memdb.TableSchema { return &memdb.TableSchema{ - Name: TableRootKeyMeta, + Name: TableRootKeys, Indexes: map[string]*memdb.IndexSchema{ indexID: { Name: indexID, diff --git a/nomad/state/state_store_keyring.go b/nomad/state/state_store_keyring.go index c719579183d..aab27dc9372 100644 --- a/nomad/state/state_store_keyring.go +++ b/nomad/state/state_store_keyring.go @@ -10,29 +10,29 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// UpsertRootKeyMeta saves root key meta or updates it in-place. -func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKeyMeta, rekey bool) error { +// UpsertRootKey saves a root key or updates it in place. 
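+// If the upsert newly activates a key, every other key is marked inactive in +// the same transaction, and a rekey may only be requested together with such +// an activation.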
+func (s *StateStore) UpsertRootKey(index uint64, rootKey *structs.RootKey, rekey bool) error { txn := s.db.WriteTxn(index) defer txn.Abort() // get any existing key for updating - raw, err := txn.First(TableRootKeyMeta, indexID, rootKeyMeta.KeyID) + raw, err := txn.First(TableRootKeys, indexID, rootKey.KeyID) if err != nil { - return fmt.Errorf("root key metadata lookup failed: %v", err) + return fmt.Errorf("root key lookup failed: %v", err) } isRotation := false if raw != nil { - existing := raw.(*structs.RootKeyMeta) - rootKeyMeta.CreateIndex = existing.CreateIndex - rootKeyMeta.CreateTime = existing.CreateTime - isRotation = !existing.IsActive() && rootKeyMeta.IsActive() + existing := raw.(*structs.RootKey) + rootKey.CreateIndex = existing.CreateIndex + rootKey.CreateTime = existing.CreateTime + isRotation = !existing.IsActive() && rootKey.IsActive() } else { - rootKeyMeta.CreateIndex = index - isRotation = rootKeyMeta.IsActive() + rootKey.CreateIndex = index + isRotation = rootKey.IsActive() } - rootKeyMeta.ModifyIndex = index + rootKey.ModifyIndex = index if rekey && !isRotation { return fmt.Errorf("cannot rekey without setting the new key active") @@ -41,7 +41,7 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe // if the upsert is for a newly-active key, we need to set all the // other keys as inactive in the same transaction. if isRotation { - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableRootKeys, indexID) if err != nil { return err } @@ -50,7 +50,7 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.RootKey) modified := false switch key.State { @@ -72,56 +72,54 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe if modified { key.ModifyIndex = index - if err := txn.Insert(TableRootKeyMeta, key); err != nil { + if err := txn.Insert(TableRootKeys, key); err != nil { return err } - } + } } } - if err := txn.Insert(TableRootKeyMeta, rootKeyMeta); err != nil { + if err := txn.Insert(TableRootKeys, rootKey); err != nil { return err } - - // update the indexes table - if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { + if err := txn.Insert("index", &IndexEntry{TableRootKeys, index}); err != nil { return fmt.Errorf("index update failed: %v", err) } + return txn.Commit() } -// DeleteRootKeyMeta deletes a single root key, or returns an error if -// it doesn't exist. -func (s *StateStore) DeleteRootKeyMeta(index uint64, keyID string) error { +// DeleteRootKey deletes a single wrapped root key set, or returns an +// error if it doesn't exist. 
+func (s *StateStore) DeleteRootKey(index uint64, keyID string) error { txn := s.db.WriteTxn(index) defer txn.Abort() // find the old key - existing, err := txn.First(TableRootKeyMeta, indexID, keyID) + existing, err := txn.First(TableRootKeys, indexID, keyID) if err != nil { - return fmt.Errorf("root key metadata lookup failed: %v", err) + return fmt.Errorf("root key lookup failed: %v", err) } if existing == nil { - return fmt.Errorf("root key metadata not found") + return nil // this case should be validated in RPC } - if err := txn.Delete(TableRootKeyMeta, existing); err != nil { - return fmt.Errorf("root key metadata delete failed: %v", err) + if err := txn.Delete(TableRootKeys, existing); err != nil { + return fmt.Errorf("root key delete failed: %v", err) } - // update the indexes table - if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { + if err := txn.Insert("index", &IndexEntry{TableRootKeys, index}); err != nil { return fmt.Errorf("index update failed: %v", err) } return txn.Commit() } -// RootKeyMetas returns an iterator over all root key metadata -func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, error) { +// RootKeys returns an iterator over all root keys +func (s *StateStore) RootKeys(ws memdb.WatchSet) (memdb.ResultIterator, error) { txn := s.db.ReadTxn() - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableRootKeys, indexID) if err != nil { return nil, err } @@ -130,42 +128,42 @@ func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, erro return iter, nil } -// RootKeyMetaByID returns a specific root key meta -func (s *StateStore) RootKeyMetaByID(ws memdb.WatchSet, id string) (*structs.RootKeyMeta, error) { +// RootKeyByID returns a specific root key +func (s *StateStore) RootKeyByID(ws memdb.WatchSet, id string) (*structs.RootKey, error) { txn := s.db.ReadTxn() - watchCh, raw, err := txn.FirstWatch(TableRootKeyMeta, indexID, id) + watchCh, raw, err := txn.FirstWatch(TableRootKeys, indexID, id) if err != nil { - return nil, fmt.Errorf("root key metadata lookup failed: %v", err) + return nil, fmt.Errorf("root key lookup failed: %v", err) } ws.Add(watchCh) if raw != nil { - return raw.(*structs.RootKeyMeta), nil + return raw.(*structs.RootKey), nil } return nil, nil } -// GetActiveRootKeyMeta returns the metadata for the currently active root key -func (s *StateStore) GetActiveRootKeyMeta(ws memdb.WatchSet) (*structs.RootKeyMeta, error) { +// GetActiveRootKey returns the currently active root key +func (s *StateStore) GetActiveRootKey(ws memdb.WatchSet) (*structs.RootKey, error) { txn := s.db.ReadTxn() - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableRootKeys, indexID) if err != nil { return nil, err } ws.Add(iter.WatchCh()) - for { raw := iter.Next() if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.RootKey) if key.IsActive() { return key, nil } } + return nil, nil } diff --git a/nomad/state/state_store_keyring_test.go b/nomad/state/state_store_keyring_test.go index bc2ef965170..7a7244a1bb7 100644 --- a/nomad/state/state_store_keyring_test.go +++ b/nomad/state/state_store_keyring_test.go @@ -7,11 +7,12 @@ import ( "testing" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) -func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { +func TestStateStore_WrappedRootKey_CRUD(t *testing.T) { ci.Parallel(t) store := 
testStateStore(t) index, err := store.LatestIndex() @@ -23,34 +24,36 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { key := structs.NewRootKeyMeta() keyIDs = append(keyIDs, key.KeyID) if i == 0 { - key = key.MakeActive() + key.State = structs.RootKeyStateActive } index++ - must.NoError(t, store.UpsertRootKeyMeta(index, key, false)) + wrappedKeys := structs.NewRootKey(key) + must.NoError(t, store.UpsertRootKey(index, wrappedKeys, false)) } // retrieve the active key - activeKey, err := store.GetActiveRootKeyMeta(nil) + activeKey, err := store.GetActiveRootKey(nil) must.NoError(t, err) must.NotNil(t, activeKey) // update an inactive key to active and verify the rotation - inactiveKey, err := store.RootKeyMetaByID(nil, keyIDs[1]) + inactiveKey, err := store.RootKeyByID(nil, keyIDs[1]) must.NoError(t, err) must.NotNil(t, inactiveKey) oldCreateIndex := inactiveKey.CreateIndex - newlyActiveKey := inactiveKey.MakeActive() + newlyActiveKey := inactiveKey.Copy() + newlyActiveKey = inactiveKey.MakeActive() index++ - must.NoError(t, store.UpsertRootKeyMeta(index, newlyActiveKey, false)) + must.NoError(t, store.UpsertRootKey(index, newlyActiveKey, false)) - iter, err := store.RootKeyMetas(nil) + iter, err := store.RootKeys(nil) must.NoError(t, err) for { raw := iter.Next() if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.RootKey) if key.KeyID == newlyActiveKey.KeyID { must.True(t, key.IsActive(), must.Sprint("expected updated key to be active")) must.Eq(t, oldCreateIndex, key.CreateIndex) @@ -61,9 +64,9 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { // delete the active key and verify it's been deleted index++ - must.NoError(t, store.DeleteRootKeyMeta(index, keyIDs[1])) + must.NoError(t, store.DeleteRootKey(index, keyIDs[1])) - iter, err = store.RootKeyMetas(nil) + iter, err = store.RootKeys(nil) must.NoError(t, err) var found int for { @@ -71,10 +74,13 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.RootKey) must.NotEq(t, keyIDs[1], key.KeyID) must.False(t, key.IsActive(), must.Sprint("expected remaining keys to be inactive")) found++ } must.Eq(t, 2, found, must.Sprint("expected only 2 keys remaining")) + + // deleting non-existent keys is safe + must.NoError(t, store.DeleteRootKey(index, uuid.Generate())) } diff --git a/nomad/state/state_store_restore.go b/nomad/state/state_store_restore.go index ad2efdc2926..2072ca727d7 100644 --- a/nomad/state/state_store_restore.go +++ b/nomad/state/state_store_restore.go @@ -240,11 +240,18 @@ func (r *StateRestore) VariablesQuotaRestore(quota *structs.VariablesQuota) erro return nil } -// RootKeyMetaQuotaRestore is used to restore a single root key meta into the -// root_key_meta table. -func (r *StateRestore) RootKeyMetaRestore(quota *structs.RootKeyMeta) error { - if err := r.txn.Insert(TableRootKeyMeta, quota); err != nil { - return fmt.Errorf("root key meta insert failed: %v", err) +// RootKeyMetaRestore is used to restore a legacy root key meta entry into the +// wrapped_root_keys table. +func (r *StateRestore) RootKeyMetaRestore(meta *structs.RootKeyMeta) error { + wrappedRootKeys := structs.NewRootKey(meta) + return r.RootKeyRestore(wrappedRootKeys) +} + +// RootKeyRestore is used to restore a single wrapped root key into the +// wrapped_root_keys table. 
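+// Restore only writes the wrapped key into the state store; the FSM is +// responsible for handing it to the Encrypter to be decrypted afterwards.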
+func (r *StateRestore) RootKeyRestore(wrappedKeys *structs.RootKey) error { + if err := r.txn.Insert(TableRootKeys, wrappedKeys); err != nil { + return fmt.Errorf("wrapped root keys insert failed: %v", err) } return nil } diff --git a/nomad/structs/keyring.go b/nomad/structs/keyring.go index c5a2b36b003..a858a771ab5 100644 --- a/nomad/structs/keyring.go +++ b/nomad/structs/keyring.go @@ -15,6 +15,7 @@ import ( "time" "github.com/go-jose/go-jose/v3" + "github.com/golang/protobuf/proto" wrapping "github.com/hashicorp/go-kms-wrapping/v2" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/crypto" @@ -38,8 +39,9 @@ const ( JWKSPath = "/.well-known/jwks.json" ) -// RootKey is used to encrypt and decrypt variables. It is never stored in raft. -type RootKey struct { +// UnwrappedRootKey is used to encrypt and decrypt variables. This is the +// unencrypted key material and it is never stored in raft. +type UnwrappedRootKey struct { Meta *RootKeyMeta Key []byte // serialized to keystore as base64 blob @@ -49,12 +51,12 @@ type RootKey struct { RSAKey []byte } -// NewRootKey returns a new root key and its metadata. -func NewRootKey(algorithm EncryptionAlgorithm) (*RootKey, error) { +// NewUnwrappedRootKey returns a new root key and its metadata. +func NewUnwrappedRootKey(algorithm EncryptionAlgorithm) (*UnwrappedRootKey, error) { meta := NewRootKeyMeta() meta.Algorithm = algorithm - rootKey := &RootKey{ + rootKey := &UnwrappedRootKey{ Meta: meta, } @@ -78,8 +80,8 @@ func NewRootKey(algorithm EncryptionAlgorithm) (*RootKey, error) { return rootKey, nil } -func (k *RootKey) Copy() *RootKey { - return &RootKey{ +func (k *UnwrappedRootKey) Copy() *UnwrappedRootKey { + return &UnwrappedRootKey{ Meta: k.Meta.Copy(), Key: slices.Clone(k.Key), RSAKey: slices.Clone(k.RSAKey), @@ -87,9 +89,11 @@ func (k *RootKey) Copy() *RootKey { } // MakeInactive returns a copy of the RootKey with the meta state set to active -func (k *RootKey) MakeActive() *RootKey { - return &RootKey{ - Meta: k.Meta.MakeActive(), +func (k *UnwrappedRootKey) MakeActive() *UnwrappedRootKey { + meta := k.Meta.Copy() + meta.State = RootKeyStateActive + return &UnwrappedRootKey{ + Meta: meta, Key: slices.Clone(k.Key), RSAKey: slices.Clone(k.RSAKey), } @@ -97,16 +101,166 @@ func (k *RootKey) MakeActive() *RootKey { // MakeInactive returns a copy of the RootKey with the meta state set to // inactive -func (k *RootKey) MakeInactive() *RootKey { - return &RootKey{ - Meta: k.Meta.MakeInactive(), +func (k *UnwrappedRootKey) MakeInactive() *UnwrappedRootKey { + meta := k.Meta.Copy() + meta.State = RootKeyStateInactive + return &UnwrappedRootKey{ + Meta: meta, Key: slices.Clone(k.Key), RSAKey: slices.Clone(k.RSAKey), } } -// RootKeyMeta is the metadata used to refer to a RootKey. It is -// stored in raft. +// RootKey represents the key material encrypted by a set of KMS wrapping +// plugins, plus metadata. It is stored in Raft. 
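+// The unencrypted key material is never stored in this struct; it is +// represented separately as an UnwrappedRootKey, which is never written to +// Raft.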
+type RootKey struct { + KeyID string // UUID + Algorithm EncryptionAlgorithm + CreateTime int64 + CreateIndex uint64 + ModifyIndex uint64 + State RootKeyState + PublishTime int64 + + WrappedKeys []*WrappedKey +} + +func NewRootKey(meta *RootKeyMeta) *RootKey { + return &RootKey{ + KeyID: meta.KeyID, + Algorithm: meta.Algorithm, + CreateTime: meta.CreateTime, + CreateIndex: meta.CreateIndex, + ModifyIndex: meta.ModifyIndex, + State: meta.State, + PublishTime: meta.PublishTime, + WrappedKeys: []*WrappedKey{}, + } +} + +func (k *RootKey) Meta() *RootKeyMeta { + return &RootKeyMeta{ + KeyID: k.KeyID, + Algorithm: k.Algorithm, + CreateTime: k.CreateTime, + CreateIndex: k.CreateIndex, + ModifyIndex: k.ModifyIndex, + State: k.State, + PublishTime: k.PublishTime, + } +} + +func (k *RootKey) Copy() *RootKey { + if k == nil { + return nil + } + out := *k + out.WrappedKeys = helper.CopySlice(k.WrappedKeys) + return &out +} + +// IsActive indicates this key is the one currently being used for crypto +// operations (at most one key can be Active) +func (k *RootKey) IsActive() bool { + return k.State == RootKeyStateActive +} + +// MakeActive returns a copy of the RootKey with the state set to active +func (k *RootKey) MakeActive() *RootKey { + out := k.Copy() + if out != nil { + out.State = RootKeyStateActive + out.PublishTime = 0 + } + return out +} + +// IsRekeying indicates that variables encrypted with this key should be +// rekeyed +func (k *RootKey) IsRekeying() bool { + return k.State == RootKeyStateRekeying +} + +// MakeRekeying returns a copy of the RootKey with the state set to rekeying +func (k *RootKey) MakeRekeying() *RootKey { + out := k.Copy() + if out != nil { + out.State = RootKeyStateRekeying + } + return out +} + +// MakePrepublished returns a copy of the RootKey with the state set to +// prepublished at the time t +func (k *RootKey) MakePrepublished(t int64) *RootKey { + out := k.Copy() + if out != nil { + out.PublishTime = t + out.State = RootKeyStatePrepublished + } + return out +} + +// IsPrepublished indicates that this key has been published and is pending +// being promoted to active +func (k *RootKey) IsPrepublished() bool { + return k.State == RootKeyStatePrepublished +} + +// MakeInactive returns a copy of the RootKey with the state set to inactive +func (k *RootKey) MakeInactive() *RootKey { + out := k.Copy() + if out != nil { + out.State = RootKeyStateInactive + } + return out +} + +// IsInactive indicates that this key is no longer being used to encrypt new +// variables or workload identities. +func (k *RootKey) IsInactive() bool { + return k.State == RootKeyStateInactive || k.State == RootKeyStateDeprecated +} + +// WrappedKey represents key material encrypted by a specific KMS wrapping +// plugin. A slice of these are stored in RootKeys in Raft. +type WrappedKey struct { + // Provider is the KMS wrapping plugin + Provider string + + // ProviderID is the identifier of the specific instance of the KMS wrapping + // plugin, for Nomad Enterprise where you might have multiple KMS of the + // same kind for HA (ex. 2 Vaults) + ProviderID string + + // WrappedDataEncryptionKey is the encrypted DEK used for encrypting + // Variables. The BlobInfo includes everything needed for the KMS to decrypt + // it except the KEK. + WrappedDataEncryptionKey *wrapping.BlobInfo + + // WrappedRSAKey is the encrypted DEK used for signing Workload + // Identities. The BlobInfo includes everything needed for the KMS to + // decrypt it except the KEK. 
+ WrappedRSAKey *wrapping.BlobInfo + + // KeyEncryptionKey is the cleartext KEK, and is only included in the struct + // we write to Raft when using the AEAD plugin + KeyEncryptionKey []byte +} + +func (w *WrappedKey) Copy() *WrappedKey { + if w == nil { + return nil + } + out := *w + copy(out.KeyEncryptionKey, w.KeyEncryptionKey) + out.WrappedDataEncryptionKey = proto.Clone(w.WrappedDataEncryptionKey).(*wrapping.BlobInfo) + out.WrappedRSAKey = proto.Clone(w.WrappedRSAKey).(*wrapping.BlobInfo) + return &out +} + +// RootKeyMeta is the metadata used to refer to a RootKey. It's a "stub" of the +// RootKey and gets used in RPC responses type RootKeyMeta struct { KeyID string // UUID Algorithm EncryptionAlgorithm @@ -202,57 +356,12 @@ func (rkm *RootKeyMeta) IsActive() bool { return rkm.State == RootKeyStateActive } -// MakeActive returns a copy of the RootKeyMeta with the state set to active -func (rkm *RootKeyMeta) MakeActive() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateActive - out.PublishTime = 0 - } - return out -} - -// IsRekeying indicates that variables encrypted with this key should be -// rekeyed -func (rkm *RootKeyMeta) IsRekeying() bool { - return rkm.State == RootKeyStateRekeying -} - -// MakeRekeying returns a copy of the RootKeyMeta with the state set to rekeying -func (rkm *RootKeyMeta) MakeRekeying() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateRekeying - } - return out -} - -// MakePrepublished returns a copy of the RootKeyMeta with the state set to -// prepublished at the time t -func (rkm *RootKeyMeta) MakePrepublished(t int64) *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.PublishTime = t - out.State = RootKeyStatePrepublished - } - return out -} - // IsPrepublished indicates that this key has been published and is pending // being promoted to active func (rkm *RootKeyMeta) IsPrepublished() bool { return rkm.State == RootKeyStatePrepublished } -// MakeInactive returns a copy of the RootKeyMeta with the state set to inactive -func (rkm *RootKeyMeta) MakeInactive() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateInactive - } - return out -} - // IsInactive indicates that this key is no longer being used to encrypt new // variables or workload identities. func (rkm *RootKeyMeta) IsInactive() bool { @@ -286,10 +395,11 @@ func (rkm *RootKeyMeta) Validate() error { return nil } -// KeyEncryptionKeyWrapper is the struct that gets serialized for the on-disk -// KMS wrapper. When using the AEAD provider, this struct includes the -// server-specific key-wrapping key. This struct should never be sent over RPC -// or written to Raft. +// KeyEncryptionKeyWrapper is a flattened version of the WrappedRootKeys struct +// that gets serialized to disk for a keyset when using the legacy on-disk +// keystore with the AEAD KMS wrapper. This struct includes the server-specific +// key-wrapping key (KEK). This struct should never be sent over RPC or written +// to Raft. 
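+// Unlike WrappedKey above, it exists only in the server's local on-disk +// keystore.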
type KeyEncryptionKeyWrapper struct { Meta *RootKeyMeta @@ -345,7 +455,7 @@ type KeyringListRootKeyMetaResponse struct { // for applying to the FSM with the KeyringUpdateRootKeyMetaRequest // (see below) type KeyringUpdateRootKeyRequest struct { - RootKey *RootKey + RootKey *UnwrappedRootKey Rekey bool WriteRequest } @@ -354,6 +464,15 @@ type KeyringUpdateRootKeyResponse struct { WriteMeta } +// KeyringUpsertWrappedRootKeyRequest is used by the leader during keyring +// initialization and when keys are rotated, to write a new wrapped root key to +// Raft. +type KeyringUpsertWrappedRootKeyRequest struct { + WrappedRootKeys *RootKey + Rekey bool + WriteRequest +} + // KeyringGetRootKeyRequest is used internally for key replication // only and for keyring restores. type KeyringGetRootKeyRequest struct { @@ -362,7 +481,7 @@ type KeyringGetRootKeyRequest struct { } type KeyringGetRootKeyResponse struct { - Key *RootKey + Key *UnwrappedRootKey QueryMeta } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index ead7cb0fa29..8e62e0599fc 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -117,8 +117,8 @@ const ( ServiceRegistrationDeleteByIDRequestType MessageType = 48 ServiceRegistrationDeleteByNodeIDRequestType MessageType = 49 VarApplyStateRequestType MessageType = 50 - RootKeyMetaUpsertRequestType MessageType = 51 - RootKeyMetaDeleteRequestType MessageType = 52 + RootKeyMetaUpsertRequestType MessageType = 51 // DEPRECATED + WrappedRootKeysDeleteRequestType MessageType = 52 ACLRolesUpsertRequestType MessageType = 53 ACLRolesDeleteByIDRequestType MessageType = 54 ACLAuthMethodsUpsertRequestType MessageType = 55 @@ -127,10 +127,13 @@ const ( ACLBindingRulesDeleteRequestType MessageType = 58 NodePoolUpsertRequestType MessageType = 59 NodePoolDeleteRequestType MessageType = 60 + JobVersionTagRequestType MessageType = 61 + WrappedRootKeysUpsertRequestType MessageType = 62 + NamespaceUpsertRequestType MessageType = 64 + NamespaceDeleteRequestType MessageType = 65 - // Namespace types were moved from enterprise and therefore start at 64 - NamespaceUpsertRequestType MessageType = 64 - NamespaceDeleteRequestType MessageType = 65 + // NOTE: MessageTypes are shared between CE and ENT. If you need to add a + // new type, check that ENT is not already using that value. ) const (