diff --git a/.changelog/23977.txt b/.changelog/23977.txt new file mode 100644 index 000000000000..88ecfcd9bd33 --- /dev/null +++ b/.changelog/23977.txt @@ -0,0 +1,3 @@ +```release-note:improvement +keyring: Stored wrapped data encryption keys in Raft +``` diff --git a/go.mod b/go.mod index 81555c6ea8db..a26c433530c2 100644 --- a/go.mod +++ b/go.mod @@ -128,6 +128,7 @@ require ( go.etcd.io/bbolt v1.3.9 go.uber.org/goleak v1.2.1 golang.org/x/crypto v0.27.0 + golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 golang.org/x/sync v0.8.0 golang.org/x/sys v0.25.0 golang.org/x/time v0.3.0 @@ -291,7 +292,6 @@ require ( github.com/vmware/govmomi v0.18.0 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect golang.org/x/mod v0.18.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect diff --git a/helper/backoff.go b/helper/backoff.go index e25e1f1c2b95..2d9510f2fc45 100644 --- a/helper/backoff.go +++ b/helper/backoff.go @@ -4,6 +4,8 @@ package helper import ( + "context" + "fmt" "time" ) @@ -29,3 +31,34 @@ func Backoff(backoffBase time.Duration, backoffLimit time.Duration, attempt uint return deadline } + +// WithBackoffFunc is a helper that runs a function with geometric backoff + a +// small jitter to a maximum backoff. It returns once the context closes, with +// the error wrapping over the error from the function. +func WithBackoffFunc(ctx context.Context, minBackoff, maxBackoff time.Duration, fn func() error) error { + var err error + backoff := minBackoff + t, stop := NewSafeTimer(0) + defer stop() + for { + select { + case <-ctx.Done(): + return fmt.Errorf("operation cancelled: %w", err) + case <-t.C: + } + + err = fn() + if err == nil { + return nil + } + + if backoff < maxBackoff { + backoff = backoff*2 + RandomStagger(minBackoff/10) + if backoff > maxBackoff { + backoff = maxBackoff + } + } + + t.Reset(backoff) + } +} diff --git a/helper/raftutil/msgtypes.go b/helper/raftutil/msgtypes.go index 9112eb4a6163..615881173c95 100644 --- a/helper/raftutil/msgtypes.go +++ b/helper/raftutil/msgtypes.go @@ -55,8 +55,7 @@ var msgTypeNames = map[structs.MessageType]string{ structs.ServiceRegistrationDeleteByIDRequestType: "ServiceRegistrationDeleteByIDRequestType", structs.ServiceRegistrationDeleteByNodeIDRequestType: "ServiceRegistrationDeleteByNodeIDRequestType", structs.VarApplyStateRequestType: "VarApplyStateRequestType", - structs.RootKeyMetaUpsertRequestType: "RootKeyMetaUpsertRequestType", - structs.RootKeyMetaDeleteRequestType: "RootKeyMetaDeleteRequestType", + structs.WrappedRootKeysDeleteRequestType: "WrappedRootKeysDeleteRequestType", structs.ACLRolesUpsertRequestType: "ACLRolesUpsertRequestType", structs.ACLRolesDeleteByIDRequestType: "ACLRolesDeleteByIDRequestType", structs.ACLAuthMethodsUpsertRequestType: "ACLAuthMethodsUpsertRequestType", @@ -65,6 +64,8 @@ var msgTypeNames = map[structs.MessageType]string{ structs.ACLBindingRulesDeleteRequestType: "ACLBindingRulesDeleteRequestType", structs.NodePoolUpsertRequestType: "NodePoolUpsertRequestType", structs.NodePoolDeleteRequestType: "NodePoolDeleteRequestType", + structs.JobVersionTagRequestType: "JobVersionTagRequestType", + structs.WrappedRootKeysUpsertRequestType: "WrappedRootKeysUpsertRequestType", structs.NamespaceUpsertRequestType: "NamespaceUpsertRequestType", structs.NamespaceDeleteRequestType: "NamespaceDeleteRequestType", } diff --git a/nomad/core_sched.go b/nomad/core_sched.go index 
fc2105b64ef5..cc9070fd8e87 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -899,6 +899,19 @@ func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool) // rootKeyRotateOrGC is used to rotate or garbage collect root keys func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error { + // migration sends updates to the leader so our view of state is no longer + // valid. we ack this core job and will pick up again at the next + // interval. + // + // COMPAT(1.12.0): remove this block in 1.12.0 LTS + wasMigrated, err := c.rootKeyMigrate(eval) + if err != nil { + return err + } + if wasMigrated { + return nil + } + // a rotation will be sent to the leader so our view of state // is no longer valid. we ack this core job and will pick up // the GC work on the next interval @@ -915,7 +928,7 @@ func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error { func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error { ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.WrappedRootKeys(ws) if err != nil { return err } @@ -931,7 +944,7 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) + keyMeta := raw.(*structs.WrappedRootKeys) if !keyMeta.IsInactive() { continue // never GC keys we're still using } @@ -970,24 +983,67 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error return nil } -// rootKeyRotate checks if the active key is old enough that we need to kick off -// a rotation. It prepublishes a key first and only promotes that prepublished -// key to active once the rotation threshold has expired -func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) { +// rootKeyMigrate checks if the cluster is fully upgraded and migrates all the +// legacy root meta keys to the new wrapped key format +// +// COMPAT(1.12.0): remove this function in 1.12.0 LTS +func (c *CoreScheduler) rootKeyMigrate(eval *structs.Evaluation) (bool, error) { + if !ServersMeetMinimumVersion( + c.srv.serf.Members(), c.srv.Region(), minVersionKeyringInRaft, true) { + return false, nil + } ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.WrappedRootKeys(ws) if err != nil { return false, err } + wasMigrated := false + for raw := iter.Next(); raw != nil; raw = iter.Next() { + wrappedKeys := raw.(*structs.WrappedRootKeys) + if len(wrappedKeys.WrappedKeys) > 0 { + continue // already migrated + } + rootKey, err := c.srv.encrypter.GetKey(wrappedKeys.KeyID) + if err != nil { + return wasMigrated, err + } + req := &structs.KeyringUpdateRootKeyRequest{ + RootKey: rootKey, + WriteRequest: structs.WriteRequest{ + Region: c.srv.config.Region, + AuthToken: eval.LeaderACL, + }, + } + + if err := c.srv.RPC("Keyring.Update", + req, &structs.KeyringUpdateRootKeyResponse{}); err != nil { + c.logger.Error("migrating legacy key material failed", + "error", err, "key_id", wrappedKeys.KeyID) + return false, err + } + wasMigrated = true + } + return wasMigrated, nil +} + +// rootKeyRotate checks if the active key is old enough that we need to kick off +// a rotation.
It prepublishes a key first and only promotes that prepublished +// key to active once the rotation threshold has expired +func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) { var ( - activeKey *structs.RootKeyMeta - prepublishedKey *structs.RootKeyMeta + activeKey *structs.WrappedRootKeys + prepublishedKey *structs.WrappedRootKeys ) + ws := memdb.NewWatchSet() + iter, err := c.snap.WrappedRootKeys(ws) + if err != nil { + return false, err + } for raw := iter.Next(); raw != nil; raw = iter.Next() { - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.WrappedRootKeys) switch key.State { case structs.RootKeyStateActive: activeKey = key @@ -1083,7 +1139,7 @@ func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) ( func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { ws := memdb.NewWatchSet() - iter, err := c.snap.RootKeyMetas(ws) + iter, err := c.snap.WrappedRootKeys(ws) if err != nil { return err } @@ -1093,11 +1149,11 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) - if !keyMeta.IsRekeying() { + wrappedKeys := raw.(*structs.WrappedRootKeys) + if !wrappedKeys.IsRekeying() { continue } - varIter, err := c.snap.GetVariablesByKeyID(ws, keyMeta.KeyID) + varIter, err := c.snap.GetVariablesByKeyID(ws, wrappedKeys.KeyID) if err != nil { return err } @@ -1106,7 +1162,7 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error { return err } - rootKey, err := c.srv.encrypter.GetKey(keyMeta.KeyID) + rootKey, err := c.srv.encrypter.GetKey(wrappedKeys.KeyID) if err != nil { return fmt.Errorf("rotated key does not exist in keyring: %w", err) } diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index 76a04a67de93..fa8735f05fed 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2623,7 +2623,7 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { // active key, will never be GC'd store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be bootstapped")) must.NoError(t, err) @@ -2648,11 +2648,11 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { must.NoError(t, err) must.True(t, rotated, must.Sprint("key should rotate")) - var key1 *structs.RootKeyMeta - iter, err := store.RootKeyMetas(nil) + var key1 *structs.WrappedRootKeys + iter, err := store.WrappedRootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.WrappedRootKeys) if k.KeyID == key0.KeyID { must.True(t, k.IsActive(), must.Sprint("expected original key to be active")) } else { @@ -2675,10 +2675,10 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { c.snap, _ = store.Snapshot() rotated, err = c.rootKeyRotate(eval, now) - iter, err = store.RootKeyMetas(nil) + iter, err = store.WrappedRootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.WrappedRootKeys) switch k.KeyID { case key0.KeyID: must.True(t, k.IsActive(), must.Sprint("original key should still be active")) @@ -2694,10 +2694,10 @@ func TestCoreScheduler_RootKeyRotate(t *testing.T) { now = time.Unix(0, key1.PublishTime+(time.Minute*10).Nanoseconds()) rotated, err = c.rootKeyRotate(eval, now) - iter, err = store.RootKeyMetas(nil) + iter, err = 
store.WrappedRootKeys(nil) must.NoError(t, err) for raw := iter.Next(); raw != nil; raw = iter.Next() { - k := raw.(*structs.RootKeyMeta) + k := raw.(*structs.WrappedRootKeys) switch k.KeyID { case key0.KeyID: must.True(t, k.IsInactive(), must.Sprint("original key should be inactive")) @@ -2725,7 +2725,7 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { // active key, will never be GC'd store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be bootstapped")) must.NoError(t, err) @@ -2733,14 +2733,14 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { yesterday := now - (24 * time.Hour).Nanoseconds() // insert an "old" inactive key - key1 := structs.NewRootKeyMeta().MakeInactive() + key1 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeInactive() key1.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(600, key1, false)) + must.NoError(t, store.UpsertWrappedRootKeys(600, key1, false)) // insert an "old" and inactive key with a variable that's using it - key2 := structs.NewRootKeyMeta().MakeInactive() + key2 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeInactive() key2.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(700, key2, false)) + must.NoError(t, store.UpsertWrappedRootKeys(700, key2, false)) variable := mock.VariableEncrypted() variable.KeyID = key2.KeyID @@ -2752,9 +2752,9 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { must.NoError(t, setResp.Error) // insert an "old" key that's inactive but being used by an alloc - key3 := structs.NewRootKeyMeta().MakeInactive() + key3 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeInactive() key3.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(800, key3, false)) + must.NoError(t, store.UpsertWrappedRootKeys(800, key3, false)) // insert the allocation using key3 alloc := mock.Alloc() @@ -2764,9 +2764,9 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { structs.MsgTypeTestSetup, 850, []*structs.Allocation{alloc})) // insert an "old" key that's inactive but being used by an alloc - key4 := structs.NewRootKeyMeta().MakeInactive() + key4 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeInactive() key4.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(900, key4, false)) + must.NoError(t, store.UpsertWrappedRootKeys(900, key4, false)) // insert the dead allocation using key4 alloc2 := mock.Alloc() @@ -2777,14 +2777,14 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { structs.MsgTypeTestSetup, 950, []*structs.Allocation{alloc2})) // insert an inactive key older than RootKeyGCThreshold but not RootKeyRotationThreshold - key5 := structs.NewRootKeyMeta().MakeInactive() + key5 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeInactive() key5.CreateTime = now - (15 * time.Minute).Nanoseconds() - must.NoError(t, store.UpsertRootKeyMeta(1500, key5, false)) + must.NoError(t, store.UpsertWrappedRootKeys(1500, key5, false)) // prepublishing key should never be GC'd no matter how old - key6 := structs.NewRootKeyMeta().MakePrepublished(yesterday) + key6 := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakePrepublished(yesterday) key6.CreateTime = yesterday - must.NoError(t, store.UpsertRootKeyMeta(1600, key6, false)) + must.NoError(t, store.UpsertWrappedRootKeys(1600, key6, false)) // run the core job snap, err := store.Snapshot() @@ -2795,31 +2795,31 @@ func TestCoreScheduler_RootKeyGC(t *testing.T) { must.NoError(t, 
c.rootKeyGC(eval, time.Now())) ws := memdb.NewWatchSet() - key, err := store.RootKeyMetaByID(ws, key0.KeyID) + key, err := store.WrappedRootKeysByID(ws, key0.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("active key should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key1.KeyID) + key, err = store.WrappedRootKeysByID(ws, key1.KeyID) must.NoError(t, err) must.Nil(t, key, must.Sprint("old and unused inactive key should have been GCd")) - key, err = store.RootKeyMetaByID(ws, key2.KeyID) + key, err = store.WrappedRootKeysByID(ws, key2.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("old key should not have been GCd if still in use")) - key, err = store.RootKeyMetaByID(ws, key3.KeyID) + key, err = store.WrappedRootKeysByID(ws, key3.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("old key used to sign a live alloc should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key4.KeyID) + key, err = store.WrappedRootKeysByID(ws, key4.KeyID) must.NoError(t, err) must.Nil(t, key, must.Sprint("old key used to sign a terminal alloc should have been GCd")) - key, err = store.RootKeyMetaByID(ws, key5.KeyID) + key, err = store.WrappedRootKeysByID(ws, key5.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("key newer than GC+rotation threshold should not have been GCd")) - key, err = store.RootKeyMetaByID(ws, key6.KeyID) + key, err = store.WrappedRootKeysByID(ws, key6.KeyID) must.NoError(t, err) must.NotNil(t, key, must.Sprint("prepublishing key should not have been GCd")) } @@ -2835,7 +2835,7 @@ func TestCoreScheduler_VariablesRekey(t *testing.T) { testutil.WaitForKeyring(t, srv.RPC, "global") store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NotNil(t, key0, must.Sprint("expected keyring to be bootstapped")) must.NoError(t, err) @@ -2883,7 +2883,7 @@ func TestCoreScheduler_VariablesRekey(t *testing.T) { } } - originalKey, _ := store.RootKeyMetaByID(nil, key0.KeyID) + originalKey, _ := store.WrappedRootKeysByID(nil, key0.KeyID) return originalKey.IsInactive() }), ), must.Sprint("variable rekey should be complete")) diff --git a/nomad/encrypter.go b/nomad/encrypter.go index 95f0604c5942..d94eaba5caa4 100644 --- a/nomad/encrypter.go +++ b/nomad/encrypter.go @@ -35,6 +35,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/raft" + "golang.org/x/exp/maps" "golang.org/x/time/rate" ) @@ -57,8 +58,9 @@ type Encrypter struct { // issuer is the OIDC Issuer to use for workload identities if configured issuer string - keyring map[string]*keyset - lock sync.RWMutex + keyring map[string]*keyset + decryptTasks map[string]context.CancelFunc + lock sync.RWMutex } // keyset contains the key material for variable encryption and workload @@ -84,6 +86,7 @@ func NewEncrypter(srv *Server, keystorePath string) (*Encrypter, error) { keyring: make(map[string]*keyset), issuer: srv.GetConfig().OIDCIssuer, providerConfigs: map[string]*structs.KEKProviderConfig{}, + decryptTasks: map[string]context.CancelFunc{}, } providerConfigs, err := getProviderConfigs(srv) @@ -187,6 +190,22 @@ func (e *Encrypter) loadKeystore() error { }) } +func (e *Encrypter) IsReady(ctx context.Context) error { + err := helper.WithBackoffFunc(ctx, time.Millisecond*100, time.Second, func() error { + e.lock.RLock() + defer e.lock.RUnlock() + if len(e.decryptTasks) != 0 { + return fmt.Errorf("keyring is not ready - waiting for keys %s", + 
maps.Keys(e.decryptTasks)) + } + return nil + }) + if err != nil { + return err + } + return nil +} + // Encrypt encrypts the clear data with the cipher for the current // root key, and returns the cipher text (including the nonce), and // the key ID used to encrypt it @@ -216,10 +235,10 @@ func (e *Encrypter) Encrypt(cleartext []byte) ([]byte, string, error) { // Decrypt takes an encrypted buffer and then root key ID. It extracts // the nonce, decrypts the content, and returns the cleartext data. func (e *Encrypter) Decrypt(ciphertext []byte, keyID string) ([]byte, error) { - e.lock.RLock() - defer e.lock.RUnlock() - ks, err := e.keysetByIDLocked(keyID) + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, time.Second) + defer cancel() + ks, err := e.waitForKey(ctx, keyID) if err != nil { return nil, err } @@ -247,25 +266,9 @@ func (e *Encrypter) SignClaims(claims *structs.IdentityClaims) (string, string, return "", "", errors.New("cannot sign empty claims") } - // If a key is rotated immediately following a leader election, plans that - // are in-flight may get signed before the new leader has the key. Allow for - // a short timeout-and-retry to avoid rejecting plans ks, err := e.activeKeySet() if err != nil { - ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 5*time.Second) - defer cancel() - for { - select { - case <-ctx.Done(): - return "", "", err - default: - time.Sleep(50 * time.Millisecond) - ks, err = e.activeKeySet() - if ks != nil { - break - } - } - } + return "", "", err } // Add Issuer claim from server configuration @@ -314,7 +317,7 @@ func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, er } // Find the key material - pubKey, err := e.GetPublicKey(keyID) + pubKey, err := e.waitForPublicKey(keyID) if err != nil { return nil, err } @@ -341,21 +344,147 @@ func (e *Encrypter) VerifyClaim(tokenString string) (*structs.IdentityClaims, er return claims, nil } -// AddKey stores the key in the keystore and creates a new cipher for it. -func (e *Encrypter) AddKey(rootKey *structs.RootKey) error { +// AddUnwrappedKey stores the key in the keystore and creates a new cipher for +// it. This is called in the RPC handlers on the leader and from the legacy +// KeyringReplicator. +func (e *Encrypter) AddUnwrappedKey(rootKey *structs.RootKey, isUpgraded bool) (*structs.WrappedRootKeys, error) { // note: we don't lock the keyring here but inside addCipher // instead, so that we're not holding the lock while performing // local disk writes if err := e.addCipher(rootKey); err != nil { - return err + return nil, err } - if err := e.saveKeyToStore(rootKey); err != nil { - return err + return e.wrapRootKey(rootKey, isUpgraded) +} + +// AddWrappedKey creates decryption tasks for keys we've previously stored in +// Raft. It's only called as a goroutine by the FSM Apply for WrappedRootKeys, +// but it returns an error for ease of testing. 
+func (e *Encrypter) AddWrappedKey(ctx context.Context, wrappedKeys *structs.WrappedRootKeys) error { + + logger := e.log.With("key_id", wrappedKeys.KeyID) + + e.lock.Lock() + + _, err := e.keysetByIDLocked(wrappedKeys.KeyID) + if err == nil { + + // key material for each key ID is immutable so nothing to do, but we + // can cancel and remove any running decrypt tasks + if cancel, ok := e.decryptTasks[wrappedKeys.KeyID]; ok { + cancel() + delete(e.decryptTasks, wrappedKeys.KeyID) + } + e.lock.Unlock() + return nil + } + + if cancel, ok := e.decryptTasks[wrappedKeys.KeyID]; ok { + // stop any previous tasks for this same key ID under the assumption + // they're broken or being superseded, but don't remove the CancelFunc + // from the map yet so that other callers don't think we can continue + cancel() + } + + e.lock.Unlock() + + completeCtx, cancel := context.WithCancel(ctx) + + for _, wrappedKey := range wrappedKeys.WrappedKeys { + providerID := wrappedKey.ProviderID + if providerID == "" { + providerID = string(structs.KEKProviderAEAD) + } + + provider, ok := e.providerConfigs[providerID] + if !ok { + logger.Error("no such KMS provider configured - root key cannot be decrypted", + "provider_id", providerID) + cancel() + return fmt.Errorf("no such provider %q configured", providerID) + } + + wrapper, err := e.newKMSWrapper(provider, wrappedKeys.KeyID, wrappedKey.KeyEncryptionKey) + if err != nil { + // the errors that bubble up from this library can be a bit opaque, so + // make sure we wrap them with as much context as possible + logger.Error("unable to create KMS wrapper - root key cannot be decrypted", + "provider_id", providerID, "error", err) + + cancel() + return fmt.Errorf("unable to create key wrapper for provider %q: %w", providerID, err) + } + + // fan-out decryption tasks for HA in Nomad Enterprise. we can use the + // key whenever any one provider returns a successful decryption + go e.decryptWrappedKeyTask(completeCtx, cancel, wrapper, provider, wrappedKeys.Meta(), wrappedKey) } + + e.lock.Lock() + defer e.lock.Unlock() + + e.decryptTasks[wrappedKeys.KeyID] = cancel + return nil } +func (e *Encrypter) decryptWrappedKeyTask(ctx context.Context, cancel context.CancelFunc, wrapper kms.Wrapper, provider *structs.KEKProviderConfig, meta *structs.RootKeyMeta, wrappedKey *structs.WrappedRootKey) { + + var key []byte + var rsaKey []byte + var err error + + minBackoff := time.Second + maxBackoff := time.Second * 5 + + helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + wrappedDEK := wrappedKey.WrappedDataEncryptionKey + key, err = wrapper.Decrypt(e.srv.shutdownCtx, wrappedDEK) + if err != nil { + err := fmt.Errorf("%w (root key): %w", ErrDecryptFailed, err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + return err + } + return nil + }) + + helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + // Decrypt RSAKey for Workload Identity JWT signing if one exists. Prior to + // 1.7 an ed25519 key derived from the root key was used instead of an RSA + // key.
+ if wrappedKey.WrappedRSAKey != nil { + rsaKey, err = wrapper.Decrypt(e.srv.shutdownCtx, wrappedKey.WrappedRSAKey) + if err != nil { + err := fmt.Errorf("%w (rsa key): %w", ErrDecryptFailed, err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + } + } + return nil + }) + + rootKey := &structs.RootKey{ + Meta: meta, + Key: key, + RSAKey: rsaKey, + } + + helper.WithBackoffFunc(ctx, minBackoff, maxBackoff, func() error { + err = e.addCipher(rootKey) + if err != nil { + err := fmt.Errorf("could not add cipher: %w", err) + e.log.Error(err.Error(), "key_id", meta.KeyID) + return err + } + return nil + }) + + e.lock.Lock() + defer e.lock.Unlock() + cancel() + delete(e.decryptTasks, meta.KeyID) +} + // addCipher stores the key in the keyring and creates a new cipher for it. func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { @@ -404,33 +533,67 @@ func (e *Encrypter) addCipher(rootKey *structs.RootKey) error { return nil } +// waitForKey retrieves the key material by ID from the keyring, retrying with +// geometric backoff until the context expires. +func (e *Encrypter) waitForKey(ctx context.Context, keyID string) (*keyset, error) { + var ks *keyset + var err error + + helper.WithBackoffFunc(ctx, 50*time.Millisecond, 100*time.Millisecond, + func() error { + e.lock.RLock() + defer e.lock.RUnlock() + var err error + ks, err = e.keysetByIDLocked(keyID) + if err != nil { + return err + } + return nil + }) + if err != nil { + return nil, err + } + if ks == nil { + return nil, fmt.Errorf("no such key") + } + return ks, nil +} + // GetKey retrieves the key material by ID from the keyring. func (e *Encrypter) GetKey(keyID string) (*structs.RootKey, error) { - e.lock.RLock() - defer e.lock.RUnlock() + e.lock.Lock() + defer e.lock.Unlock() - keyset, err := e.keysetByIDLocked(keyID) + ks, err := e.keysetByIDLocked(keyID) if err != nil { return nil, err } - return keyset.rootKey, nil + if ks == nil { + return nil, fmt.Errorf("no such key") + } + + return ks.rootKey, nil } // activeKeySetLocked returns the keyset that belongs to the key marked as -// active in the state store (so that it's consistent with raft). The -// called must read-lock the keyring +// active in the state store (so that it's consistent with raft). +// +// If a key is rotated immediately following a leader election, plans that are +// in-flight may get signed before the new leader has decrypted the key. Allow +// for a short timeout-and-retry to avoid rejecting plans func (e *Encrypter) activeKeySet() (*keyset, error) { store := e.srv.fsm.State() - keyMeta, err := store.GetActiveRootKeyMeta(nil) + key, err := store.GetActiveRootKey(nil) if err != nil { return nil, err } - if keyMeta == nil { + if key == nil { return nil, fmt.Errorf("keyring has not been initialized yet") } - e.lock.RLock() - defer e.lock.RUnlock() - return e.keysetByIDLocked(keyMeta.KeyID) + + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, time.Second) + defer cancel() + return e.waitForKey(ctx, key.KeyID) } // keysetByIDLocked returns the keyset for the specified keyID. 
The @@ -451,7 +614,7 @@ func (e *Encrypter) RemoveKey(keyID string) error { return nil } -func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKProviderConfig) (*structs.KeyEncryptionKeyWrapper, error) { +func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKProviderConfig) (*structs.WrappedRootKey, error) { if provider == nil { panic("can't encrypt DEK without a provider") } @@ -472,12 +635,13 @@ func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKPr if err != nil { return nil, fmt.Errorf("failed to encrypt root key: %w", err) } - kekWrapper := &structs.KeyEncryptionKeyWrapper{ - Meta: rootKey.Meta, - KeyEncryptionKey: kek, + + kekWrapper := &structs.WrappedRootKey{ Provider: provider.Provider, ProviderID: provider.ID(), WrappedDataEncryptionKey: rootBlob, + WrappedRSAKey: &kms.BlobInfo{}, + KeyEncryptionKey: kek, } // Only keysets created after 1.7.0 will contain an RSA key. @@ -492,32 +656,72 @@ func (e *Encrypter) encryptDEK(rootKey *structs.RootKey, provider *structs.KEKPr return kekWrapper, nil } -// saveKeyToStore serializes a root key to the on-disk keystore. -func (e *Encrypter) saveKeyToStore(rootKey *structs.RootKey) error { +// wrapRootKey encrypts the key for every KEK provider and returns the wrapped +// key. On legacy clusters, this also serializes the wrapped key to the on-disk +// keystore. +func (e *Encrypter) wrapRootKey(rootKey *structs.RootKey, isUpgraded bool) (*structs.WrappedRootKeys, error) { + + wrappedKeys := structs.NewWrappedRootKeys(rootKey.Meta) for _, provider := range e.providerConfigs { if !provider.Active { continue } - kekWrapper, err := e.encryptDEK(rootKey, provider) + wrappedKey, err := e.encryptDEK(rootKey, provider) if err != nil { - return err + return nil, err } - buf, err := json.Marshal(kekWrapper) - if err != nil { - return err - } + switch { + case isUpgraded && provider.Provider == string(structs.KEKProviderAEAD): + // nothing to do but don't want to hit next case - filename := fmt.Sprintf("%s.%s%s", - rootKey.Meta.KeyID, provider.ID(), nomadKeystoreExtension) - path := filepath.Join(e.keystorePath, filename) - err = os.WriteFile(path, buf, 0o600) - if err != nil { - return err + case isUpgraded: + wrappedKey.KeyEncryptionKey = nil + + case provider.Provider == string(structs.KEKProviderAEAD): // !isUpgraded + kek := wrappedKey.KeyEncryptionKey + wrappedKey.KeyEncryptionKey = nil + e.writeKeyToDisk(rootKey.Meta, provider, wrappedKey, kek) + + default: // !isUpgraded + wrappedKey.KeyEncryptionKey = nil + e.writeKeyToDisk(rootKey.Meta, provider, wrappedKey, nil) } + + wrappedKeys.WrappedKeys = append(wrappedKeys.WrappedKeys, wrappedKey) + + } + return wrappedKeys, nil +} + +func (e *Encrypter) writeKeyToDisk( + meta *structs.RootKeyMeta, provider *structs.KEKProviderConfig, + wrappedKey *structs.WrappedRootKey, kek []byte) error { + + // the on-disk keystore flattens the keys wrapped for the individual + // KMS providers out to their own files + diskWrapper := &structs.KeyEncryptionKeyWrapper{ + Meta: meta, + Provider: provider.Name, + ProviderID: provider.ID(), + WrappedDataEncryptionKey: wrappedKey.WrappedDataEncryptionKey, + WrappedRSAKey: wrappedKey.WrappedRSAKey, + KeyEncryptionKey: kek, } + buf, err := json.Marshal(diskWrapper) + if err != nil { + return err + } + + filename := fmt.Sprintf("%s.%s%s", + meta.KeyID, provider.ID(), nomadKeystoreExtension) + path := filepath.Join(e.keystorePath, filename) + err = os.WriteFile(path, buf, 0o600) + if err != nil { + return 
err + } return nil } @@ -592,6 +796,36 @@ func (e *Encrypter) loadKeyFromStore(path string) (*structs.RootKey, error) { var ErrDecryptFailed = errors.New("unable to decrypt wrapped key") +// waitForPublicKey returns the public signing key for the requested key id or +// an error if the key could not be found. It blocks up to 1s for key material +// to be decrypted so that Workload Identities signed by a brand-new key can be +// verified for stale RPCs made to followers that might not have yet decrypted +// the key received via Raft +func (e *Encrypter) waitForPublicKey(keyID string) (*structs.KeyringPublicKey, error) { + ctx, cancel := context.WithTimeout(e.srv.shutdownCtx, 1*time.Second) + defer cancel() + ks, err := e.waitForKey(ctx, keyID) + if err != nil { + return nil, err + } + + pubKey := &structs.KeyringPublicKey{ + KeyID: keyID, + Use: structs.PubKeyUseSig, + CreateTime: ks.rootKey.Meta.CreateTime, + } + + if ks.rsaPrivateKey != nil { + pubKey.PublicKey = ks.rsaPKCS1PublicKey + pubKey.Algorithm = structs.PubKeyAlgRS256 + } else { + pubKey.PublicKey = ks.eddsaPrivateKey.Public().(ed25519.PublicKey) + pubKey.Algorithm = structs.PubKeyAlgEdDSA + } + + return pubKey, nil +} + // GetPublicKey returns the public signing key for the requested key id or an // error if the key could not be found. func (e *Encrypter) GetPublicKey(keyID string) (*structs.KeyringPublicKey, error) { @@ -663,6 +897,10 @@ func (e *Encrypter) newKMSWrapper(provider *structs.KEKProviderConfig, keyID str return wrapper, nil } +// KeyringReplicator supports the legacy (pre-1.9.0) keyring management where +// wrapped keys were stored outside of Raft. +// +// COMPAT(1.12.0) - remove in 1.12.0 LTS type KeyringReplicator struct { srv *Server encrypter *Encrypter @@ -708,7 +946,7 @@ func (krr *KeyringReplicator) run(ctx context.Context) { } store := krr.srv.fsm.State() - iter, err := store.RootKeyMetas(nil) + iter, err := store.WrappedRootKeys(nil) if err != nil { krr.logger.Error("failed to fetch keyring", "error", err) continue @@ -719,22 +957,23 @@ func (krr *KeyringReplicator) run(ctx context.Context) { break } - keyMeta := raw.(*structs.RootKeyMeta) - if key, err := krr.encrypter.GetKey(keyMeta.KeyID); err == nil && len(key.Key) > 0 { + wrappedKeys := raw.(*structs.WrappedRootKeys) + if key, err := krr.encrypter.GetKey(wrappedKeys.KeyID); err == nil && len(key.Key) > 0 { // the key material is immutable so if we've already got it // we can move on to the next key continue } - err := krr.replicateKey(ctx, keyMeta) + err := krr.replicateKey(ctx, wrappedKeys) if err != nil { // don't break the loop on an error, as we want to make sure // we've replicated any keys we can. the rate limiter will // prevent this case from sending excessive RPCs - krr.logger.Error(err.Error(), "key", keyMeta.KeyID) + krr.logger.Error(err.Error(), "key", wrappedKeys.KeyID) } } + } } @@ -743,8 +982,8 @@ func (krr *KeyringReplicator) run(ctx context.Context) { // replicateKey replicates a single key from peer servers that was present in // the state store but missing from the keyring. Returns an error only if no // peers have this key. 
-func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs.RootKeyMeta) error { - keyID := keyMeta.KeyID +func (krr *KeyringReplicator) replicateKey(ctx context.Context, wrappedKeys *structs.WrappedRootKeys) error { + keyID := wrappedKeys.KeyID krr.logger.Debug("replicating new key", "id", keyID) var err error @@ -752,7 +991,7 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs KeyID: keyID, QueryOptions: structs.QueryOptions{ Region: krr.srv.config.Region, - MinQueryIndex: keyMeta.ModifyIndex - 1, + MinQueryIndex: wrappedKeys.ModifyIndex - 1, }, } getResp := &structs.KeyringGetRootKeyResponse{} @@ -795,7 +1034,12 @@ func (krr *KeyringReplicator) replicateKey(ctx context.Context, keyMeta *structs return fmt.Errorf("failed to fetch key from any peer: %v", err) } - err = krr.encrypter.AddKey(getResp.Key) + isClusterUpgraded := ServersMeetMinimumVersion( + krr.srv.serf.Members(), krr.srv.Region(), minVersionKeyringInRaft, true) + + // In the legacy replication, we toss out the wrapped key because it's + // always persisted to disk + _, err = krr.srv.encrypter.AddUnwrappedKey(getResp.Key, isClusterUpgraded) if err != nil { return fmt.Errorf("failed to add key to keyring: %v", err) } diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index 9477f80efb6a..85c4cc718016 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -10,6 +10,7 @@ import ( "encoding/json" "fmt" "maps" + "net/rpc" "os" "path/filepath" "testing" @@ -17,11 +18,6 @@ import ( "github.com/go-jose/go-jose/v3/jwt" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2" - "github.com/shoenig/test" - "github.com/shoenig/test/must" - "github.com/shoenig/test/wait" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" @@ -30,6 +26,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" + "github.com/stretchr/testify/require" ) var ( @@ -73,7 +73,9 @@ func TestEncrypter_LoadSave(t *testing.T) { key, err := structs.NewRootKey(algo) must.Greater(t, 0, len(key.RSAKey)) must.NoError(t, err) - must.NoError(t, encrypter.saveKeyToStore(key)) + + _, err = encrypter.wrapRootKey(key, false) + must.NoError(t, err) // startup code path gotKey, err := encrypter.loadKeyFromStore( @@ -81,7 +83,8 @@ func TestEncrypter_LoadSave(t *testing.T) { must.NoError(t, err) must.NoError(t, encrypter.addCipher(gotKey)) must.Greater(t, 0, len(gotKey.RSAKey)) - must.NoError(t, encrypter.saveKeyToStore(key)) + _, err = encrypter.wrapRootKey(key, false) + must.NoError(t, err) active, err := encrypter.keysetByIDLocked(key.Meta.KeyID) must.NoError(t, err) @@ -94,15 +97,15 @@ func TestEncrypter_LoadSave(t *testing.T) { must.NoError(t, err) // create a wrapper file identical to those before we had external KMS - kekWrapper, err := encrypter.encryptDEK(key, &structs.KEKProviderConfig{}) - kekWrapper.Provider = "" - kekWrapper.ProviderID = "" - kekWrapper.EncryptedDataEncryptionKey = kekWrapper.WrappedDataEncryptionKey.Ciphertext - kekWrapper.EncryptedRSAKey = kekWrapper.WrappedRSAKey.Ciphertext - kekWrapper.WrappedDataEncryptionKey = nil - kekWrapper.WrappedRSAKey = nil - - buf, err := json.Marshal(kekWrapper) + wrappedKey, err := encrypter.encryptDEK(key, &structs.KEKProviderConfig{}) + diskWrapper := 
&structs.KeyEncryptionKeyWrapper{ + Meta: key.Meta, + KeyEncryptionKey: wrappedKey.KeyEncryptionKey, + EncryptedDataEncryptionKey: wrappedKey.WrappedDataEncryptionKey.Ciphertext, + EncryptedRSAKey: wrappedKey.WrappedRSAKey.Ciphertext, + } + + buf, err := json.Marshal(diskWrapper) must.NoError(t, err) path := filepath.Join(tmpDir, key.Meta.KeyID+".nks.json") @@ -223,8 +226,9 @@ func TestEncrypter_Restore(t *testing.T) { } } -// TestEncrypter_KeyringReplication exercises key replication between servers -func TestEncrypter_KeyringReplication(t *testing.T) { +// TestEncrypter_KeyringBootstrapping exercises key decryption tasks as new +// servers come online and leaders are elected. +func TestEncrypter_KeyringBootstrapping(t *testing.T) { ci.Parallel(t) @@ -283,20 +287,35 @@ func TestEncrypter_KeyringReplication(t *testing.T) { keyID1 := listResp.Keys[0].KeyID - keyPath := filepath.Join(leader.GetConfig().DataDir, "keystore", - keyID1+".aead.nks.json") - _, err := os.Stat(keyPath) - must.NoError(t, err, must.Sprint("expected key to be found in leader keystore")) + // Helper function for checking that a specific key is in the keyring for a + // specific server + checkPublicKeyFn := func(codec rpc.ClientCodec, keyID string) bool { + listPublicReq := &structs.GenericRequest{ + QueryOptions: structs.QueryOptions{ + Region: "global", + AllowStale: true, + }, + } + var listPublicResp structs.KeyringListPublicResponse + msgpackrpc.CallWithCodec(codec, "Keyring.ListPublic", listPublicReq, &listPublicResp) + for _, key := range listPublicResp.PublicKeys { + if key.KeyID == keyID && len(key.PublicKey) > 0 { + return true + } + } + return false + } + + // leader's key should already be available by the time it's elected the + // leader + must.True(t, checkPublicKeyFn(codec, keyID1)) // Helper function for checking that a specific key has been - // replicated to followers - + // replicated to all followers checkReplicationFn := func(keyID string) func() bool { return func() bool { for _, srv := range servers { - keyPath := filepath.Join(srv.GetConfig().DataDir, "keystore", - keyID+".aead.nks.json") - if _, err := os.Stat(keyPath); err != nil { + if !checkPublicKeyFn(rpcClient(t, srv), keyID) { return false } } @@ -317,7 +336,7 @@ func TestEncrypter_KeyringReplication(t *testing.T) { }, } var rotateResp structs.KeyringRotateRootKeyResponse - err = msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp) + err := msgpackrpc.CallWithCodec(codec, "Keyring.Rotate", rotateReq, &rotateResp) must.NoError(t, err) keyID2 := rotateResp.Key.KeyID @@ -332,10 +351,8 @@ func TestEncrypter_KeyringReplication(t *testing.T) { must.NoError(t, err) must.NotNil(t, getResp.Key, must.Sprint("expected key to be found on leader")) - keyPath = filepath.Join(leader.GetConfig().DataDir, "keystore", - keyID2+".aead.nks.json") - _, err = os.Stat(keyPath) - must.NoError(t, err, must.Sprint("expected key to be found in leader keystore")) + must.True(t, checkPublicKeyFn(codec, keyID1), + must.Sprint("expected key to be found in leader keystore")) must.Wait(t, wait.InitialSuccess( wait.BoolFunc(checkReplicationFn(keyID2)), @@ -576,6 +593,23 @@ func TestEncrypter_Upgrade17(t *testing.T) { testutil.WaitForKeyring(t, srv.RPC, "global") codec := rpcClient(t, srv) + initKey, err := srv.State().GetActiveRootKey(nil) + must.NoError(t, err) + + wr := structs.WriteRequest{ + Namespace: "default", + Region: "global", + } + + // Delete the initialization key because it's a newer WrappedRootKey from + // 1.9, which isn't under test
here. + _, _, err = srv.raftApply( + structs.WrappedRootKeysDeleteRequestType, structs.KeyringDeleteRootKeyRequest{ + KeyID: initKey.KeyID, + WriteRequest: wr, + }) + must.NoError(t, err) + // Fake life as a 1.6 server by writing only ed25519 keys oldRootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) must.NoError(t, err) @@ -586,13 +620,10 @@ func TestEncrypter_Upgrade17(t *testing.T) { oldRootKey.RSAKey = nil // Add to keyring - must.NoError(t, srv.encrypter.AddKey(oldRootKey)) + _, err = srv.encrypter.AddUnwrappedKey(oldRootKey, false) + must.NoError(t, err) - // Write metadata to Raft - wr := structs.WriteRequest{ - Namespace: "default", - Region: "global", - } + // Write a legacy key metadata to Raft req := structs.KeyringUpdateRootKeyMetaRequest{ RootKeyMeta: oldRootKey.Meta, WriteRequest: wr, diff --git a/nomad/fsm.go b/nomad/fsm.go index 3e7d99d7c86f..edf6e7cda2ec 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -66,6 +66,7 @@ const ( ACLBindingRuleSnapshot SnapshotType = 27 NodePoolSnapshot SnapshotType = 28 JobSubmissionSnapshot SnapshotType = 29 + WrappedRootKeysSnapshot SnapshotType = 30 // Namespace appliers were moved from enterprise and therefore start at 64 NamespaceSnapshot SnapshotType = 64 @@ -102,6 +103,7 @@ var snapshotTypeStrings = map[SnapshotType]string{ ACLBindingRuleSnapshot: "ACLBindingRule", NodePoolSnapshot: "NodePool", JobSubmissionSnapshot: "JobSubmission", + WrappedRootKeysSnapshot: "WrappedRootKeys", NamespaceSnapshot: "Namespace", } @@ -126,6 +128,7 @@ type nomadFSM struct { evalBroker *EvalBroker blockedEvals *BlockedEvals periodicDispatcher *PeriodicDispatch + encrypter *Encrypter logger hclog.Logger state *state.StateStore timetable *TimeTable @@ -171,6 +174,9 @@ type FSMConfig struct { // be added to. Blocked *BlockedEvals + // Encrypter is the encrypter where new WrappedRootKeys should be added + Encrypter *Encrypter + // Logger is the logger used by the FSM Logger hclog.Logger @@ -207,6 +213,7 @@ func NewFSM(config *FSMConfig) (*nomadFSM, error) { evalBroker: config.EvalBroker, periodicDispatcher: config.Periodic, blockedEvals: config.Blocked, + encrypter: config.Encrypter, logger: config.Logger.Named("fsm"), config: config, state: state, @@ -371,8 +378,8 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyVariableOperation(msgType, buf[1:], log.Index) case structs.RootKeyMetaUpsertRequestType: return n.applyRootKeyMetaUpsert(msgType, buf[1:], log.Index) - case structs.RootKeyMetaDeleteRequestType: - return n.applyRootKeyMetaDelete(msgType, buf[1:], log.Index) + case structs.WrappedRootKeysDeleteRequestType: + return n.applyWrappedRootKeysDelete(msgType, buf[1:], log.Index) case structs.ACLRolesUpsertRequestType: return n.applyACLRolesUpsert(msgType, buf[1:], log.Index) case structs.ACLRolesDeleteByIDRequestType: @@ -385,6 +392,9 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyACLBindingRulesUpsert(buf[1:], log.Index) case structs.ACLBindingRulesDeleteRequestType: return n.applyACLBindingRulesDelete(buf[1:], log.Index) + case structs.WrappedRootKeysUpsertRequestType: + return n.applyWrappedRootKeysUpsert(msgType, buf[1:], log.Index) + } // Check enterprise only message types. 
@@ -1830,6 +1840,17 @@ func (n *nomadFSM) restoreImpl(old io.ReadCloser, filter *FSMFilter) error { if err := restore.RootKeyMetaRestore(keyMeta); err != nil { return err } + + case WrappedRootKeysSnapshot: + wrappedKeys := new(structs.WrappedRootKeys) + if err := dec.Decode(wrappedKeys); err != nil { + return err + } + + if err := restore.WrappedRootKeysRestore(wrappedKeys); err != nil { + return err + } + case ACLRoleSnapshot: // Create a new ACLRole object, so we can decode the message into @@ -2303,27 +2324,52 @@ func (n *nomadFSM) applyRootKeyMetaUpsert(msgType structs.MessageType, buf []byt panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpsertRootKeyMeta(index, req.RootKeyMeta, req.Rekey); err != nil { - n.logger.Error("UpsertRootKeyMeta failed", "error", err) + wrappedRootKeys := structs.NewWrappedRootKeys(req.RootKeyMeta) + + if err := n.state.UpsertWrappedRootKeys(index, wrappedRootKeys, req.Rekey); err != nil { + n.logger.Error("UpsertWrappedRootKeys failed", "error", err) return err } + // start a task to decrypt the key material + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, wrappedRootKeys) + + return nil +} + +func (n *nomadFSM) applyWrappedRootKeysUpsert(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_wrapped_root_key_upsert"}, time.Now()) + + var req structs.KeyringUpsertWrappedRootKeyRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.UpsertWrappedRootKeys(index, req.WrappedRootKeys, req.Rekey); err != nil { + n.logger.Error("UpsertWrappedRootKeys failed", "error", err) + return err + } + + // start a task to decrypt the key material + go n.encrypter.AddWrappedKey(n.encrypter.srv.shutdownCtx, req.WrappedRootKeys) + return nil } -func (n *nomadFSM) applyRootKeyMetaDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { - defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_root_key_meta_delete"}, time.Now()) +func (n *nomadFSM) applyWrappedRootKeysDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_wrapped_root_key_delete"}, time.Now()) var req structs.KeyringDeleteRootKeyRequest if err := structs.Decode(buf, &req); err != nil { panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.DeleteRootKeyMeta(index, req.KeyID); err != nil { - n.logger.Error("DeleteRootKeyMeta failed", "error", err) + if err := n.state.DeleteWrappedRootKeys(index, req.KeyID); err != nil { + n.logger.Error("DeleteWrappedRootKeys failed", "error", err) return err } + n.encrypter.RemoveKey(req.KeyID) return nil } @@ -2447,7 +2493,7 @@ func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { sink.Cancel() return err } - if err := s.persistRootKeyMeta(sink, encoder); err != nil { + if err := s.persistWrappedRootKeys(sink, encoder); err != nil { sink.Cancel() return err } @@ -3092,11 +3138,11 @@ func (s *nomadSnapshot) persistVariablesQuotas(sink raft.SnapshotSink, return nil } -func (s *nomadSnapshot) persistRootKeyMeta(sink raft.SnapshotSink, +func (s *nomadSnapshot) persistWrappedRootKeys(sink raft.SnapshotSink, encoder *codec.Encoder) error { ws := memdb.NewWatchSet() - keys, err := s.snap.RootKeyMetas(ws) + keys, err := s.snap.WrappedRootKeys(ws) if err != nil { return err } @@ -3106,8 +3152,8 @@ func (s *nomadSnapshot) persistRootKeyMeta(sink 
raft.SnapshotSink, if raw == nil { break } - key := raw.(*structs.RootKeyMeta) - sink.Write([]byte{byte(RootKeyMetaSnapshot)}) + key := raw.(*structs.WrappedRootKeys) + sink.Write([]byte{byte(WrappedRootKeysSnapshot)}) if err := encoder.Encode(key); err != nil { return err } diff --git a/nomad/keyring_endpoint.go b/nomad/keyring_endpoint.go index 9a383b1c5cb9..b55ec26ec6f5 100644 --- a/nomad/keyring_endpoint.go +++ b/nomad/keyring_endpoint.go @@ -60,29 +60,42 @@ func (k *Keyring) Rotate(args *structs.KeyringRotateRootKeyRequest, reply *struc } if args.PublishTime != 0 { - rootKey.Meta = rootKey.Meta.MakePrepublished(args.PublishTime) + rootKey.Meta.State = structs.RootKeyStatePrepublished + rootKey.Meta.PublishTime = args.PublishTime } else { - rootKey.Meta = rootKey.Meta.MakeActive() + rootKey.Meta.State = structs.RootKeyStateActive } - // make sure it's been added to the local keystore before we write - // it to raft, so that followers don't try to Get a key that - // hasn't yet been written to disk - err = k.encrypter.AddKey(rootKey) + isClusterUpgraded := ServersMeetMinimumVersion( + k.srv.serf.Members(), k.srv.Region(), minVersionKeyringInRaft, true) + + // wrap/encrypt the key before we write it to Raft + wrappedKeys, err := k.encrypter.AddUnwrappedKey(rootKey, isClusterUpgraded) if err != nil { return err } - // Update metadata via Raft so followers can retrieve this key - req := structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: rootKey.Meta, - Rekey: args.Full, - WriteRequest: args.WriteRequest, + var index uint64 + if isClusterUpgraded { + _, index, err = k.srv.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKeys, + Rekey: args.Full, + WriteRequest: args.WriteRequest, + }) + } else { + // COMPAT(1.12.0): remove the version check and this code path + _, index, err = k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, + structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: rootKey.Meta, + Rekey: args.Full, + WriteRequest: args.WriteRequest, + }) } - _, index, err := k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, req) if err != nil { return err } + reply.Key = rootKey.Meta reply.Index = index @@ -129,29 +142,23 @@ func (k *Keyring) List(args *structs.KeyringListRootKeyMetaRequest, reply *struc opts := blockingOptions{ queryOpts: &args.QueryOptions, queryMeta: &reply.QueryMeta, - run: func(ws memdb.WatchSet, s *state.StateStore) error { - - // retrieve all the key metadata - snap, err := k.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.RootKeyMetas(ws) + run: func(ws memdb.WatchSet, store *state.StateStore) error { + iter, err := store.WrappedRootKeys(ws) if err != nil { return err } - keys := []*structs.RootKeyMeta{} for { raw := iter.Next() if raw == nil { break } - keyMeta := raw.(*structs.RootKeyMeta) - keys = append(keys, keyMeta) + wrappedKey := raw.(*structs.WrappedRootKeys) + keys = append(keys, wrappedKey.Meta()) } + reply.Keys = keys - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + return k.srv.replySetIndex(state.TableWrappedRootKeys, &reply.QueryMeta) }, } return k.srv.blockingRPC(&opts) @@ -183,22 +190,35 @@ func (k *Keyring) Update(args *structs.KeyringUpdateRootKeyRequest, reply *struc return err } + isClusterUpgraded := ServersMeetMinimumVersion( + k.srv.serf.Members(), k.srv.Region(), minVersionKeyringInRaft, true) + // make sure it's been added to the local keystore before we write // it to raft, so that followers 
don't try to Get a key that // hasn't yet been written to disk - err = k.encrypter.AddKey(args.RootKey) + wrappedKeys, err := k.encrypter.AddUnwrappedKey(args.RootKey, isClusterUpgraded) if err != nil { return err } - // unwrap the request to turn it into a meta update only - metaReq := &structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: args.RootKey.Meta, - WriteRequest: args.WriteRequest, - } + var index uint64 + if isClusterUpgraded { + _, index, err = k.srv.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKeys, + WriteRequest: args.WriteRequest, + }) + } else { + // COMPAT(1.12.0): remove the version check and this code path + // unwrap the request to turn it into a meta update only + metaReq := &structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: args.RootKey.Meta, + WriteRequest: args.WriteRequest, + } - // update the metadata via Raft - _, index, err := k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, metaReq) + // update the metadata via Raft + _, index, err = k.srv.raftApply(structs.RootKeyMetaUpsertRequestType, metaReq) + } if err != nil { return err } @@ -225,11 +245,11 @@ func (k *Keyring) validateUpdate(args *structs.KeyringUpdateRootKeyRequest) erro return err } ws := memdb.NewWatchSet() - keyMeta, err := snap.RootKeyMetaByID(ws, args.RootKey.Meta.KeyID) + wrappedKeys, err := snap.WrappedRootKeysByID(ws, args.RootKey.Meta.KeyID) if err != nil { return err } - if keyMeta != nil && keyMeta.Algorithm != args.RootKey.Meta.Algorithm { + if wrappedKeys != nil && wrappedKeys.Algorithm != args.RootKey.Meta.Algorithm { return fmt.Errorf("root key algorithm cannot be changed after a key is created") } @@ -261,39 +281,29 @@ func (k *Keyring) Get(args *structs.KeyringGetRootKeyRequest, reply *structs.Key queryMeta: &reply.QueryMeta, run: func(ws memdb.WatchSet, s *state.StateStore) error { - // retrieve the key metadata snap, err := k.srv.fsm.State().Snapshot() if err != nil { return err } - keyMeta, err := snap.RootKeyMetaByID(ws, args.KeyID) + wrappedKeys, err := snap.WrappedRootKeysByID(ws, args.KeyID) if err != nil { return err } - if keyMeta == nil { - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + if wrappedKeys == nil { + return k.srv.replySetIndex(state.TableWrappedRootKeys, &reply.QueryMeta) } // retrieve the key material from the keyring - rootKey, err := k.encrypter.GetKey(keyMeta.KeyID) + rootKey, err := k.encrypter.GetKey(wrappedKeys.KeyID) if err != nil { return err } reply.Key = rootKey - - // Use the last index that affected the policy table - index, err := s.Index(state.TableRootKeyMeta) + err = k.srv.replySetIndex(state.TableWrappedRootKeys, &reply.QueryMeta) if err != nil { return err } - // Ensure we never set the index to zero, otherwise a blocking query - // cannot be used. We floor the index at one, since realistically - // the first write must have a higher index. 
- if index == 0 { - index = 1 - } - reply.Index = index return nil }, } @@ -324,24 +334,21 @@ func (k *Keyring) Delete(args *structs.KeyringDeleteRootKeyRequest, reply *struc } // lookup any existing key and validate the delete + var index uint64 snap, err := k.srv.fsm.State().Snapshot() if err != nil { return err } ws := memdb.NewWatchSet() - keyMeta, err := snap.RootKeyMetaByID(ws, args.KeyID) + wrappedKey, err := snap.WrappedRootKeysByID(ws, args.KeyID) if err != nil { return err } - if keyMeta == nil { - return nil // safe to bail out early - } - if keyMeta.IsActive() { + if wrappedKey != nil && wrappedKey.IsActive() { return fmt.Errorf("active root key cannot be deleted - call rotate first") } - // update via Raft - _, index, err := k.srv.raftApply(structs.RootKeyMetaDeleteRequestType, args) + _, index, err = k.srv.raftApply(structs.WrappedRootKeysDeleteRequestType, args) if err != nil { return err } @@ -377,40 +384,33 @@ func (k *Keyring) ListPublic(args *structs.GenericRequest, reply *structs.Keyrin opts := blockingOptions{ queryOpts: &args.QueryOptions, queryMeta: &reply.QueryMeta, - run: func(ws memdb.WatchSet, s *state.StateStore) error { - - // retrieve all the key metadata - snap, err := k.srv.fsm.State().Snapshot() - if err != nil { - return err - } - iter, err := snap.RootKeyMetas(ws) + run: func(ws memdb.WatchSet, store *state.StateStore) error { + iter, err := store.WrappedRootKeys(ws) if err != nil { return err } - pubKeys := []*structs.KeyringPublicKey{} for { raw := iter.Next() if raw == nil { break } - - keyMeta := raw.(*structs.RootKeyMeta) - if keyMeta.State == structs.RootKeyStateDeprecated { + wrappedKeys := raw.(*structs.WrappedRootKeys) + if wrappedKeys.State == structs.RootKeyStateDeprecated { // Only include valid keys continue } - pubKey, err := k.encrypter.GetPublicKey(keyMeta.KeyID) + pubKey, err := k.encrypter.GetPublicKey(wrappedKeys.KeyID) if err != nil { return err } pubKeys = append(pubKeys, pubKey) + } reply.PublicKeys = pubKeys - return k.srv.replySetIndex(state.TableRootKeyMeta, &reply.QueryMeta) + return k.srv.replySetIndex(state.TableWrappedRootKeys, &reply.QueryMeta) }, } return k.srv.blockingRPC(&opts) diff --git a/nomad/keyring_endpoint_test.go b/nomad/keyring_endpoint_test.go index 7aa999e97f96..481862f6a026 100644 --- a/nomad/keyring_endpoint_test.go +++ b/nomad/keyring_endpoint_test.go @@ -106,7 +106,7 @@ func TestKeyringEndpoint_CRUD(t *testing.T) { require.EqualError(t, err, "active root key cannot be deleted - call rotate first") // set inactive - updateReq.RootKey.Meta = updateReq.RootKey.Meta.MakeInactive() + updateReq.RootKey = updateReq.RootKey.MakeInactive() err = msgpackrpc.CallWithCodec(codec, "Keyring.Update", updateReq, &updateResp) require.NoError(t, err) @@ -229,7 +229,7 @@ func TestKeyringEndpoint_Rotate(t *testing.T) { codec := rpcClient(t, srv) store := srv.fsm.State() - key0, err := store.GetActiveRootKeyMeta(nil) + key0, err := store.GetActiveRootKey(nil) must.NoError(t, err) // Setup an existing key diff --git a/nomad/leader.go b/nomad/leader.go index d771a3cd1607..884955d9cda7 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -2721,6 +2721,7 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { } var minVersionKeyring = version.Must(version.NewVersion("1.4.0")) +var minVersionKeyringInRaft = version.Must(version.NewVersion("1.8.4-dev")) // initializeKeyring creates the first root key if the leader doesn't // already have one. 
The metadata will be replicated via raft and then @@ -2731,12 +2732,12 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) { logger := s.logger.Named("keyring") store := s.fsm.State() - keyMeta, err := store.GetActiveRootKeyMeta(nil) + key, err := store.GetActiveRootKey(nil) if err != nil { logger.Error("failed to get active key: %v", err) return } - if keyMeta != nil { + if key != nil { return } @@ -2766,18 +2767,32 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) { return } - err = s.encrypter.AddKey(rootKey) + isClusterUpgraded := ServersMeetMinimumVersion( + s.serf.Members(), s.Region(), minVersionKeyringInRaft, true) + + wrappedKeys, err := s.encrypter.AddUnwrappedKey(rootKey, isClusterUpgraded) if err != nil { logger.Error("could not add initial key to keyring", "error", err) return } - - if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType, - structs.KeyringUpdateRootKeyMetaRequest{ - RootKeyMeta: rootKey.Meta, - }); err != nil { - logger.Error("could not initialize keyring", "error", err) - return + if isClusterUpgraded { + logger.Warn("cluster is upgraded to 1.9.0: initializing keyring") + if _, _, err = s.raftApply(structs.WrappedRootKeysUpsertRequestType, + structs.KeyringUpsertWrappedRootKeyRequest{ + WrappedRootKeys: wrappedKeys, + }); err != nil { + logger.Error("could not initialize keyring", "error", err) + return + } + } else { + logger.Warn("cluster is not upgraded to 1.9.0: initializing legacy keyring") + if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType, + structs.KeyringUpdateRootKeyMetaRequest{ + RootKeyMeta: rootKey.Meta, + }); err != nil { + logger.Error("could not initialize keyring", "error", err) + return + } } logger.Info("initialized keyring", "id", rootKey.Meta.KeyID) diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go index 25525d38c65e..e7128ee8bff0 100644 --- a/nomad/plan_apply_test.go +++ b/nomad/plan_apply_test.go @@ -75,7 +75,7 @@ func TestPlanApply_applyPlan(t *testing.T) { s1, cleanupS1 := TestServer(t, nil) defer cleanupS1() - testutil.WaitForLeader(t, s1.RPC) + testutil.WaitForKeyring(t, s1.RPC, s1.Region()) // Register node node := mock.Node() diff --git a/nomad/server.go b/nomad/server.go index a488481d997b..27dbc0587e90 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -553,6 +553,9 @@ func NewServer(config *Config, consulCatalog consul.CatalogAPI, consulConfigFunc // exist before it can start. 
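Both the Keyring.Update RPC and the leader's initializeKeyring above gate their Raft writes on the same server-version check. A condensed sketch of that compatibility gate, written as if inside the nomad package; keyringUpsertMsgType is a hypothetical helper for illustration only, not a function added by this change:

```go
// keyringUpsertMsgType picks the Raft message used to persist a new root key:
// the wrapped-keys message once every server in the region can decode it,
// otherwise the legacy metadata-only message.
//
// COMPAT(1.12.0): the legacy branch goes away once wrapped keys in Raft are
// the minimum supported behavior.
func keyringUpsertMsgType(s *Server) structs.MessageType {
	if ServersMeetMinimumVersion(s.serf.Members(), s.Region(), minVersionKeyringInRaft, true) {
		return structs.WrappedRootKeysUpsertRequestType
	}
	return structs.RootKeyMetaUpsertRequestType
}
```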
s.keyringReplicator = NewKeyringReplicator(s, encrypter) + // Block until keys are decrypted + s.encrypter.IsReady(s.shutdownCtx) + // Done return s, nil } @@ -1378,6 +1381,7 @@ func (s *Server) setupRaft() error { EvalBroker: s.evalBroker, Periodic: s.periodicDispatcher, Blocked: s.blockedEvals, + Encrypter: s.encrypter, Logger: s.logger, Region: s.Region(), EnableEventBroker: s.config.EnableEventBroker, diff --git a/nomad/state/schema.go b/nomad/state/schema.go index e27261bf1112..a97dba27001d 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -20,7 +20,7 @@ const ( TableServiceRegistrations = "service_registrations" TableVariables = "variables" TableVariablesQuotas = "variables_quota" - TableRootKeyMeta = "root_key_meta" + TableWrappedRootKeys = "wrapped_root_keys" TableACLRoles = "acl_roles" TableACLAuthMethods = "acl_auth_methods" TableACLBindingRules = "acl_binding_rules" @@ -93,7 +93,7 @@ func init() { serviceRegistrationsTableSchema, variablesTableSchema, variablesQuotasTableSchema, - variablesRootKeyMetaSchema, + wrappedRootKeySchema, aclRolesTableSchema, aclAuthMethodsTableSchema, bindingRulesTableSchema, @@ -1557,10 +1557,10 @@ func variablesQuotasTableSchema() *memdb.TableSchema { } } -// variablesRootKeyMetaSchema returns the MemDB schema for Nomad root keys -func variablesRootKeyMetaSchema() *memdb.TableSchema { +// wrappedRootKeySchema returns the MemDB schema for wrapped Nomad root keys +func wrappedRootKeySchema() *memdb.TableSchema { return &memdb.TableSchema{ - Name: TableRootKeyMeta, + Name: TableWrappedRootKeys, Indexes: map[string]*memdb.IndexSchema{ indexID: { Name: indexID, diff --git a/nomad/state/state_store_keyring.go b/nomad/state/state_store_keyring.go index c719579183d0..1d9b95c2702d 100644 --- a/nomad/state/state_store_keyring.go +++ b/nomad/state/state_store_keyring.go @@ -10,29 +10,29 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// UpsertRootKeyMeta saves root key meta or updates it in-place. -func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKeyMeta, rekey bool) error { +// UpsertWrappedRootKeys saves a set of wrapped root keys or updates it in place.
+func (s *StateStore) UpsertWrappedRootKeys(index uint64, wrappedRootKeys *structs.WrappedRootKeys, rekey bool) error { txn := s.db.WriteTxn(index) defer txn.Abort() // get any existing key for updating - raw, err := txn.First(TableRootKeyMeta, indexID, rootKeyMeta.KeyID) + raw, err := txn.First(TableWrappedRootKeys, indexID, wrappedRootKeys.KeyID) if err != nil { - return fmt.Errorf("root key metadata lookup failed: %v", err) + return fmt.Errorf("root key lookup failed: %v", err) } isRotation := false if raw != nil { - existing := raw.(*structs.RootKeyMeta) - rootKeyMeta.CreateIndex = existing.CreateIndex - rootKeyMeta.CreateTime = existing.CreateTime - isRotation = !existing.IsActive() && rootKeyMeta.IsActive() + existing := raw.(*structs.WrappedRootKeys) + wrappedRootKeys.CreateIndex = existing.CreateIndex + wrappedRootKeys.CreateTime = existing.CreateTime + isRotation = !existing.IsActive() && wrappedRootKeys.IsActive() } else { - rootKeyMeta.CreateIndex = index - isRotation = rootKeyMeta.IsActive() + wrappedRootKeys.CreateIndex = index + isRotation = wrappedRootKeys.IsActive() } - rootKeyMeta.ModifyIndex = index + wrappedRootKeys.ModifyIndex = index if rekey && !isRotation { return fmt.Errorf("cannot rekey without setting the new key active") @@ -41,7 +41,7 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe // if the upsert is for a newly-active key, we need to set all the // other keys as inactive in the same transaction. if isRotation { - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableWrappedRootKeys, indexID) if err != nil { return err } @@ -50,7 +50,7 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.WrappedRootKeys) modified := false switch key.State { @@ -72,56 +72,54 @@ func (s *StateStore) UpsertRootKeyMeta(index uint64, rootKeyMeta *structs.RootKe if modified { key.ModifyIndex = index - if err := txn.Insert(TableRootKeyMeta, key); err != nil { + if err := txn.Insert(TableWrappedRootKeys, key); err != nil { return err } - } + } } } - if err := txn.Insert(TableRootKeyMeta, rootKeyMeta); err != nil { + if err := txn.Insert(TableWrappedRootKeys, wrappedRootKeys); err != nil { return err } - - // update the indexes table - if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { + if err := txn.Insert("index", &IndexEntry{TableWrappedRootKeys, index}); err != nil { return fmt.Errorf("index update failed: %v", err) } + return txn.Commit() } -// DeleteRootKeyMeta deletes a single root key, or returns an error if -// it doesn't exist. -func (s *StateStore) DeleteRootKeyMeta(index uint64, keyID string) error { +// DeleteWrappedRootKeys deletes a single wrapped root key set, or returns an +// error if it doesn't exist. 
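A hedged usage sketch of the rotation semantics enforced above, using only constructors and methods added elsewhere in this changeset (the calling function itself is hypothetical): upserting a newly active key demotes the other keys in the same transaction, and rekey is rejected unless the upserted key is becoming active.

```go
package example

import (
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
)

// rotateToNewKey is illustrative only and not part of this change.
func rotateToNewKey(store *state.StateStore, index uint64) error {
	// NewRootKeyMeta and NewWrappedRootKeys are shown in this diff;
	// MakeActive returns a copy of the wrapped keys in the active state.
	newKey := structs.NewWrappedRootKeys(structs.NewRootKeyMeta()).MakeActive()

	// rekey=true passes the isRotation guard only because newKey is active.
	if err := store.UpsertWrappedRootKeys(index, newKey, true); err != nil {
		return err
	}

	// Whatever key was active before is now inactive, so the active lookup
	// returns the key that was just written.
	active, err := store.GetActiveRootKey(nil)
	if err != nil {
		return err
	}
	_ = active // active.KeyID == newKey.KeyID
	return nil
}
```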
+func (s *StateStore) DeleteWrappedRootKeys(index uint64, keyID string) error { txn := s.db.WriteTxn(index) defer txn.Abort() // find the old key - existing, err := txn.First(TableRootKeyMeta, indexID, keyID) + existing, err := txn.First(TableWrappedRootKeys, indexID, keyID) if err != nil { - return fmt.Errorf("root key metadata lookup failed: %v", err) + return fmt.Errorf("root key lookup failed: %v", err) } if existing == nil { - return fmt.Errorf("root key metadata not found") + return nil // this case should be validated in RPC } - if err := txn.Delete(TableRootKeyMeta, existing); err != nil { - return fmt.Errorf("root key metadata delete failed: %v", err) + if err := txn.Delete(TableWrappedRootKeys, existing); err != nil { + return fmt.Errorf("root key delete failed: %v", err) } - // update the indexes table - if err := txn.Insert("index", &IndexEntry{TableRootKeyMeta, index}); err != nil { + if err := txn.Insert("index", &IndexEntry{TableWrappedRootKeys, index}); err != nil { return fmt.Errorf("index update failed: %v", err) } return txn.Commit() } -// RootKeyMetas returns an iterator over all root key metadata -func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, error) { +// WrappedRootKeys returns an iterator over all wrapped root keys +func (s *StateStore) WrappedRootKeys(ws memdb.WatchSet) (memdb.ResultIterator, error) { txn := s.db.ReadTxn() - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableWrappedRootKeys, indexID) if err != nil { return nil, err } @@ -130,42 +128,42 @@ func (s *StateStore) RootKeyMetas(ws memdb.WatchSet) (memdb.ResultIterator, erro return iter, nil } -// RootKeyMetaByID returns a specific root key meta -func (s *StateStore) RootKeyMetaByID(ws memdb.WatchSet, id string) (*structs.RootKeyMeta, error) { +// WrappedRootKeysByID returns a specific wrapped root key set +func (s *StateStore) WrappedRootKeysByID(ws memdb.WatchSet, id string) (*structs.WrappedRootKeys, error) { txn := s.db.ReadTxn() - watchCh, raw, err := txn.FirstWatch(TableRootKeyMeta, indexID, id) + watchCh, raw, err := txn.FirstWatch(TableWrappedRootKeys, indexID, id) if err != nil { - return nil, fmt.Errorf("root key metadata lookup failed: %v", err) + return nil, fmt.Errorf("root key lookup failed: %v", err) } ws.Add(watchCh) if raw != nil { - return raw.(*structs.RootKeyMeta), nil + return raw.(*structs.WrappedRootKeys), nil } return nil, nil } -// GetActiveRootKeyMeta returns the metadata for the currently active root key -func (s *StateStore) GetActiveRootKeyMeta(ws memdb.WatchSet) (*structs.RootKeyMeta, error) { +// GetActiveRootKey returns the currently active root key +func (s *StateStore) GetActiveRootKey(ws memdb.WatchSet) (*structs.WrappedRootKeys, error) { txn := s.db.ReadTxn() - iter, err := txn.Get(TableRootKeyMeta, indexID) + iter, err := txn.Get(TableWrappedRootKeys, indexID) if err != nil { return nil, err } ws.Add(iter.WatchCh()) - for { raw := iter.Next() if raw == nil { break } - key := raw.(*structs.RootKeyMeta) - if key.IsActive() { - return key, nil + wrappedKeys := raw.(*structs.WrappedRootKeys) + if wrappedKeys.IsActive() { + return wrappedKeys, nil } } + return nil, nil } diff --git a/nomad/state/state_store_keyring_test.go b/nomad/state/state_store_keyring_test.go index bc2ef965170d..8ff88122e889 100644 --- a/nomad/state/state_store_keyring_test.go +++ b/nomad/state/state_store_keyring_test.go @@ -7,11 +7,12 @@ import ( "testing" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" 
"github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) -func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { +func TestStateStore_WrappedRootKey_CRUD(t *testing.T) { ci.Parallel(t) store := testStateStore(t) index, err := store.LatestIndex() @@ -23,34 +24,36 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { key := structs.NewRootKeyMeta() keyIDs = append(keyIDs, key.KeyID) if i == 0 { - key = key.MakeActive() + key.State = structs.RootKeyStateActive } index++ - must.NoError(t, store.UpsertRootKeyMeta(index, key, false)) + wrappedKeys := structs.NewWrappedRootKeys(key) + must.NoError(t, store.UpsertWrappedRootKeys(index, wrappedKeys, false)) } // retrieve the active key - activeKey, err := store.GetActiveRootKeyMeta(nil) + activeKey, err := store.GetActiveRootKey(nil) must.NoError(t, err) must.NotNil(t, activeKey) // update an inactive key to active and verify the rotation - inactiveKey, err := store.RootKeyMetaByID(nil, keyIDs[1]) + inactiveKey, err := store.WrappedRootKeysByID(nil, keyIDs[1]) must.NoError(t, err) must.NotNil(t, inactiveKey) oldCreateIndex := inactiveKey.CreateIndex - newlyActiveKey := inactiveKey.MakeActive() + newlyActiveKey := inactiveKey.Copy() + newlyActiveKey = inactiveKey.MakeActive() index++ - must.NoError(t, store.UpsertRootKeyMeta(index, newlyActiveKey, false)) + must.NoError(t, store.UpsertWrappedRootKeys(index, newlyActiveKey, false)) - iter, err := store.RootKeyMetas(nil) + iter, err := store.WrappedRootKeys(nil) must.NoError(t, err) for { raw := iter.Next() if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.WrappedRootKeys) if key.KeyID == newlyActiveKey.KeyID { must.True(t, key.IsActive(), must.Sprint("expected updated key to be active")) must.Eq(t, oldCreateIndex, key.CreateIndex) @@ -61,9 +64,9 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { // delete the active key and verify it's been deleted index++ - must.NoError(t, store.DeleteRootKeyMeta(index, keyIDs[1])) + must.NoError(t, store.DeleteWrappedRootKeys(index, keyIDs[1])) - iter, err = store.RootKeyMetas(nil) + iter, err = store.WrappedRootKeys(nil) must.NoError(t, err) var found int for { @@ -71,10 +74,13 @@ func TestStateStore_RootKeyMetaData_CRUD(t *testing.T) { if raw == nil { break } - key := raw.(*structs.RootKeyMeta) + key := raw.(*structs.WrappedRootKeys) must.NotEq(t, keyIDs[1], key.KeyID) must.False(t, key.IsActive(), must.Sprint("expected remaining keys to be inactive")) found++ } must.Eq(t, 2, found, must.Sprint("expected only 2 keys remaining")) + + // deleting non-existent keys is safe + must.NoError(t, store.DeleteWrappedRootKeys(index, uuid.Generate())) } diff --git a/nomad/state/state_store_restore.go b/nomad/state/state_store_restore.go index ad2efdc29265..307bdba78245 100644 --- a/nomad/state/state_store_restore.go +++ b/nomad/state/state_store_restore.go @@ -240,11 +240,18 @@ func (r *StateRestore) VariablesQuotaRestore(quota *structs.VariablesQuota) erro return nil } -// RootKeyMetaQuotaRestore is used to restore a single root key meta into the -// root_key_meta table. -func (r *StateRestore) RootKeyMetaRestore(quota *structs.RootKeyMeta) error { - if err := r.txn.Insert(TableRootKeyMeta, quota); err != nil { - return fmt.Errorf("root key meta insert failed: %v", err) +// RootKeyMetaRestore is used to restore a legacy root key meta entry into the +// wrapped_root_keys table. 
+func (r *StateRestore) RootKeyMetaRestore(meta *structs.RootKeyMeta) error { + wrappedRootKeys := structs.NewWrappedRootKeys(meta) + return r.WrappedRootKeysRestore(wrappedRootKeys) +} + +// WrappedRootKeysRestore is used to restore a single wrapped root key into the +// wrapped_root_keys table. +func (r *StateRestore) WrappedRootKeysRestore(wrappedKeys *structs.WrappedRootKeys) error { + if err := r.txn.Insert(TableWrappedRootKeys, wrappedKeys); err != nil { + return fmt.Errorf("wrapped root keys insert failed: %v", err) + } return nil } diff --git a/nomad/structs/keyring.go b/nomad/structs/keyring.go index c5a2b36b0032..6d0e960b6790 100644 --- a/nomad/structs/keyring.go +++ b/nomad/structs/keyring.go @@ -15,6 +15,7 @@ import ( "time" "github.com/go-jose/go-jose/v3" + "github.com/golang/protobuf/proto" wrapping "github.com/hashicorp/go-kms-wrapping/v2" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/crypto" @@ -38,7 +39,8 @@ const ( JWKSPath = "/.well-known/jwks.json" ) -// RootKey is used to encrypt and decrypt variables. It is never stored in raft. +// RootKey is used to encrypt and decrypt variables. This is the "unwrapped" key +// and it is never stored in raft. type RootKey struct { Meta *RootKeyMeta Key []byte // serialized to keystore as base64 blob @@ -88,8 +90,10 @@ func (k *RootKey) Copy() *RootKey { // MakeInactive returns a copy of the RootKey with the meta state set to active func (k *RootKey) MakeActive() *RootKey { + meta := k.Meta.Copy() + meta.State = RootKeyStateActive return &RootKey{ - Meta: k.Meta.MakeActive(), + Meta: meta, Key: slices.Clone(k.Key), RSAKey: slices.Clone(k.RSAKey), } @@ -98,13 +102,164 @@ func (k *RootKey) MakeActive() *RootKey { // MakeInactive returns a copy of the RootKey with the meta state set to // inactive func (k *RootKey) MakeInactive() *RootKey { + meta := k.Meta.Copy() + meta.State = RootKeyStateInactive return &RootKey{ - Meta: k.Meta.MakeInactive(), + Meta: meta, Key: slices.Clone(k.Key), RSAKey: slices.Clone(k.RSAKey), } } +// WrappedRootKeys represents a RootKey encrypted by a set of KMS wrapping +// plugins. It is stored in Raft.
+type WrappedRootKeys struct { + KeyID string // UUID + Algorithm EncryptionAlgorithm + CreateTime int64 + CreateIndex uint64 + ModifyIndex uint64 + State RootKeyState + PublishTime int64 + + WrappedKeys []*WrappedRootKey +} + +func NewWrappedRootKeys(meta *RootKeyMeta) *WrappedRootKeys { + return &WrappedRootKeys{ + KeyID: meta.KeyID, + Algorithm: meta.Algorithm, + CreateTime: meta.CreateTime, + CreateIndex: meta.CreateIndex, + ModifyIndex: meta.ModifyIndex, + State: meta.State, + PublishTime: meta.PublishTime, + WrappedKeys: []*WrappedRootKey{}, + } +} + +func (wrk *WrappedRootKeys) Meta() *RootKeyMeta { + return &RootKeyMeta{ + KeyID: wrk.KeyID, + Algorithm: wrk.Algorithm, + CreateTime: wrk.CreateTime, + CreateIndex: wrk.CreateIndex, + ModifyIndex: wrk.ModifyIndex, + State: wrk.State, + PublishTime: wrk.PublishTime, + } +} + +func (wrk *WrappedRootKeys) Copy() *WrappedRootKeys { + if wrk == nil { + return nil + } + out := *wrk + out.WrappedKeys = helper.CopySlice(wrk.WrappedKeys) + return &out +} + +// IsActive indicates this key is the one currently being used for crypto +// operations (at most one key can be Active) +func (wrk *WrappedRootKeys) IsActive() bool { + return wrk.State == RootKeyStateActive +} + +// MakeActive returns a copy of the WrappedRootKeys with the state set to active +func (wrk *WrappedRootKeys) MakeActive() *WrappedRootKeys { + out := wrk.Copy() + if out != nil { + out.State = RootKeyStateActive + out.PublishTime = 0 + } + return out +} + +// IsRekeying indicates that variables encrypted with this key should be +// rekeyed +func (wrk *WrappedRootKeys) IsRekeying() bool { + return wrk.State == RootKeyStateRekeying +} + +// MakeRekeying returns a copy of the WrappedRootKeys with the state set to +// rekeying +func (wrk *WrappedRootKeys) MakeRekeying() *WrappedRootKeys { + out := wrk.Copy() + if out != nil { + out.State = RootKeyStateRekeying + } + return out +} + +// MakePrepublished returns a copy of the WrappedRootKeys with the state set to +// prepublished at the time t +func (wrk *WrappedRootKeys) MakePrepublished(t int64) *WrappedRootKeys { + out := wrk.Copy() + if out != nil { + out.PublishTime = t + out.State = RootKeyStatePrepublished + } + return out +} + +// IsPrepublished indicates that this key has been published and is pending +// being promoted to active +func (wrk *WrappedRootKeys) IsPrepublished() bool { + return wrk.State == RootKeyStatePrepublished +} + +// MakeInactive returns a copy of the WrappedRootKeys with the state set to inactive +func (wrk *WrappedRootKeys) MakeInactive() *WrappedRootKeys { + out := wrk.Copy() + if out != nil { + out.State = RootKeyStateInactive + } + return out +} + +// IsInactive indicates that this key is no longer being used to encrypt new +// variables or workload identities. +func (wrk *WrappedRootKeys) IsInactive() bool { + return wrk.State == RootKeyStateInactive || wrk.State == RootKeyStateDeprecated +} + +// WrappedRootKey represents a RootKey encrypted by a specific KMS wrapping +// plugin. A slice of these are stored in WrappedRootKeys in Raft. +type WrappedRootKey struct { + // Provider is the KMS wrapping plugin + Provider string + + // ProviderID is the identifier of the specific instance of the KMS wrapping + // plugin, for Nomad Enterprise where you might have multiple KMS of the + // same kind for HA (ex. 2 Vaults) + ProviderID string + + // WrappedDataEncryptionKey is the encrypted DEK used for encrypting + // Variables. 
The BlobInfo includes everything needed for the KMS to decrypt + // it except the KEK. + WrappedDataEncryptionKey *wrapping.BlobInfo + + // WrappedRSAKey is the encrypted DEK used for signing Workload + // Identities. The BlobInfo includes everything needed for the KMS to + // decrypt it except the KEK. + WrappedRSAKey *wrapping.BlobInfo + + // KeyEncryptionKey is the cleartext KEK, and is only included in the struct + // we write to Raft when using the AEAD plugin + KeyEncryptionKey []byte +} + +func (w *WrappedRootKey) Copy() *WrappedRootKey { + if w == nil { + return nil + } + out := *w + copy(out.KeyEncryptionKey, w.KeyEncryptionKey) + out.WrappedDataEncryptionKey = proto.Clone(w.WrappedDataEncryptionKey).(*wrapping.BlobInfo) + out.WrappedRSAKey = proto.Clone(w.WrappedRSAKey).(*wrapping.BlobInfo) + return &out +} + // RootKeyMeta is the metadata used to refer to a RootKey. It is // stored in raft. type RootKeyMeta struct { @@ -202,57 +357,12 @@ func (rkm *RootKeyMeta) IsActive() bool { return rkm.State == RootKeyStateActive } -// MakeActive returns a copy of the RootKeyMeta with the state set to active -func (rkm *RootKeyMeta) MakeActive() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateActive - out.PublishTime = 0 - } - return out -} - -// IsRekeying indicates that variables encrypted with this key should be -// rekeyed -func (rkm *RootKeyMeta) IsRekeying() bool { - return rkm.State == RootKeyStateRekeying -} - -// MakeRekeying returns a copy of the RootKeyMeta with the state set to rekeying -func (rkm *RootKeyMeta) MakeRekeying() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateRekeying - } - return out -} - -// MakePrepublished returns a copy of the RootKeyMeta with the state set to -// prepublished at the time t -func (rkm *RootKeyMeta) MakePrepublished(t int64) *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.PublishTime = t - out.State = RootKeyStatePrepublished - } - return out -} - // IsPrepublished indicates that this key has been published and is pending // being promoted to active func (rkm *RootKeyMeta) IsPrepublished() bool { return rkm.State == RootKeyStatePrepublished } -// MakeInactive returns a copy of the RootKeyMeta with the state set to inactive -func (rkm *RootKeyMeta) MakeInactive() *RootKeyMeta { - out := rkm.Copy() - if out != nil { - out.State = RootKeyStateInactive - } - return out -} - // IsInactive indicates that this key is no longer being used to encrypt new // variables or workload identities. func (rkm *RootKeyMeta) IsInactive() bool { @@ -286,10 +396,11 @@ func (rkm *RootKeyMeta) Validate() error { return nil } -// KeyEncryptionKeyWrapper is the struct that gets serialized for the on-disk -// KMS wrapper. When using the AEAD provider, this struct includes the -// server-specific key-wrapping key. This struct should never be sent over RPC -// or written to Raft. +// KeyEncryptionKeyWrapper is a flattened version of the WrappedRootKeys struct +// that gets serialized to disk for a keyset when using the legacy on-disk +// keystore with the AEAD KMS wrapper. This struct includes the server-specific +// key-wrapping key (KEK). This struct should never be sent over RPC or written +// to Raft. type KeyEncryptionKeyWrapper struct { Meta *RootKeyMeta @@ -354,6 +465,15 @@ type KeyringUpdateRootKeyResponse struct { WriteMeta } +// KeyringUpsertWrappedRootKeyRequest is used by the leader during keyring +// initialization and when keys are rotated, to write a new wrapped root key to +// Raft. 
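The WrappedRootKey fields above map directly onto go-kms-wrapping BlobInfo values. A minimal sketch of how the AEAD provider case described in those comments could produce them; the provider name, helper shape, and key sizes here are assumptions, and the Encrypter's real wrapping flow is not part of this diff.

```go
package example

import (
	"context"
	"crypto/rand"

	wrapping "github.com/hashicorp/go-kms-wrapping/v2"
	"github.com/hashicorp/go-kms-wrapping/v2/aead"
)

// wrapKeyMaterial illustrates wrapping a data encryption key and an RSA
// signing key with a locally generated KEK, the shape implied by the AEAD
// provider comments above. Hypothetical; not the Encrypter implementation.
func wrapKeyMaterial(ctx context.Context, dek, rsaKey []byte) (dekBlob, rsaBlob *wrapping.BlobInfo, kek []byte, err error) {
	// A fresh key-encryption key for this root key.
	kek = make([]byte, 32)
	if _, err = rand.Read(kek); err != nil {
		return nil, nil, nil, err
	}

	w := aead.NewWrapper()
	if err = w.SetAesGcmKeyBytes(kek); err != nil {
		return nil, nil, nil, err
	}

	// Each BlobInfo carries the ciphertext plus what the KMS needs to
	// decrypt it later, except the KEK itself.
	if dekBlob, err = w.Encrypt(ctx, dek); err != nil {
		return nil, nil, nil, err
	}
	if rsaBlob, err = w.Encrypt(ctx, rsaKey); err != nil {
		return nil, nil, nil, err
	}
	return dekBlob, rsaBlob, kek, nil
}
```

The two blobs would populate WrappedDataEncryptionKey and WrappedRSAKey, with Provider naming the KMS plugin in use and KeyEncryptionKey carried in cleartext only for the AEAD case, as the struct comments note.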
+type KeyringUpsertWrappedRootKeyRequest struct { + WrappedRootKeys *WrappedRootKeys + Rekey bool + WriteRequest +} + // KeyringGetRootKeyRequest is used internally for key replication // only and for keyring restores. type KeyringGetRootKeyRequest struct { diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index ead7cb0fa29a..8e62e0599fc5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -117,8 +117,8 @@ const ( ServiceRegistrationDeleteByIDRequestType MessageType = 48 ServiceRegistrationDeleteByNodeIDRequestType MessageType = 49 VarApplyStateRequestType MessageType = 50 - RootKeyMetaUpsertRequestType MessageType = 51 - RootKeyMetaDeleteRequestType MessageType = 52 + RootKeyMetaUpsertRequestType MessageType = 51 // DEPRECATED + WrappedRootKeysDeleteRequestType MessageType = 52 ACLRolesUpsertRequestType MessageType = 53 ACLRolesDeleteByIDRequestType MessageType = 54 ACLAuthMethodsUpsertRequestType MessageType = 55 @@ -127,10 +127,13 @@ const ( ACLBindingRulesDeleteRequestType MessageType = 58 NodePoolUpsertRequestType MessageType = 59 NodePoolDeleteRequestType MessageType = 60 + JobVersionTagRequestType MessageType = 61 + WrappedRootKeysUpsertRequestType MessageType = 62 + NamespaceUpsertRequestType MessageType = 64 + NamespaceDeleteRequestType MessageType = 65 - // Namespace types were moved from enterprise and therefore start at 64 - NamespaceUpsertRequestType MessageType = 64 - NamespaceDeleteRequestType MessageType = 65 + // NOTE: MessageTypes are shared between CE and ENT. If you need to add a + // new type, check that ENT is not already using that value. ) const (