Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keyring in raft #23977

Merged
merged 13 commits into from
Sep 19, 2024
3 changes: 3 additions & 0 deletions .changelog/23977.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
keyring: Stored wrapped data encryption keys in Raft
```
33 changes: 33 additions & 0 deletions helper/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
package helper

import (
"context"
"fmt"
"time"
)

Expand All @@ -29,3 +31,34 @@ func Backoff(backoffBase time.Duration, backoffLimit time.Duration, attempt uint

return deadline
}

// WithBackoffFunc is a helper that runs a function with geometric backoff + a
// small jitter to a maximum backoff. It returns nil as soon as the function
// succeeds; otherwise it keeps retrying until the context closes and then
// returns an error wrapping the last error from the function.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewers: this logic was lifted from the csi_hook, which is reasonably battle-tested at this point. Once we merge this I'll go back and update the csi_hook to use this function.

func WithBackoffFunc(ctx context.Context, minBackoff, maxBackoff time.Duration, fn func() error) error {
	// err holds the most recent failure from fn so that it can be reported if
	// the context ends before fn succeeds.
	var err error
	backoff := minBackoff

	// The timer is created with a zero duration so the first attempt is not
	// delayed by a backoff interval (assumes NewSafeTimer fires promptly for
	// 0 -- confirm against its definition).
	t, stop := NewSafeTimer(0)
	defer stop()

	for {
		select {
		case <-ctx.Done():
			if err == nil {
				// fn has not run yet, so there is no error of its own to
				// wrap. Wrapping a nil error with %w renders as
				// "%!w(<nil>)" and unwraps to nothing, so report the
				// context's error instead.
				return fmt.Errorf("operation cancelled: %w", ctx.Err())
			}
			return fmt.Errorf("operation cancelled: %w", err)
		case <-t.C:
		}

		if err = fn(); err == nil {
			return nil
		}

		// Double the wait and add a small stagger, never exceeding
		// maxBackoff. Once the cap is reached the interval stays there.
		if backoff < maxBackoff {
			backoff = backoff*2 + RandomStagger(minBackoff/10)
			if backoff > maxBackoff {
				backoff = maxBackoff
			}
		}

		t.Reset(backoff)
	}
}
5 changes: 3 additions & 2 deletions helper/raftutil/msgtypes.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions nomad/acl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ func TestAuthenticate_mTLS(t *testing.T) {
testutil.WaitForLeader(t, leader.RPC)

testutil.Wait(t, func() (bool, error) {
keyset, err := follower.encrypter.activeKeySet()
return keyset != nil, err
cs, err := follower.encrypter.activeCipherSet()
return cs != nil, err
})

rootToken := uuid.Generate()
Expand Down
2 changes: 1 addition & 1 deletion nomad/alloc_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1690,7 +1690,7 @@ func TestAlloc_SignIdentities_Bad(t *testing.T) {
s1, cleanupS1 := TestServer(t, nil)
t.Cleanup(cleanupS1)
codec := rpcClient(t, s1)
testutil.WaitForLeader(t, s1.RPC)
testutil.WaitForKeyring(t, s1.RPC, s1.Region())

node := mock.Node()
must.NoError(t, s1.fsm.State().UpsertNode(structs.MsgTypeTestSetup, 100, node))
Expand Down
102 changes: 80 additions & 22 deletions nomad/core_sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -899,14 +899,27 @@ func (c *CoreScheduler) expiredACLTokenGC(eval *structs.Evaluation, global bool)
// rootKeyRotateOrGC is used to rotate or garbage collect root keys
func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error {

// migration sends updates to the leader so our view of state is no longer
// valid. we ack this core job and will pick up again at the next
// interval.
//
// COMPAT(1.12.0): remove this block in 1.12.0 LTS
stateChanged, err := c.rootKeyMigrate(eval)
if err != nil {
return err
}
if stateChanged {
return nil
}

// a rotation will be sent to the leader so our view of state
// is no longer valid. we ack this core job and will pick up
// the GC work on the next interval
wasRotated, err := c.rootKeyRotate(eval, time.Now())
stateChanged, err = c.rootKeyRotate(eval, time.Now())
if err != nil {
return err
}
if wasRotated {
if stateChanged {
return nil
}
return c.rootKeyGC(eval, time.Now())
Expand All @@ -915,7 +928,7 @@ func (c *CoreScheduler) rootKeyRotateOrGC(eval *structs.Evaluation) error {
func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error {

ws := memdb.NewWatchSet()
iter, err := c.snap.RootKeyMetas(ws)
iter, err := c.snap.RootKeys(ws)
if err != nil {
return err
}
Expand All @@ -931,21 +944,21 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error
if raw == nil {
break
}
keyMeta := raw.(*structs.RootKeyMeta)
if !keyMeta.IsInactive() {
rootKey := raw.(*structs.RootKey)
if !rootKey.IsInactive() {
continue // never GC keys we're still using
}

c.logger.Trace("checking inactive key eligibility for gc",
"create_time", keyMeta.CreateTime, "threshold", rotationThreshold.UnixNano())
"create_time", rootKey.CreateTime, "threshold", rotationThreshold.UnixNano())

if keyMeta.CreateTime > rotationThreshold.UnixNano() {
if rootKey.CreateTime > rotationThreshold.UnixNano() {
continue // don't GC keys with potentially live Workload Identities
}

// don't GC keys used to encrypt Variables or sign legacy non-expiring
// Workload Identities
inUse, err := c.snap.IsRootKeyInUse(keyMeta.KeyID)
inUse, err := c.snap.IsRootKeyInUse(rootKey.KeyID)
if err != nil {
return err
}
Expand All @@ -954,7 +967,7 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error
}

req := &structs.KeyringDeleteRootKeyRequest{
KeyID: keyMeta.KeyID,
KeyID: rootKey.KeyID,
WriteRequest: structs.WriteRequest{
Region: c.srv.config.Region,
AuthToken: eval.LeaderACL,
Expand All @@ -970,24 +983,69 @@ func (c *CoreScheduler) rootKeyGC(eval *structs.Evaluation, now time.Time) error
return nil
}

// rootKeyRotate checks if the active key is old enough that we need to kick off
// a rotation. It prepublishes a key first and only promotes that prepublished
// key to active once the rotation threshold has expired
func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) {
// rootKeyMigrate checks if the cluster is fully upgraded and migrates all the
// legacy root key material to the new wrapped key format. It returns true if
// any of the keys were migrated, because the caller should now treat the
// snapshot as invalid. The boolean is reported alongside any error, so a
// failure part-way through still reflects updates that were already sent.
//
// COMPAT(1.12.0): remove this function in 1.12.0 LTS
func (c *CoreScheduler) rootKeyMigrate(eval *structs.Evaluation) (bool, error) {
	// Skip migration entirely until the servers meet the minimum version for
	// storing the keyring in Raft.
	if !ServersMeetMinimumVersion(
		c.srv.serf.Members(), c.srv.Region(), minVersionKeyringInRaft, true) {
		return false, nil
	}

	ws := memdb.NewWatchSet()
	iter, err := c.snap.RootKeys(ws)
	if err != nil {
		return false, err
	}

	stateChanged := false
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		storedKey := raw.(*structs.RootKey)
		if len(storedKey.WrappedKeys) > 0 {
			continue // already migrated
		}

		// Fetch the key from the encrypter so it can be resubmitted through
		// the Keyring.Update RPC.
		rootKey, err := c.srv.encrypter.GetKey(storedKey.KeyID)
		if err != nil {
			return stateChanged, err
		}
		req := &structs.KeyringUpdateRootKeyRequest{
			RootKey: rootKey,
			WriteRequest: structs.WriteRequest{
				Region:    c.srv.config.Region,
				AuthToken: eval.LeaderACL,
			},
		}

		if err := c.srv.RPC("Keyring.Update",
			req, &structs.KeyringUpdateRootKeyResponse{}); err != nil {
			c.logger.Error("migrating legacy key material failed",
				"error", err, "key_id", storedKey.KeyID)
			// Earlier iterations may already have updated state, so report
			// that rather than unconditionally returning false.
			return stateChanged, err
		}
		stateChanged = true
	}

	return stateChanged, nil
}

// rootKeyRotate checks if the active key is old enough that we need to kick off
// a rotation. It prepublishes a key first and only promotes that prepublished
// key to active once the rotation threshold has expired
func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (bool, error) {
var (
activeKey *structs.RootKeyMeta
prepublishedKey *structs.RootKeyMeta
activeKey *structs.RootKey
prepublishedKey *structs.RootKey
)

ws := memdb.NewWatchSet()
iter, err := c.snap.RootKeys(ws)
if err != nil {
return false, err
}
for raw := iter.Next(); raw != nil; raw = iter.Next() {
key := raw.(*structs.RootKeyMeta)
key := raw.(*structs.RootKey)
switch key.State {
case structs.RootKeyStateActive:
activeKey = key
Expand Down Expand Up @@ -1083,7 +1141,7 @@ func (c *CoreScheduler) rootKeyRotate(eval *structs.Evaluation, now time.Time) (
func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error {

ws := memdb.NewWatchSet()
iter, err := c.snap.RootKeyMetas(ws)
iter, err := c.snap.RootKeys(ws)
if err != nil {
return err
}
Expand All @@ -1093,11 +1151,11 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error {
if raw == nil {
break
}
keyMeta := raw.(*structs.RootKeyMeta)
if !keyMeta.IsRekeying() {
wrappedKeys := raw.(*structs.RootKey)
if !wrappedKeys.IsRekeying() {
continue
}
varIter, err := c.snap.GetVariablesByKeyID(ws, keyMeta.KeyID)
varIter, err := c.snap.GetVariablesByKeyID(ws, wrappedKeys.KeyID)
if err != nil {
return err
}
Expand All @@ -1106,7 +1164,7 @@ func (c *CoreScheduler) variablesRekey(eval *structs.Evaluation) error {
return err
}

rootKey, err := c.srv.encrypter.GetKey(keyMeta.KeyID)
rootKey, err := c.srv.encrypter.GetKey(wrappedKeys.KeyID)
if err != nil {
return fmt.Errorf("rotated key does not exist in keyring: %w", err)
}
Expand Down
Loading