diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rollout/client.go
similarity index 98%
rename from lib/autoupdate/rolloutcontroller/client.go
rename to lib/autoupdate/rollout/client.go
index 4dead0f9dee19..16b9f5197fb2b 100644
--- a/lib/autoupdate/rolloutcontroller/client.go
+++ b/lib/autoupdate/rollout/client.go
@@ -16,7 +16,7 @@
* along with this program. If not, see .
*/
-package rolloutcontroller
+package rollout
import (
"context"
diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rollout/client_test.go
similarity index 98%
rename from lib/autoupdate/rolloutcontroller/client_test.go
rename to lib/autoupdate/rollout/client_test.go
index ba204ffb77db3..fc0e49bd04763 100644
--- a/lib/autoupdate/rolloutcontroller/client_test.go
+++ b/lib/autoupdate/rollout/client_test.go
@@ -16,7 +16,7 @@
* along with this program. If not, see .
*/
-package rolloutcontroller
+package rollout
import (
"context"
@@ -29,7 +29,7 @@ import (
)
// mockClient is a mock implementation if the Client interface for testing purposes.
-// This is used to precisely check which calls are made by the Reconciler during tests.
+// This is used to precisely check which calls are made by the reconciler during tests.
// Use newMockClient to create one from stubs. Once the test is over, you must call
// mockClient.checkIfEmpty to validate all expected calls were made.
type mockClient struct {
diff --git a/lib/autoupdate/rollout/controller.go b/lib/autoupdate/rollout/controller.go
new file mode 100644
index 0000000000000..53a3741f8050a
--- /dev/null
+++ b/lib/autoupdate/rollout/controller.go
@@ -0,0 +1,106 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rollout
+
+import (
+ "context"
+ "log/slog"
+ "time"
+
+ "github.com/gravitational/trace"
+ "github.com/jonboulle/clockwork"
+
+ "github.com/gravitational/teleport/api/utils/retryutils"
+ "github.com/gravitational/teleport/lib/utils/interval"
+)
+
+const (
+ reconcilerPeriod = time.Minute
+)
+
+// Controller wakes up every minute to reconcile the autoupdate_agent_rollout resource.
+// See the reconciler godoc for more details about the reconciliation process.
+// We currently wake up every minute, in the future we might decide to also watch for events
+// (from autoupdate_config and autoupdate_version changefeed) to react faster.
+type Controller struct {
+ // TODO(hugoShaka) add prometheus metrics describing the reconciliation status
+ reconciler reconciler
+ clock clockwork.Clock
+ log *slog.Logger
+}
+
+// NewController creates a new Controller for the autoupdate_agent_rollout kind.
+func NewController(client Client, log *slog.Logger, clock clockwork.Clock) (*Controller, error) {
+ if client == nil {
+ return nil, trace.BadParameter("missing client")
+ }
+ if log == nil {
+ return nil, trace.BadParameter("missing log")
+ }
+ if clock == nil {
+ return nil, trace.BadParameter("missing clock")
+ }
+ return &Controller{
+ clock: clock,
+ log: log,
+ reconciler: reconciler{
+ clt: client,
+ log: log,
+ },
+ }, nil
+}
+
+// Run the autoupdate_agent_rollout controller. This function returns only when its context is canceled.
+func (c *Controller) Run(ctx context.Context) error {
+ config := interval.Config{
+ Duration: reconcilerPeriod,
+ FirstDuration: reconcilerPeriod,
+ Jitter: retryutils.SeventhJitter,
+ Clock: c.clock,
+ }
+ ticker := interval.New(config)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ctx.Done():
+ c.log.InfoContext(ctx, "Stopping autoupdate_agent_rollout controller", "reason", ctx.Err())
+ return ctx.Err()
+ case <-ticker.Next():
+ c.log.DebugContext(ctx, "Reconciling autoupdate_agent_rollout")
+ if err := c.tryAndCatch(ctx); err != nil {
+ c.log.ErrorContext(ctx, "Failed to reconcile autoudpate_agent_controller", "error", err)
+ }
+ }
+ }
+}
+
+// tryAndCatch tries to run the controller reconciliation logic and recovers from potential panic by converting them
+// into errors. This ensures that a critical bug in the reconciler cannot bring down the whole Teleport cluster.
+func (c *Controller) tryAndCatch(ctx context.Context) (err error) {
+ // If something terribly bad happens during the reconciliation, we recover and return an error
+ defer func() {
+ if r := recover(); r != nil {
+ c.log.ErrorContext(ctx, "Recovered from panic in the autoupdate_agent_rollout controller", "panic", r)
+ err = trace.NewAggregate(err, trace.Errorf("Panic recovered during reconciliation: %v", r))
+ }
+ }()
+ err = trace.Wrap(c.reconciler.reconcile(ctx))
+ return
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rollout/reconciler.go
similarity index 95%
rename from lib/autoupdate/rolloutcontroller/reconciler.go
rename to lib/autoupdate/rollout/reconciler.go
index 78989c4ec4a6b..2fc04634c72f9 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler.go
+++ b/lib/autoupdate/rollout/reconciler.go
@@ -16,7 +16,7 @@
* along with this program. If not, see .
*/
-package rolloutcontroller
+package rollout
import (
"context"
@@ -37,11 +37,11 @@ const (
maxConflictRetry = 3
)
-// Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
+// reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because:
// - we reconcile 2 resources with one
// - both input and output are singletons, we don't need the multi resource logic nor stream/paginated APIs
-type Reconciler struct {
+type reconciler struct {
clt Client
log *slog.Logger
@@ -49,9 +49,9 @@ type Reconciler struct {
mutex sync.Mutex
}
-// Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
+// reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
// are racing), in this case we retry the reconciliation immediately.
-func (r *Reconciler) Reconcile(ctx context.Context) error {
+func (r *reconciler) reconcile(ctx context.Context) error {
r.mutex.Lock()
defer r.mutex.Unlock()
@@ -88,7 +88,7 @@ func (r *Reconciler) Reconcile(ctx context.Context) error {
// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError
// if the resource change while we were computing it.
// The caller must handle those error and retry the reconciliation.
-func (r *Reconciler) tryReconcile(ctx context.Context) error {
+func (r *reconciler) tryReconcile(ctx context.Context) error {
// get autoupdate_config
var config *autoupdate.AutoUpdateConfig
if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil {
@@ -171,7 +171,7 @@ func (r *Reconciler) tryReconcile(ctx context.Context) error {
return trace.Wrap(err, "updating rollout")
}
-func (r *Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
+func (r *reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
// reconcile mode
mode, err := getMode(config.GetMode(), version.GetMode())
if err != nil {
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rollout/reconciler_test.go
similarity index 96%
rename from lib/autoupdate/rolloutcontroller/reconciler_test.go
rename to lib/autoupdate/rollout/reconciler_test.go
index 340451d8da46d..4d24563f7b32f 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler_test.go
+++ b/lib/autoupdate/rollout/reconciler_test.go
@@ -16,7 +16,7 @@
* along with this program. If not, see .
*/
-package rolloutcontroller
+package rollout
import (
"context"
@@ -307,7 +307,7 @@ func TestTryReconcile(t *testing.T) {
// Test execution: Running the reconciliation
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
@@ -375,13 +375,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.NoError(t, reconciler.Reconcile(ctx))
+ require.NoError(t, reconciler.reconcile(ctx))
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -397,13 +397,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.NoError(t, reconciler.Reconcile(ctx))
+ require.NoError(t, reconciler.reconcile(ctx))
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -421,13 +421,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.NoError(t, reconciler.Reconcile(ctx))
+ require.NoError(t, reconciler.reconcile(ctx))
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -461,13 +461,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.NoError(t, reconciler.Reconcile(ctx))
+ require.NoError(t, reconciler.reconcile(ctx))
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -499,13 +499,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.NoError(t, reconciler.Reconcile(ctx))
+ require.NoError(t, reconciler.reconcile(ctx))
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -523,13 +523,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.ErrorContains(t, reconciler.Reconcile(ctx), "the DB fell on the floor")
+ require.ErrorContains(t, reconciler.reconcile(ctx), "the DB fell on the floor")
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
@@ -553,13 +553,13 @@ func TestReconciler_Reconcile(t *testing.T) {
}
client := newMockClient(t, stubs)
- reconciler := &Reconciler{
+ reconciler := &reconciler{
clt: client,
log: log,
}
// Test execution: run the reconciliation loop
- require.ErrorContains(t, reconciler.Reconcile(cancelableCtx), "canceled")
+ require.ErrorIs(t, reconciler.reconcile(cancelableCtx), context.Canceled)
// Test validation: check that all the expected calls were received
client.checkIfEmpty(t)
diff --git a/lib/service/service.go b/lib/service/service.go
index 3167fb2bb9af3..ef30805a0fa7d 100644
--- a/lib/service/service.go
+++ b/lib/service/service.go
@@ -98,6 +98,7 @@ import (
"github.com/gravitational/teleport/lib/auth/storage"
"github.com/gravitational/teleport/lib/authz"
"github.com/gravitational/teleport/lib/automaticupgrades"
+ "github.com/gravitational/teleport/lib/autoupdate/rollout"
"github.com/gravitational/teleport/lib/backend"
"github.com/gravitational/teleport/lib/backend/dynamo"
_ "github.com/gravitational/teleport/lib/backend/etcdbk"
@@ -2430,6 +2431,14 @@ func (process *TeleportProcess) initAuthService() error {
return trace.Wrap(spiffeFedSyncer.Run(process.GracefulExitContext()), "running SPIFFEFederation Syncer")
})
+ agentRolloutController, err := rollout.NewController(authServer, logger, process.Clock)
+ if err != nil {
+ return trace.Wrap(err, "creating the rollout controller")
+ }
+ process.RegisterFunc("auth.autoupdate_agent_rollout_controller", func() error {
+ return trace.Wrap(agentRolloutController.Run(process.GracefulExitContext()), "running autoupdate_agent_rollout controller")
+ })
+
process.RegisterFunc("auth.server_info", func() error {
return trace.Wrap(auth.ReconcileServerInfos(process.GracefulExitContext(), authServer))
})
diff --git a/lib/service/service_test.go b/lib/service/service_test.go
index a9308e0503a4d..0ff48c3055395 100644
--- a/lib/service/service_test.go
+++ b/lib/service/service_test.go
@@ -47,7 +47,9 @@ import (
"github.com/gravitational/teleport"
"github.com/gravitational/teleport/api/breaker"
+ autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types"
+ autoupdate "github.com/gravitational/teleport/api/types/autoupdate"
apiutils "github.com/gravitational/teleport/api/utils"
"github.com/gravitational/teleport/entitlements"
"github.com/gravitational/teleport/lib"
@@ -1845,3 +1847,76 @@ func TestInitDatabaseService(t *testing.T) {
})
}
}
+
+// TestAgentRolloutController validates that the agent rollout controller is started
+// when we run the Auth Service. It does so by creating a dummy autoupdate_version resource
+// and checking that the corresponding autoupdate_agent_rollout resource is created by the auth.
+// If you want to test the reconciliation logic, add tests to the rolloutcontroller package instead.
+func TestAgentRolloutController(t *testing.T) {
+ t.Parallel()
+
+ // Test setup: create a Teleport Auth config
+ fakeClock := clockwork.NewFakeClock()
+
+ dataDir := t.TempDir()
+
+ cfg := servicecfg.MakeDefaultConfig()
+ cfg.Clock = fakeClock
+ cfg.DataDir = dataDir
+ cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"})
+ cfg.Auth.Enabled = true
+ cfg.Proxy.Enabled = false
+ cfg.SSH.Enabled = false
+ cfg.DebugService.Enabled = false
+ cfg.Auth.StorageConfig.Params["path"] = dataDir
+ cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
+ cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig()
+
+ process, err := NewTeleport(cfg)
+ require.NoError(t, err)
+
+ // Test setup: start the Teleport auth and wait for it to beocme ready
+ require.NoError(t, process.Start())
+
+ // Test setup: wait for every service to start
+ ctx, cancel := context.WithTimeout(process.ExitContext(), 30*time.Second)
+ defer cancel()
+ for _, eventName := range []string{AuthTLSReady, InstanceReady} {
+ _, err := process.WaitForEvent(ctx, eventName)
+ require.NoError(t, err)
+ }
+
+ // Test cleanup: close the Teleport process and wait for every service to exist before returning.
+ // This ensures that a service will not make the test fail by writing a file to the temporary directory while it's
+ // being removed.
+ t.Cleanup(func() {
+ require.NoError(t, process.Close())
+ require.NoError(t, process.Wait())
+ })
+
+ // Test execution: create the autoupdate_version resource
+ authServer := process.GetAuthServer()
+ version, err := autoupdate.NewAutoUpdateVersion(&autoupdatepb.AutoUpdateVersionSpec{
+ Agents: &autoupdatepb.AutoUpdateVersionSpecAgents{
+ StartVersion: "1.2.3",
+ TargetVersion: "1.2.4",
+ Schedule: autoupdate.AgentsScheduleImmediate,
+ Mode: autoupdate.AgentsUpdateModeEnabled,
+ },
+ })
+ require.NoError(t, err)
+ version, err = authServer.CreateAutoUpdateVersion(ctx, version)
+ require.NoError(t, err)
+
+ // Test execution: advance clock to trigger a reconciliation
+ fakeClock.Advance(2 * time.Minute)
+
+ // Test validation: check that a new autoupdate_agent_rollout config was created
+ require.Eventually(t, func() bool {
+ rollout, err := authServer.GetAutoUpdateAgentRollout(ctx)
+ if err != nil {
+ return false
+ }
+ return rollout.Spec.GetTargetVersion() == version.Spec.GetAgents().GetTargetVersion()
+ }, time.Second, 10*time.Millisecond)
+}