Skip to content

Commit

Permalink
Attempt to deflake TestDynamicClientReuse (#49079)
Browse files Browse the repository at this point in the history
The test was only waiting for a subset of services to be ready,
proceeding with the test, and then closing the process. This caused
a few problems that contributed to the flakiness. First, not calling
`process.Wait` resulted in some services still being active and
writing to the data directory while the testing framework was cleaning
up the temp directory. Second, adding the Wait alone would cause
deadlocks because the test did not wait for all services to be
initialized and ready before shutting down.

In addition to making both of the changes above, the test was also
modified to reduce the number of services being launched to slightly
speed up the test.

Closes #46958.
  • Loading branch information
rosstimothy authored Nov 15, 2024
1 parent 1744dba commit 63d4c9e
Showing 1 changed file with 23 additions and 5 deletions.
28 changes: 23 additions & 5 deletions lib/service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import (
"github.com/gravitational/teleport/lib/integrations/externalauditstorage"
"github.com/gravitational/teleport/lib/limiter"
"github.com/gravitational/teleport/lib/modules"
"github.com/gravitational/teleport/lib/multiplexer"
"github.com/gravitational/teleport/lib/reversetunnelclient"
"github.com/gravitational/teleport/lib/service/servicecfg"
"github.com/gravitational/teleport/lib/services"
Expand Down Expand Up @@ -191,24 +192,39 @@ func TestDynamicClientReuse(t *testing.T) {

cfg := servicecfg.MakeDefaultConfig()
cfg.Clock = fakeClock
var err error
cfg.DataDir = t.TempDir()
cfg.DiagnosticAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"})
cfg.Auth.Enabled = true
cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}
cfg.Auth.SessionRecordingConfig.SetMode(types.RecordOff)
cfg.Auth.NoAudit = true
cfg.Proxy.Enabled = true
cfg.Proxy.DisableDatabaseProxy = true
cfg.Proxy.DisableWebInterface = true
cfg.Proxy.DisableReverseTunnel = true
cfg.Proxy.IdP.SAMLIdP.Enabled = false
cfg.Proxy.PROXYProtocolMode = multiplexer.PROXYProtocolOff
cfg.Proxy.WebAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "localhost:0"}
cfg.SSH.Enabled = false
cfg.DebugService.Enabled = false
cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig()

process, err := NewTeleport(cfg)
require.NoError(t, err)

require.NoError(t, process.Start())
t.Cleanup(func() { require.NoError(t, process.Close()) })

ctx, cancel := context.WithTimeout(process.ExitContext(), 30*time.Second)
defer cancel()
for _, eventName := range []string{AuthTLSReady, ProxySSHReady, ProxyWebServerReady, InstanceReady} {
_, err := process.WaitForEvent(ctx, eventName)
require.NoError(t, err)
}

t.Cleanup(func() {
require.NoError(t, process.Close())
require.NoError(t, process.Wait())
})

// wait for instance connector
iconn, err := process.WaitForConnector(InstanceIdentityEvent, process.logger)
Expand Down Expand Up @@ -236,17 +252,19 @@ func TestDynamicClientReuse(t *testing.T) {
// initial static set of system roles that got applied to the instance cert.
require.NotSame(t, iconn.Client, nconn.Client)

nconn.Close()
require.NoError(t, nconn.Close())

// node connector closure should not affect proxy client
_, err = pconn.Client.Ping(context.Background())
require.NoError(t, err)

pconn.Close()
require.NoError(t, pconn.Close())

// proxy connector closure should not affect instance client
_, err = iconn.Client.Ping(context.Background())
require.NoError(t, err)

require.NoError(t, iconn.Close())
}

func TestMonitor(t *testing.T) {
Expand Down

0 comments on commit 63d4c9e

Please sign in to comment.