From 99c426dffa4f06dd3efe00cab1255cd4808a24e4 Mon Sep 17 00:00:00 2001 From: Andrew Burke <31974658+atburke@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:04:16 -0700 Subject: [PATCH] Fix GCP metadata errors crashing Teleport (#42871) --- lib/cloud/gcp/vm_test.go | 3 +- lib/cloud/imds/gcp/imds.go | 25 +++++--- lib/cloud/imds/gcp/imds_test.go | 47 ++++++++++++-- lib/service/service.go | 20 ++++-- lib/service/service_test.go | 109 ++++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 22 deletions(-) diff --git a/lib/cloud/gcp/vm_test.go b/lib/cloud/gcp/vm_test.go index 77cc248f84ba7..8bfbecd08d994 100644 --- a/lib/cloud/gcp/vm_test.go +++ b/lib/cloud/gcp/vm_test.go @@ -73,7 +73,8 @@ func TestConvertAPIError(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - require.True(t, trace.IsAccessDenied(convertAPIError(tc.err))) + err := convertAPIError(tc.err) + require.True(t, trace.IsAccessDenied(err), "unexpected error of type %T: %v", tc.err, err) require.Contains(t, tc.err.Error(), "abcd1234") }) } diff --git a/lib/cloud/imds/gcp/imds.go b/lib/cloud/imds/gcp/imds.go index 0830ab3938a9c..49525dc533794 100644 --- a/lib/cloud/imds/gcp/imds.go +++ b/lib/cloud/imds/gcp/imds.go @@ -76,12 +76,23 @@ func NewInstanceMetadataClient(ctx context.Context) (*InstanceMetadataClient, er // IsAvailable checks if instance metadata is available. func (client *InstanceMetadataClient) IsAvailable(ctx context.Context) bool { - instanceData, err := client.getMetadata(ctx, "instance") - return err == nil && instanceData != "" + _, err := client.getNumericID(ctx) + return err == nil } -// GetTags gets all of the GCP instance's labels (note: these are separate from -// its tags, which we do not use). +func (client *InstanceMetadataClient) getNumericID(ctx context.Context) (uint64, error) { + idStr, err := client.GetID(ctx) + if err != nil { + return 0, trace.Wrap(err) + } + id, err := strconv.ParseUint(idStr, 10, 64) + if err != nil || id == 0 { + return 0, trace.BadParameter("Invalid instance ID %q", idStr) + } + return id, nil +} + +// GetTags gets all of the GCP instance's labels and tags. func (client *InstanceMetadataClient) GetTags(ctx context.Context) (map[string]string, error) { // Get a bunch of info from instance metadata. projectID, err := client.GetProjectID(ctx) @@ -96,11 +107,7 @@ func (client *InstanceMetadataClient) GetTags(ctx context.Context) (map[string]s if err != nil { return nil, trace.Wrap(err) } - idStr, err := client.GetID(ctx) - if err != nil { - return nil, trace.Wrap(err) - } - id, err := strconv.ParseUint(idStr, 10, 64) + id, err := client.getNumericID(ctx) if err != nil { return nil, trace.Wrap(err) } diff --git a/lib/cloud/imds/gcp/imds_test.go b/lib/cloud/imds/gcp/imds_test.go index 10ee2c1a2f5f6..ab33e44b4ddec 100644 --- a/lib/cloud/imds/gcp/imds_test.go +++ b/lib/cloud/imds/gcp/imds_test.go @@ -58,16 +58,51 @@ func (m *mockInstanceGetter) GetInstanceTags(ctx context.Context, req *gcp.Insta func TestIsInstanceMetadataAvailable(t *testing.T) { t.Parallel() - t.Run("not available", func(t *testing.T) { - client := &InstanceMetadataClient{ + tests := []struct { + name string + getMetadata metadataGetter + assert require.BoolAssertionFunc + }{ + { + name: "not available", getMetadata: func(ctx context.Context, path string) (string, error) { return "", trace.NotFound("") }, - } - require.False(t, client.IsAvailable(context.Background())) - }) + assert: require.False, + }, + { + name: "not on gcp", + getMetadata: func(ctx context.Context, path string) (string, error) { + return "non-numeric id", nil + }, + assert: require.False, + }, + { + name: "zero ID", + getMetadata: func(ctx context.Context, path string) (string, error) { + return "0", nil + }, + assert: require.False, + }, + { + name: "on mocked gcp", + getMetadata: func(ctx context.Context, path string) (string, error) { + return "12345678", nil + }, + assert: require.True, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + client := &InstanceMetadataClient{ + getMetadata: tc.getMetadata, + } + tc.assert(t, client.IsAvailable(context.Background())) + }) + } - t.Run("on gcp", func(t *testing.T) { + t.Run("on real gcp", func(t *testing.T) { if os.Getenv("TELEPORT_TEST_GCP") == "" { t.Skip("not on gcp") } diff --git a/lib/service/service.go b/lib/service/service.go index d708dbdbd5549..cfbe4fa97ba21 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -967,8 +967,14 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) { imClient := cfg.InstanceMetadataClient if imClient == nil { imClient, err = cloud.DiscoverInstanceMetadata(supervisor.ExitContext()) - if err != nil && !trace.IsNotFound(err) { - return nil, trace.Wrap(err) + if err == nil { + cfg.Logger.InfoContext(supervisor.ExitContext(), + "Found an instance metadata service. Teleport will import labels from this cloud instance.", + "type", imClient.GetType()) + } else if !trace.IsNotFound(err) { + cfg.Logger.ErrorContext(supervisor.ExitContext(), "Error looking for cloud instance metadata", "error", err) + // Keep going. Not being able to fetch labels isn't necessarily an error (e.g. the user doesn't need imported + // labels and hasn't configured their cloud instance for it). } } @@ -977,15 +983,16 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) { if err == nil { cloudHostname = strings.ReplaceAll(cloudHostname, " ", "_") if utils.IsValidHostname(cloudHostname) { - cfg.Logger.InfoContext(supervisor.ExitContext(), "Overriding hostname with value from cloud tag TeleportHostname.", "hostname", cloudHostname) + cfg.Logger.InfoContext(supervisor.ExitContext(), "Overriding hostname with value from cloud tag TeleportHostname", "hostname", cloudHostname) cfg.Hostname = cloudHostname // cloudHostname exists but is not a valid hostname. } else if cloudHostname != "" { - cfg.Logger.InfoContext(supervisor.ExitContext(), "Found invalid hostname in cloud tag TeleportHostname.", "hostname", cloudHostname) + cfg.Logger.InfoContext(supervisor.ExitContext(), "Found invalid hostname in cloud tag TeleportHostname", "hostname", cloudHostname) } } else if !trace.IsNotFound(err) { - return nil, trace.Wrap(err) + cfg.Logger.ErrorContext(supervisor.ExitContext(), "Error looking for hostname tag", "error", err) + // Keep going. } cloudLabels, err = labels.NewCloudImporter(supervisor.ExitContext(), &labels.CloudConfig{ @@ -993,7 +1000,8 @@ func NewTeleport(cfg *servicecfg.Config) (*TeleportProcess, error) { Clock: cfg.Clock, }) if err != nil { - return nil, trace.Wrap(err) + cfg.Logger.ErrorContext(supervisor.ExitContext(), "Cloud labels will not be imported", "error", err) + // Keep going. } } diff --git a/lib/service/service_test.go b/lib/service/service_test.go index 4c928e553f7e2..981ca212cf528 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -1619,3 +1619,112 @@ func TestDebugServiceStartSocket(t *testing.T) { defer req.Body.Close() require.Equal(t, http.StatusNotFound, req.StatusCode) } + +type mockInstanceMetadata struct { + hostname string + hostnameErr error +} + +func (m *mockInstanceMetadata) IsAvailable(ctx context.Context) bool { + return true +} + +func (m *mockInstanceMetadata) GetTags(ctx context.Context) (map[string]string, error) { + return nil, nil +} + +func (m *mockInstanceMetadata) GetHostname(ctx context.Context) (string, error) { + return m.hostname, m.hostnameErr +} + +func (m *mockInstanceMetadata) GetType() types.InstanceMetadataType { + return "mock" +} + +func (m *mockInstanceMetadata) GetID(ctx context.Context) (string, error) { + return "", nil +} + +func TestInstanceMetadata(t *testing.T) { + t.Parallel() + + newCfg := func() *servicecfg.Config { + cfg := servicecfg.MakeDefaultConfig() + cfg.Hostname = "default.example.com" + cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}) + cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} + cfg.Proxy.Enabled = false + cfg.SSH.Enabled = false + cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig() + return cfg + } + + tests := []struct { + name string + imClient imds.Client + expectCloudLabels bool + expectedHostname string + }{ + { + name: "no instance metadata", + imClient: imds.NewDisabledIMDSClient(), + expectCloudLabels: false, + expectedHostname: "default.example.com", + }, + { + name: "instance metadata with valid hostname", + imClient: &mockInstanceMetadata{ + hostname: "new.example.com", + }, + expectCloudLabels: true, + expectedHostname: "new.example.com", + }, + { + name: "instance metadata with no hostname", + imClient: &mockInstanceMetadata{ + hostnameErr: trace.NotFound(""), + }, + expectCloudLabels: true, + expectedHostname: "default.example.com", + }, + { + name: "instance metadata with invalid hostname", + imClient: &mockInstanceMetadata{ + hostname: ")7%#(*&@())", + }, + expectCloudLabels: true, + expectedHostname: "default.example.com", + }, + { + name: "instance metadata with hostname error", + imClient: &mockInstanceMetadata{ + hostnameErr: trace.Errorf(""), + }, + expectCloudLabels: true, + expectedHostname: "default.example.com", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cfg := newCfg() + cfg.DataDir = t.TempDir() + cfg.Auth.StorageConfig.Params["path"] = t.TempDir() + cfg.InstanceMetadataClient = tc.imClient + + process, err := NewTeleport(cfg) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, process.Close()) + }) + + if tc.expectCloudLabels { + require.NotNil(t, process.cloudLabels) + } else { + require.Nil(t, process.cloudLabels) + } + + require.Equal(t, tc.expectedHostname, cfg.Hostname) + }) + } +}