Skip to content

Commit

Permalink
Fix to allow etcd-druid to patch STS when there is only a change of m…
Browse files Browse the repository at this point in the history
…ount-path even if all pods are not up and running (#915) (#918)
  • Loading branch information
unmarshall authored Nov 12, 2024
1 parent 5ee1464 commit 5305221
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 19 deletions.
6 changes: 6 additions & 0 deletions internal/common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,14 @@ const (
VolumeNameEtcdClientTLS = "etcd-client-tls"
// VolumeNameEtcdPeerCA is the name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for peer communication.
VolumeNameEtcdPeerCA = "etcd-peer-ca"
// OldVolumeNameEtcdPeerCA is the old name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for peer communication.
// TODO: (i062009) remove this when all clusters have started to use the new volume names.
OldVolumeNameEtcdPeerCA = "peer-url-ca-etcd"
// VolumeNameEtcdPeerServerTLS is the name of the volume that contains the server certificate-key pair used to set up the peer server.
VolumeNameEtcdPeerServerTLS = "etcd-peer-server-tls"
// OldVolumeNameEtcdPeerServerTLS is the old name of the volume that contains the server certificate-key pair used to set up the peer server.
// TODO: (i062009) remove this when all clusters have started to use the new volume names.
OldVolumeNameEtcdPeerServerTLS = "peer-url-etcd-server-tls"
// VolumeNameBackupRestoreCA is the name of the volume that contains the CA certificate bundle and CA certificate key used to sign certificates for backup-restore communication.
VolumeNameBackupRestoreCA = "backup-restore-ca"
// VolumeNameBackupRestoreServerTLS is the name of the volume that contains the server certificate-key pair used to set up the backup-restore server.
Expand Down
4 changes: 3 additions & 1 deletion internal/component/clientservice/clientservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ func buildResource(etcd *druidv1alpha1.Etcd, svc *corev1.Service) {
svc.OwnerReferences = []metav1.OwnerReference{druidv1alpha1.GetAsOwnerReference(etcd.ObjectMeta)}
svc.Spec.Type = corev1.ServiceTypeClusterIP
svc.Spec.SessionAffinity = corev1.ServiceAffinityNone
svc.Spec.Selector = druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta)
// Client service should only target StatefulSet pods. Default labels are going to be present on anything that is managed by etcd-druid and started for an etcd cluster.
// Therefore, only using default labels as label selector can cause issues as we have already seen in https://github.com/gardener/etcd-druid/issues/914
svc.Spec.Selector = utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
svc.Spec.Ports = getPorts(etcd)
}

Expand Down
3 changes: 2 additions & 1 deletion internal/component/clientservice/clientservice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ func matchClientService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Ser
expectedAnnotations = etcd.Spec.Etcd.ClientService.Annotations
expectedLabels = utils.MergeMaps(etcd.Spec.Etcd.ClientService.Labels, druidv1alpha1.GetDefaultLabels(etcdObjMeta))
}
expectedLabelSelector := utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcdObjMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})

g.Expect(actualSvc).To(MatchFields(IgnoreExtras, Fields{
"ObjectMeta": MatchFields(IgnoreExtras, Fields{
Expand All @@ -284,7 +285,7 @@ func matchClientService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Ser
"Spec": MatchFields(IgnoreExtras, Fields{
"Type": Equal(corev1.ServiceTypeClusterIP),
"SessionAffinity": Equal(corev1.ServiceAffinityNone),
"Selector": Equal(druidv1alpha1.GetDefaultLabels(etcdObjMeta)),
"Selector": Equal(expectedLabelSelector),
"Ports": ConsistOf(
Equal(corev1.ServicePort{
Name: "client",
Expand Down
4 changes: 3 additions & 1 deletion internal/component/peerservice/peerservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ func buildResource(etcd *druidv1alpha1.Etcd, svc *corev1.Service) {
svc.Spec.Type = corev1.ServiceTypeClusterIP
svc.Spec.ClusterIP = corev1.ClusterIPNone
svc.Spec.SessionAffinity = corev1.ServiceAffinityNone
svc.Spec.Selector = druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta)
// Peer service should only target StatefulSet pods. Default labels are going to be present on anything that is managed by etcd-druid and started for an etcd cluster.
// Therefore, only using default labels as label selector can cause issues as we have already seen in https://github.com/gardener/etcd-druid/issues/914
svc.Spec.Selector = utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcd.ObjectMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
svc.Spec.PublishNotReadyAddresses = true
svc.Spec.Ports = getPorts(etcd)
}
Expand Down
4 changes: 3 additions & 1 deletion internal/component/peerservice/peerservice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/gardener/etcd-druid/internal/common"
"github.com/gardener/etcd-druid/internal/component"
druiderr "github.com/gardener/etcd-druid/internal/errors"
"github.com/gardener/etcd-druid/internal/utils"
testutils "github.com/gardener/etcd-druid/test/utils"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -246,6 +247,7 @@ func newPeerService(etcd *druidv1alpha1.Etcd) *corev1.Service {
func matchPeerService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Service) {
peerPort := ptr.Deref(etcd.Spec.Etcd.ServerPort, common.DefaultPortEtcdPeer)
etcdObjMeta := etcd.ObjectMeta
expectedLabelSelector := utils.MergeMaps(druidv1alpha1.GetDefaultLabels(etcdObjMeta), map[string]string{druidv1alpha1.LabelComponentKey: common.ComponentNameStatefulSet})
g.Expect(actualSvc).To(MatchFields(IgnoreExtras, Fields{
"ObjectMeta": MatchFields(IgnoreExtras, Fields{
"Name": Equal(druidv1alpha1.GetPeerServiceName(etcdObjMeta)),
Expand All @@ -257,7 +259,7 @@ func matchPeerService(g *WithT, etcd *druidv1alpha1.Etcd, actualSvc corev1.Servi
"Type": Equal(corev1.ServiceTypeClusterIP),
"ClusterIP": Equal(corev1.ClusterIPNone),
"SessionAffinity": Equal(corev1.ServiceAffinityNone),
"Selector": Equal(druidv1alpha1.GetDefaultLabels(etcdObjMeta)),
"Selector": Equal(expectedLabelSelector),
"Ports": ConsistOf(
Equal(corev1.ServicePort{
Name: "peer",
Expand Down
26 changes: 16 additions & 10 deletions internal/component/statefulset/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,22 @@ func getEtcdContainerSecretVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.Volum
},
)
}
if etcd.Spec.Etcd.PeerUrlTLS != nil {
secretVolumeMounts = append(secretVolumeMounts, getEtcdContainerPeerVolumeMounts(etcd)...)
if etcd.Spec.Backup.TLS != nil {
secretVolumeMounts = append(secretVolumeMounts,
corev1.VolumeMount{
Name: common.VolumeNameBackupRestoreCA,
MountPath: common.VolumeMountPathBackupRestoreCA,
},
)
}
return secretVolumeMounts
}

func getEtcdContainerPeerVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.VolumeMount {
peerTLSVolMounts := make([]corev1.VolumeMount, 0, 2)
if etcd.Spec.Etcd.PeerUrlTLS != nil {
peerTLSVolMounts = append(peerTLSVolMounts,
corev1.VolumeMount{
Name: common.VolumeNameEtcdPeerCA,
MountPath: common.VolumeMountPathEtcdPeerCA,
Expand All @@ -720,15 +734,7 @@ func getEtcdContainerSecretVolumeMounts(etcd *druidv1alpha1.Etcd) []corev1.Volum
},
)
}
if etcd.Spec.Backup.TLS != nil {
secretVolumeMounts = append(secretVolumeMounts,
corev1.VolumeMount{
Name: common.VolumeNameBackupRestoreCA,
MountPath: common.VolumeMountPathBackupRestoreCA,
},
)
}
return secretVolumeMounts
return peerTLSVolMounts
}

// getPodVolumes gets volumes that needs to be mounted onto the etcd StatefulSet pods
Expand Down
13 changes: 10 additions & 3 deletions internal/component/statefulset/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ func (r _resource) handleTLSChanges(ctx component.OperatorContext, etcd *druidv1
// and the cluster will be stuck in a bad state. Updating peer URL is a cluster wide operation as all members will need to know that a peer TLS has changed.
// If not all members are ready then rolling-update of StatefulSet can potentially cause a healthy node to be restarted causing loss of quorum from which
// there will not be an automatic recovery.
if shouldRequeueForMultiNodeEtcdIfPodsNotReady(existingSts) {
if !r.hasTLSEnablementForPeerURLReflectedOnSTS(etcd, existingSts) && shouldRequeueForMultiNodeEtcdIfPodsNotReady(existingSts) {
return druiderr.New(
druiderr.ErrRequeueAfter,
component.OperationSync,
Expand Down Expand Up @@ -309,10 +309,17 @@ func shouldRequeueForMultiNodeEtcdIfPodsNotReady(sts *appsv1.StatefulSet) bool {
sts.Status.ReadyReplicas < *sts.Spec.Replicas
}

func isStatefulSetTLSConfigInSync(etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) bool {
func (r _resource) hasTLSEnablementForPeerURLReflectedOnSTS(etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) bool {
newEtcdWrapperPeerTLSVolMounts := getEtcdContainerPeerVolumeMounts(etcd)
containerPeerTLSVolMounts := utils.GetEtcdContainerPeerTLSVolumeMounts(existingSts)
r.logger.Info("Checking if peer URL TLS enablement is reflected on StatefulSet", "len(newEtcdWrapperPeerTLSVolMounts)", len(newEtcdWrapperPeerTLSVolMounts), "len(containerPeerTLSVolMounts)", len(containerPeerTLSVolMounts))
return len(containerPeerTLSVolMounts) == len(newEtcdWrapperPeerTLSVolMounts)
}

func isStatefulSetTLSConfigInSync(etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) bool {
newEtcdbrTLSVolMounts := getBackupRestoreContainerSecretVolumeMounts(etcd)
newEtcdWrapperTLSVolMounts := getEtcdContainerSecretVolumeMounts(etcd)
containerTLSVolMounts := utils.GetStatefulSetContainerTLSVolumeMounts(sts)
containerTLSVolMounts := utils.GetStatefulSetContainerTLSVolumeMounts(existingSts)
return !hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcd], newEtcdWrapperTLSVolMounts) &&
!hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcdBackupRestore], newEtcdbrTLSVolMounts)
}
Expand Down
38 changes: 36 additions & 2 deletions internal/utils/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,47 @@ func FetchPVCWarningMessagesForStatefulSet(ctx context.Context, cl client.Client
}

var (
etcdTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdCA, common.VolumeNameEtcdServerTLS, common.VolumeNameEtcdClientTLS, common.VolumeNameEtcdPeerCA, common.VolumeNameEtcdPeerServerTLS, common.VolumeNameBackupRestoreCA)
etcdbrTLSVolumeMountNames = sets.New[string](common.VolumeNameBackupRestoreServerTLS, common.VolumeNameEtcdCA, common.VolumeNameEtcdClientTLS)
etcdTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdCA,
common.VolumeNameEtcdServerTLS,
common.VolumeNameEtcdClientTLS,
common.VolumeNameEtcdPeerCA,
common.VolumeNameEtcdPeerServerTLS,
common.VolumeNameBackupRestoreCA)
possiblePeerTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdPeerCA,
common.OldVolumeNameEtcdPeerCA,
common.VolumeNameEtcdPeerServerTLS,
common.OldVolumeNameEtcdPeerServerTLS)
etcdbrTLSVolumeMountNames = sets.New[string](common.VolumeNameBackupRestoreServerTLS,
common.VolumeNameEtcdCA,
common.VolumeNameEtcdClientTLS)
)

// GetEtcdContainerPeerTLSVolumeMounts returns the volume mounts for the etcd container that are related to peer TLS.
// It will look at both older names (present in version <= v0.22) and new names (present in version >= v0.23) to create the slice.
func GetEtcdContainerPeerTLSVolumeMounts(sts *appsv1.StatefulSet) []corev1.VolumeMount {
volumeMounts := make([]corev1.VolumeMount, 0, 2)
if sts == nil {
return volumeMounts
}
for _, container := range sts.Spec.Template.Spec.Containers {
if container.Name == common.ContainerNameEtcd {
for _, volMount := range container.VolumeMounts {
if possiblePeerTLSVolumeMountNames.Has(volMount.Name) {
volumeMounts = append(volumeMounts, volMount)
}
}
break
}
}
return volumeMounts
}

// GetStatefulSetContainerTLSVolumeMounts returns a map of container name to TLS volume mounts for the given StatefulSet.
func GetStatefulSetContainerTLSVolumeMounts(sts *appsv1.StatefulSet) map[string][]corev1.VolumeMount {
containerVolMounts := make(map[string][]corev1.VolumeMount, 2) // each pod is a 2 container pod. Init containers are not counted as containers.
if sts == nil {
return containerVolMounts
}
for _, container := range sts.Spec.Template.Spec.Containers {
if _, ok := containerVolMounts[container.Name]; !ok {
// Assuming 6 volume mounts per container. If there are more in future then this map's capacity will be increased by the golang runtime.
Expand Down
128 changes: 128 additions & 0 deletions internal/utils/statefulset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"testing"

"github.com/gardener/etcd-druid/internal/client/kubernetes"
"github.com/gardener/etcd-druid/internal/common"
testutils "github.com/gardener/etcd-druid/test/utils"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -270,3 +271,130 @@ func TestFetchPVCWarningMessagesForStatefulSet(t *testing.T) {
})
}
}

func TestGetEtcdContainerPeerTLSVolumeMounts(t *testing.T) {
testCases := []struct {
name string
isSTSNil bool
oldVolMountNames bool
expectedVolumeMounts []corev1.VolumeMount
}{
{
name: "sts is nil",
isSTSNil: true,
expectedVolumeMounts: []corev1.VolumeMount{},
},
{
name: "sts with old volume mount names",
oldVolMountNames: true,
expectedVolumeMounts: []corev1.VolumeMount{
{Name: common.OldVolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.OldVolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
},
{
name: "sts with new volume mount names",
expectedVolumeMounts: []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
},
}
g := NewWithT(t)
t.Parallel()
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
var sts *appsv1.StatefulSet
if tc.isSTSNil {
g.Expect(GetEtcdContainerPeerTLSVolumeMounts(sts)).To(HaveLen(0))
} else {
sts = testutils.CreateStatefulSet("test-sts", "test-ns", uuid.NewUUID(), 3)
sts.Spec.Template.Spec.Containers = append(sts.Spec.Template.Spec.Containers, corev1.Container{Name: common.ContainerNameEtcd})
if tc.oldVolMountNames {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.OldVolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.OldVolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
} else {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
}
g.Expect(GetEtcdContainerPeerTLSVolumeMounts(sts)).To(Equal(tc.expectedVolumeMounts))
}
})
}
}

func TestGetStatefulSetContainerTLSVolumeMounts(t *testing.T) {
testCases := []struct {
name string
isSTSNil bool
peerTLSEnabled bool
expectedVolumeMounts map[string][]corev1.VolumeMount
}{
{
name: "sts is nil",
isSTSNil: true,
expectedVolumeMounts: map[string][]corev1.VolumeMount{},
},
{
name: "sts with peer TLS enabled",
peerTLSEnabled: true,
expectedVolumeMounts: map[string][]corev1.VolumeMount{
common.ContainerNameEtcd: {
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
},
common.ContainerNameEtcdBackupRestore: {
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
},
},
},
{
name: "sts with peer TLS disabled",
peerTLSEnabled: false,
expectedVolumeMounts: map[string][]corev1.VolumeMount{
common.ContainerNameEtcd: {},
common.ContainerNameEtcdBackupRestore: {
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
},
},
},
}
g := NewWithT(t)
t.Parallel()
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
var sts *appsv1.StatefulSet
if tc.isSTSNil {
g.Expect(GetStatefulSetContainerTLSVolumeMounts(sts)).To(HaveLen(0))
} else {
sts = testutils.CreateStatefulSet("test-sts", "test-ns", uuid.NewUUID(), 3)
sts.Spec.Template.Spec.Containers = []corev1.Container{
{Name: common.ContainerNameEtcd},
{Name: common.ContainerNameEtcdBackupRestore},
}
if tc.peerTLSEnabled {
sts.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameEtcdPeerCA, MountPath: common.VolumeMountPathEtcdPeerCA},
{Name: common.VolumeNameEtcdPeerServerTLS, MountPath: common.VolumeMountPathEtcdPeerServerTLS},
}
}
sts.Spec.Template.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{
{Name: common.VolumeNameBackupRestoreServerTLS, MountPath: common.VolumeMountPathBackupRestoreServerTLS},
{Name: common.VolumeNameEtcdCA, MountPath: common.VolumeMountPathEtcdCA},
{Name: common.VolumeNameEtcdClientTLS, MountPath: common.VolumeMountPathEtcdClientTLS},
}
g.Expect(GetStatefulSetContainerTLSVolumeMounts(sts)).To(Equal(tc.expectedVolumeMounts))
}
})
}
}

0 comments on commit 5305221

Please sign in to comment.