feat: Drop the hostname requirement when handling PV topology for release-v0.32.x (#1018) (#1081)

Co-authored-by: Jason Deal <jmdeal@amazon.com>
jonathan-innis and jmdeal authored Mar 11, 2024
1 parent da1016b commit 8861740
Showing 7 changed files with 417 additions and 1 deletion.
9 changes: 9 additions & 0 deletions hack/toolchain.sh
@@ -37,6 +37,15 @@ kubebuilder() {
fi
ln -sf $(setup-envtest use -p path "${K8S_VERSION}" --arch="${arch}" --bin-dir="${KUBEBUILDER_ASSETS}")/* ${KUBEBUILDER_ASSETS}
find $KUBEBUILDER_ASSETS

# Install latest binaries for 1.25.x (contains CEL fix)
if [[ "${K8S_VERSION}" = "1.25.x" ]] && [[ "$OSTYPE" == "linux"* ]]; then
for binary in 'kube-apiserver' 'kubectl'; do
rm $KUBEBUILDER_ASSETS/$binary
wget -P $KUBEBUILDER_ASSETS dl.k8s.io/v1.25.16/bin/linux/${arch}/${binary}
chmod +x $KUBEBUILDER_ASSETS/$binary
done
fi
}

main "$@"
100 changes: 100 additions & 0 deletions pkg/controllers/disruption/suite_test.go
@@ -130,6 +130,106 @@ var _ = AfterEach(func() {
ExpectCleanedUp(ctx, env.Client)
})

var _ = Describe("Simulate Scheduling", func() {
var nodePool *v1beta1.NodePool
BeforeEach(func() {
nodePool = test.NodePool()
})
It("can replace node with a local PV (ignoring hostname affinity)", func() {
nodeClaim, node := test.NodeClaimAndNode(v1beta1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: mostExpensiveInstance.Name,
v1beta1.CapacityTypeLabelKey: mostExpensiveOffering.CapacityType,
v1.LabelTopologyZone: mostExpensiveOffering.Zone,
},
},
Status: v1beta1.NodeClaimStatus{
Allocatable: map[v1.ResourceName]resource.Quantity{
v1.ResourceCPU: resource.MustParse("32"),
v1.ResourcePods: resource.MustParse("100"),
},
},
})
labels := map[string]string{
"app": "test",
}
// create our StatefulSet so we can link a pod to it
ss := test.StatefulSet()
ExpectApplied(ctx, env.Client, ss)

// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass := test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
persistentVolume := test.PersistentVolume(test.PersistentVolumeOptions{UseLocal: true})
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
// This PV is only valid for use against this node
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{node.Name},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
pod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{Labels: labels,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "apps/v1",
Kind: "StatefulSet",
Name: ss.Name,
UID: ss.UID,
Controller: ptr.Bool(true),
BlockOwnerDeletion: ptr.Bool(true),
},
},
},
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectApplied(ctx, env.Client, ss, pod, nodeClaim, node, nodePool, storageClass, persistentVolume, persistentVolumeClaim)

// bind pods to node
ExpectManualBinding(ctx, env.Client, pod, node)

// inform cluster state about nodes and nodeclaims
ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*v1.Node{node}, []*v1beta1.NodeClaim{nodeClaim})

// disruption won't delete the old node until the new node is ready
var wg sync.WaitGroup
ExpectTriggerVerifyAction(&wg)
ExpectMakeNewNodeClaimsReady(ctx, env.Client, &wg, cluster, cloudProvider, 1)
ExpectReconcileSucceeded(ctx, disruptionController, types.NamespacedName{})
wg.Wait()

// Cascade any deletion of the nodeClaim to the node
ExpectNodeClaimsCascadeDeletion(ctx, env.Client, nodeClaim)

// Expect that the new nodeClaim was created and that it's different from the original.
// We should succeed in getting a replacement, since the hostname affinity requirement is assumed to become invalid
// once we spin down the old node
ExpectNotFound(ctx, env.Client, nodeClaim, node)
nodeclaims := ExpectNodeClaims(ctx, env.Client)
nodes := ExpectNodes(ctx, env.Client)
Expect(nodeclaims).To(HaveLen(1))
Expect(nodes).To(HaveLen(1))
Expect(nodeclaims[0].Name).ToNot(Equal(nodeClaim.Name))
Expect(nodes[0].Name).ToNot(Equal(node.Name))
})
})
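
Aside (not part of the diff): the test.PersistentVolumeOptions{UseLocal: true} and {UseHostPath: true} options used in these tests select which volume source backs the test PersistentVolume. The following standalone sketch uses the core v1 API types to show the two shapes; the helper name and values are illustrative, not the repo's actual test helpers.

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// examplePV builds a PersistentVolume backed either by a local disk (Spec.Local)
// or by a host path (Spec.HostPath). Both are node-bound volume sources, which is
// why the tests attach a kubernetes.io/hostname node-affinity term to them.
func examplePV(name string, useLocal bool) *v1.PersistentVolume {
    pv := &v1.PersistentVolume{ObjectMeta: metav1.ObjectMeta{Name: name}}
    if useLocal {
        pv.Spec.Local = &v1.LocalVolumeSource{Path: "/mnt/disks/ssd0"}
    } else {
        pv.Spec.HostPath = &v1.HostPathVolumeSource{Path: "/var/data"}
    }
    return pv
}

func main() {
    local := examplePV("local-pv", true)
    hostPath := examplePV("hostpath-pv", false)
    // The volumetopology.go change further down keys off these fields being non-nil.
    fmt.Println(local.Spec.Local != nil, hostPath.Spec.HostPath != nil) // true true
}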

var _ = Describe("Disruption Taints", func() {
var provisioner *v1alpha5.Provisioner
var machine *v1alpha5.Machine
111 changes: 111 additions & 0 deletions pkg/controllers/provisioning/nodepool_test.go
@@ -1247,6 +1247,117 @@ var _ = Describe("NodePool/Provisioning", func() {
node := ExpectScheduled(ctx, env.Client, pod)
Expect(node.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-3"))
})
DescribeTable("should ignore hostname affinity scheduling when using local path volumes",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
// Create a PersistentVolume that is using a random node name for its affinity
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that we are still able to schedule this pod to a node, even though we had a hostname affinity on it
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
DescribeTable("should ignore hostname affinity scheduling when using local path volumes (ephemeral volume)",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
pod := test.UnschedulablePod(test.PodOptions{
EphemeralVolumeTemplates: []test.EphemeralVolumeTemplateOptions{
{
StorageClassName: &storageClass.Name,
},
},
})
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", pod.Name, pod.Spec.Volumes[0].Name),
},
VolumeName: persistentVolume.Name,
StorageClassName: &storageClass.Name,
})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, pod, persistentVolumeClaim, persistentVolume)
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
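
Aside (not part of the diff): the ephemeral-volume entries above pre-create the claim with fmt.Sprintf("%s-%s", pod.Name, pod.Spec.Volumes[0].Name) because Kubernetes names the PVC backing a generic ephemeral volume "<pod name>-<volume name>". A small illustrative sketch of that convention; the names and values are assumptions, not taken from the test helpers.

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
    storageClassName := "local-path"
    pod := &v1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "app-0"},
        Spec: v1.PodSpec{
            Volumes: []v1.Volume{{
                Name: "scratch",
                VolumeSource: v1.VolumeSource{
                    Ephemeral: &v1.EphemeralVolumeSource{
                        VolumeClaimTemplate: &v1.PersistentVolumeClaimTemplate{
                            Spec: v1.PersistentVolumeClaimSpec{
                                AccessModes:      []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce},
                                StorageClassName: &storageClassName,
                            },
                        },
                    },
                },
            }},
        },
    }
    // Kubernetes creates the PVC for a generic ephemeral volume as "<pod name>-<volume name>",
    // which is the name the tests construct up front so scheduling can resolve the volume.
    fmt.Printf("%s-%s\n", pod.Name, pod.Spec.Volumes[0].Name) // app-0-scratch
}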
It("should not ignore hostname affinity when using non-local path volumes", func() {
// This PersistentVolume is going to use a standard CSI volume for provisioning
persistentVolume := test.PersistentVolume()
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.NodePool(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that this pod can't schedule because of its hostname affinity, and there is currently no node we can schedule it to
ExpectNotScheduled(ctx, env.Client, pod)
})
It("should not schedule if volume zones are incompatible", func() {
persistentVolume := test.PersistentVolume(test.PersistentVolumeOptions{Zones: []string{"test-zone-3"}})
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
111 changes: 111 additions & 0 deletions pkg/controllers/provisioning/provisioner_test.go
@@ -1194,6 +1194,117 @@ var _ = Describe("Provisioner/Provisioning", func() {
node := ExpectScheduled(ctx, env.Client, pod)
Expect(node.Labels).To(HaveKeyWithValue(v1.LabelTopologyZone, "test-zone-3"))
})
DescribeTable("should ignore hostname affinity scheduling when using local path volumes",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
// Create a PersistentVolume that is using a random node name for its affinity
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.Provisioner(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that we are still able to schedule this pod to a node, even though we had a hostname affinity on it
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
DescribeTable("should ignore hostname affinity scheduling when using local path volumes (ephemeral volume)",
func(volumeOptions test.PersistentVolumeOptions) {
// StorageClass that references "no-provisioner" and is used for local volume storage
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "local-path",
},
Provisioner: lo.ToPtr("kubernetes.io/no-provisioner"),
})
pod := test.UnschedulablePod(test.PodOptions{
EphemeralVolumeTemplates: []test.EphemeralVolumeTemplateOptions{
{
StorageClassName: &storageClass.Name,
},
},
})
persistentVolume := test.PersistentVolume(volumeOptions)
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", pod.Name, pod.Spec.Volumes[0].Name),
},
VolumeName: persistentVolume.Name,
StorageClassName: &storageClass.Name,
})
ExpectApplied(ctx, env.Client, test.Provisioner(), storageClass, pod, persistentVolumeClaim, persistentVolume)
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
},
Entry("when using local volumes", test.PersistentVolumeOptions{UseLocal: true}),
Entry("when using hostpath volumes", test.PersistentVolumeOptions{UseHostPath: true}),
)
It("should not ignore hostname affinity when using non-local path volumes", func() {
// This PersistentVolume is going to use a standard CSI volume for provisioning
persistentVolume := test.PersistentVolume()
persistentVolume.Spec.NodeAffinity = &v1.VolumeNodeAffinity{
Required: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: v1.LabelHostname,
Operator: v1.NodeSelectorOpIn,
Values: []string{test.RandomName()},
},
},
},
},
},
}
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
ExpectApplied(ctx, env.Client, test.Provisioner(), storageClass, persistentVolumeClaim, persistentVolume)
pod := test.UnschedulablePod(test.PodOptions{
PersistentVolumeClaims: []string{persistentVolumeClaim.Name},
})
ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod)
// Expect that this pod can't schedule because of its hostname affinity, and there is currently no node we can schedule it to
ExpectNotScheduled(ctx, env.Client, pod)
})
It("should not schedule if volume zones are incompatible", func() {
persistentVolume := test.PersistentVolume(test.PersistentVolumeOptions{Zones: []string{"test-zone-3"}})
persistentVolumeClaim := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{VolumeName: persistentVolume.Name, StorageClassName: &storageClass.Name})
9 changes: 8 additions & 1 deletion pkg/controllers/provisioning/scheduling/volumetopology.go
@@ -148,7 +148,14 @@ func (v *VolumeTopology) getPersistentVolumeRequirements(ctx context.Context, po
     var requirements []v1.NodeSelectorRequirement
     if len(pv.Spec.NodeAffinity.Required.NodeSelectorTerms) > 0 {
         // Terms are ORed, only use the first term
-        requirements = append(requirements, pv.Spec.NodeAffinity.Required.NodeSelectorTerms[0].MatchExpressions...)
+        requirements = pv.Spec.NodeAffinity.Required.NodeSelectorTerms[0].MatchExpressions
+        // If the volume is backed by a Local or HostPath source, ignore any Hostname affinity on it,
+        // since re-scheduling the pod to a new node means the existing Hostname requirement can never be satisfied
+        if pv.Spec.Local != nil || pv.Spec.HostPath != nil {
+            requirements = lo.Reject(requirements, func(n v1.NodeSelectorRequirement, _ int) bool {
+                return n.Key == v1.LabelHostname
+            })
+        }
     }
     return requirements, nil
 }
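
Aside (not part of the diff): a minimal self-contained sketch of the filtering behavior introduced above. For a PV backed by a Local or HostPath source that carries both a hostname and a zone requirement, the kubernetes.io/hostname term is rejected while the zone term is preserved, so zonal topology can still be honored when Karpenter simulates a replacement node. Variable names and values here are illustrative.

package main

import (
    "fmt"

    "github.com/samber/lo"
    v1 "k8s.io/api/core/v1"
)

// pvRequirements mirrors the logic above: take the first node-selector term of the
// PV's required node affinity, then drop the hostname requirement when the volume
// is backed by a Local or HostPath source.
func pvRequirements(pv *v1.PersistentVolume) []v1.NodeSelectorRequirement {
    if pv.Spec.NodeAffinity == nil || pv.Spec.NodeAffinity.Required == nil ||
        len(pv.Spec.NodeAffinity.Required.NodeSelectorTerms) == 0 {
        return nil
    }
    requirements := pv.Spec.NodeAffinity.Required.NodeSelectorTerms[0].MatchExpressions
    if pv.Spec.Local != nil || pv.Spec.HostPath != nil {
        requirements = lo.Reject(requirements, func(n v1.NodeSelectorRequirement, _ int) bool {
            return n.Key == v1.LabelHostname
        })
    }
    return requirements
}

func main() {
    pv := &v1.PersistentVolume{
        Spec: v1.PersistentVolumeSpec{
            PersistentVolumeSource: v1.PersistentVolumeSource{
                Local: &v1.LocalVolumeSource{Path: "/mnt/disks/ssd0"},
            },
            NodeAffinity: &v1.VolumeNodeAffinity{
                Required: &v1.NodeSelector{
                    NodeSelectorTerms: []v1.NodeSelectorTerm{{
                        MatchExpressions: []v1.NodeSelectorRequirement{
                            {Key: v1.LabelHostname, Operator: v1.NodeSelectorOpIn, Values: []string{"node-a"}},
                            {Key: v1.LabelTopologyZone, Operator: v1.NodeSelectorOpIn, Values: []string{"us-west-2a"}},
                        },
                    }},
                },
            },
        },
    }
    // Only the zone requirement survives; the hostname requirement is rejected.
    fmt.Println(pvRequirements(pv))
}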