Skip to content

Commit

Permalink
Get rid of node condition delinquent.
Browse files Browse the repository at this point in the history
Signed-off-by: James Munson <james.munson@suse.com>
  • Loading branch information
james-munson committed Jul 16, 2024
1 parent d74e3a2 commit 17aae5a
Show file tree
Hide file tree
Showing 6 changed files with 3 additions and 65 deletions.
2 changes: 0 additions & 2 deletions controller/engine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -622,8 +622,6 @@ func (ec *EngineController) DeleteInstance(obj interface{}) (err error) {
}
}

// TODO: I think we should not put this logic here.
// We should still try to delete but ignore the error
// If the node is unreachable, don't bother with the successive timeouts we would spend attempting to contact
// its client proxy to delete the engine.
if isRWXVolume {
Expand Down
22 changes: 2 additions & 20 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,26 +519,8 @@ func (nc *NodeController) syncNode(key string) (err error) {
return nil
}

// Getting here is enough proof of life to clear Lease-based delinquency.
// The node clears itself here; setting it is done by share-manager lease checker.
foundCondition := false
isDelinquent := false
for _, nodeCondition := range node.Status.Conditions {
if nodeCondition.Type == longhorn.NodeConditionTypeDelinquent {
foundCondition = true
if nodeCondition.Status == longhorn.ConditionStatusTrue {
isDelinquent = true
}
}
}
if !foundCondition || isDelinquent {
node.Status.Conditions = types.SetCondition(node.Status.Conditions,
longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse,
"", fmt.Sprintf("Node %v clears delinquency", node.Name))
log.Infof("Node %v clears its delinquency condition.", node.Name)
}

// Likewise for the suppressed webhook endpoint.
// Getting here is enough proof of life to Turn on the webhook endpoint,
// if it has been turned off for RWX failover.
if err := nc.ds.AddLabelToManagerPod(node.Name, types.GetAdmissionWebhookLabel()); err != nil {
log.Warnf("Node %v faied to restore its admission webhook", node.Name)

Check failure on line 525 in controller/node_controller.go

View workflow job for this annotation

GitHub Actions / codespell

faied ==> failed, fade
}
Expand Down
10 changes: 0 additions & 10 deletions controller/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ func (s *NodeControllerSuite) TestManagerPodUp(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -271,7 +270,6 @@ func (s *NodeControllerSuite) TestManagerPodDown(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonManagerPodDown),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonNoMountPropagationSupport),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -359,7 +357,6 @@ func (s *NodeControllerSuite) TestKubeNodeDown(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKubernetesNodeNotReady),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -447,7 +444,6 @@ func (s *NodeControllerSuite) TestKubeNodePressure(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKubernetesNodePressure),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -570,7 +566,6 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -722,7 +717,6 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -880,7 +874,6 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -1009,7 +1002,6 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -1155,7 +1147,6 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down Expand Up @@ -1271,7 +1262,6 @@ func (s *NodeControllerSuite) TestCleanupAllInstanceManagers(c *C) {
Conditions: []longhorn.Condition{
newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusFalse, ""),
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
Expand Down
16 changes: 0 additions & 16 deletions controller/share_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1500,22 +1500,6 @@ func (c *ShareManagerController) isResponsibleFor(sm *longhorn.ShareManager) (bo

// This code should be moved from here to the sync loop, but it requires care.
// The first naive attempt caused repeated calls to cleanup and delete/create.
// Mark expired lease holder node as delinquent - best effort.
node, err := c.ds.GetNode(leaseHolder)
if err != nil {
log.WithError(err).Warnf("Failed to get node to set delinquent condition for %v", leaseHolder)
} else {
node.Status.Conditions = types.SetConditionAndRecord(node.Status.Conditions,
longhorn.NodeConditionTypeDelinquent, longhorn.ConditionStatusTrue,
string(longhorn.NodeConditionReasonMissedLeaseRenewal),
fmt.Sprintf("Node %v marks %v as delinquent", c.controllerID, leaseHolder),
c.eventRecorder, node, corev1.EventTypeWarning)

if _, err := c.ds.UpdateNodeStatus(node); err != nil {
log.WithError(err).Warnf("Failed to update node to set delinquent condition for %v", leaseHolder)
}
}

if err := c.markShareManagerDelinquent(sm); err != nil {
log.WithError(err).Warnf("Failed to update leease to set delinquent condition for %v", sm.Name)
}
Expand Down
17 changes: 1 addition & 16 deletions datastore/longhorn.go
Original file line number Diff line number Diff line change
Expand Up @@ -2936,23 +2936,8 @@ func (s *DataStore) IsNodeDelinquent(nodeName string, volumeName string) (bool,
return false, errors.New("no node name provided to IsNodeDelinquent")
}

/*
node, err := s.GetNodeRO(nodeName)
if err != nil {
if apierrors.IsNotFound(err) {
return true, nil
}
return false, err
}
cond := types.GetCondition(node.Status.Conditions, longhorn.NodeConditionTypeDelinquent)
if cond.Status == longhorn.ConditionStatusTrue {
return true, nil
}
*/

if volumeName == "" {
return false, nil
return false, errors.New("no volume name provided to IsNodeDelinquent")
}
isRWX, _ := s.IsRegularRWXVolume(volumeName)
if isRWX {
Expand Down
1 change: 0 additions & 1 deletion k8s/pkg/apis/longhorn/v1beta2/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ const (
NodeConditionTypeRequiredPackages = "RequiredPackages"
NodeConditionTypeNFSClientInstalled = "NFSClientInstalled"
NodeConditionTypeSchedulable = "Schedulable"
NodeConditionTypeDelinquent = "Delinquent"
)

const (
Expand Down

0 comments on commit 17aae5a

Please sign in to comment.