diff --git a/controller/volume_controller.go b/controller/volume_controller.go index a7a9a5a7c4..8299e3a057 100644 --- a/controller/volume_controller.go +++ b/controller/volume_controller.go @@ -2304,10 +2304,15 @@ func (c *VolumeController) listReadySchedulableAndScheduledNodes(volume *longhor return nil, err } + allowEmptyNodeSelectorVolume, err := c.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume) + if err != nil { + return nil, errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyNodeSelectorVolume) + } + filteredReadyNodes := readyNodes if len(volume.Spec.NodeSelector) != 0 { for nodeName, node := range readyNodes { - if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector) { + if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector, allowEmptyNodeSelectorVolume) { delete(filteredReadyNodes, nodeName) } } diff --git a/scheduler/replica_scheduler.go b/scheduler/replica_scheduler.go index ba05d2bd0d..b68c407890 100644 --- a/scheduler/replica_scheduler.go +++ b/scheduler/replica_scheduler.go @@ -147,7 +147,9 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod nodeSoftAntiAffinity, err := rcs.ds.GetSettingAsBool(types.SettingNameReplicaSoftAntiAffinity) if err != nil { - logrus.Errorf("error getting replica soft anti-affinity setting: %v", err) + err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameReplicaSoftAntiAffinity) + multiError.Append(util.NewMultiError(err.Error())) + return map[string]*Disk{}, multiError } if volume.Spec.ReplicaSoftAntiAffinity != longhorn.ReplicaSoftAntiAffinityDefault && @@ -157,7 +159,9 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod zoneSoftAntiAffinity, err := rcs.ds.GetSettingAsBool(types.SettingNameReplicaZoneSoftAntiAffinity) if err != nil { - logrus.Errorf("Error getting replica zone soft anti-affinity setting: %v", err) + err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameReplicaZoneSoftAntiAffinity) + multiError.Append(util.NewMultiError(err.Error())) + return map[string]*Disk{}, multiError } if volume.Spec.ReplicaZoneSoftAntiAffinity != longhorn.ReplicaZoneSoftAntiAffinityDefault && volume.Spec.ReplicaZoneSoftAntiAffinity != "" { @@ -202,6 +206,13 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod return result } + allowEmptyNodeSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume) + if err != nil { + err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyNodeSelectorVolume) + multiError.Append(util.NewMultiError(err.Error())) + return map[string]*Disk{}, multiError + } + unusedNodes := map[string]*longhorn.Node{} unusedNodesInNewZones := map[string]*longhorn.Node{} nodesInUnusedZones := map[string]*longhorn.Node{} @@ -209,7 +220,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod for nodeName, node := range nodeInfo { // Filter Nodes. If the Nodes don't match the tags, don't bother marking them as candidates. - if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector) { + if !types.IsSelectorsInTags(node.Spec.Tags, volume.Spec.NodeSelector, allowEmptyNodeSelectorVolume) { continue } if _, ok := usedNodes[nodeName]; !ok { @@ -291,6 +302,13 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk multiError = util.NewMultiError() preferredDisks = map[string]*Disk{} + allowEmptyDiskSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyDiskSelectorVolume) + if err != nil { + err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyDiskSelectorVolume) + multiError.Append(util.NewMultiError(err.Error())) + return preferredDisks, multiError + } + if len(disks) == 0 { multiError.Append(util.NewMultiError(longhorn.ErrorReplicaScheduleDiskUnavailable)) return preferredDisks, multiError @@ -349,7 +367,7 @@ func (rcs *ReplicaScheduler) filterNodeDisksForReplica(node *longhorn.Node, disk } // Check if the Disk's Tags are valid. - if !types.IsSelectorsInTags(diskSpec.Tags, volume.Spec.DiskSelector) { + if !types.IsSelectorsInTags(diskSpec.Tags, volume.Spec.DiskSelector, allowEmptyDiskSelectorVolume) { multiError.Append(util.NewMultiError(longhorn.ErrorReplicaScheduleTagsNotFulfilled)) continue } @@ -445,7 +463,11 @@ func (rcs *ReplicaScheduler) CheckAndReuseFailedReplica(replicas map[string]*lon availableNodeDisksMap := map[string]map[string]struct{}{} reusableNodeReplicasMap := map[string][]*longhorn.Replica{} for _, r := range replicas { - if !rcs.isFailedReplicaReusable(r, volume, allNodesInfo, hardNodeAffinity) { + isReusable, err := rcs.isFailedReplicaReusable(r, volume, allNodesInfo, hardNodeAffinity) + if err != nil { + return nil, err + } + if !isReusable { continue } @@ -539,29 +561,34 @@ func (rcs *ReplicaScheduler) RequireNewReplica(replicas map[string]*longhorn.Rep return lastDegradedAt.Add(waitInterval).Sub(now) + time.Second } -func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *longhorn.Volume, nodeInfo map[string]*longhorn.Node, hardNodeAffinity string) bool { +func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *longhorn.Volume, nodeInfo map[string]*longhorn.Node, hardNodeAffinity string) (bool, error) { if r.Spec.FailedAt == "" { - return false + return false, nil } if r.Spec.NodeID == "" || r.Spec.DiskID == "" { - return false + return false, nil } if r.Spec.RebuildRetryCount >= FailedReplicaMaxRetryCount { - return false + return false, nil } if r.Status.EvictionRequested { - return false + return false, nil } if hardNodeAffinity != "" && r.Spec.NodeID != hardNodeAffinity { - return false + return false, nil } if isReady, _ := rcs.ds.CheckEngineImageReadiness(r.Spec.EngineImage, r.Spec.NodeID); !isReady { - return false + return false, nil + } + + allowEmptyDiskSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyDiskSelectorVolume) + if err != nil { + return false, errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyDiskSelectorVolume) } node, exists := nodeInfo[r.Spec.NodeID] if !exists { - return false + return false, nil } diskFound := false for diskName, diskStatus := range node.Status.DiskStatus { @@ -590,30 +617,30 @@ func (rcs *ReplicaScheduler) isFailedReplicaReusable(r *longhorn.Replica, v *lon diskFound = true diskSpec, exists := node.Spec.Disks[diskName] if !exists { - return false + return false, nil } if !diskSpec.AllowScheduling || diskSpec.EvictionRequested { - return false + return false, nil } - if !types.IsSelectorsInTags(diskSpec.Tags, v.Spec.DiskSelector) { - return false + if !types.IsSelectorsInTags(diskSpec.Tags, v.Spec.DiskSelector, allowEmptyDiskSelectorVolume) { + return false, nil } } } if !diskFound { - return false + return false, nil } im, err := rcs.ds.GetInstanceManagerByInstance(r) if err != nil { logrus.Errorf("failed to get instance manager when checking replica %v is reusable: %v", r.Name, err) - return false + return false, nil } if im.DeletionTimestamp != nil || im.Status.CurrentState != longhorn.InstanceManagerStateRunning { - return false + return false, nil } - return true + return true, nil } // IsPotentiallyReusableReplica is used to check if a failed replica is potentially reusable. diff --git a/types/setting.go b/types/setting.go index c6fe2be1c6..c130b6548f 100644 --- a/types/setting.go +++ b/types/setting.go @@ -107,6 +107,8 @@ const ( SettingNameV2DataEngine = SettingName("v2-data-engine") SettingNameV2DataEngineHugepageLimit = SettingName("v2-data-engine-hugepage-limit") SettingNameOfflineReplicaRebuilding = SettingName("offline-replica-rebuilding") + SettingNameAllowEmptyNodeSelectorVolume = SettingName("allow-empty-node-selector-volume") + SettingNameAllowEmptyDiskSelectorVolume = SettingName("allow-empty-disk-selector-volume") ) var ( @@ -179,6 +181,8 @@ var ( SettingNameV2DataEngine, SettingNameV2DataEngineHugepageLimit, SettingNameOfflineReplicaRebuilding, + SettingNameAllowEmptyNodeSelectorVolume, + SettingNameAllowEmptyDiskSelectorVolume, } ) @@ -277,6 +281,8 @@ var ( SettingNameV2DataEngine: SettingDefinitionV2DataEngine, SettingNameV2DataEngineHugepageLimit: SettingDefinitionV2DataEngineHugepageLimit, SettingNameOfflineReplicaRebuilding: SettingDefinitionOfflineReplicaRebuilding, + SettingNameAllowEmptyNodeSelectorVolume: SettingDefinitionAllowEmptyNodeSelectorVolume, + SettingNameAllowEmptyDiskSelectorVolume: SettingDefinitionAllowEmptyDiskSelectorVolume, } SettingDefinitionBackupTarget = SettingDefinition{ @@ -1103,6 +1109,26 @@ var ( ReadOnly: true, Default: "1024", } + + SettingDefinitionAllowEmptyNodeSelectorVolume = SettingDefinition{ + DisplayName: "Allow Scheduling Empty Node Selector Volumes To Any Node", + Description: "Allow replica of the volume without node selector to be scheduled on node with tags, default true", + Category: SettingCategoryScheduling, + Type: SettingTypeBool, + Required: true, + ReadOnly: false, + Default: "true", + } + + SettingDefinitionAllowEmptyDiskSelectorVolume = SettingDefinition{ + DisplayName: "Allow Scheduling Empty Disk Selector Volumes To Any Disk", + Description: "Allow replica of the volume without disk selector to be scheduled on disk with tags, default true", + Category: SettingCategoryScheduling, + Type: SettingTypeBool, + Required: true, + ReadOnly: false, + Default: "true", + } ) type NodeDownPodDeletionPolicy string @@ -1198,6 +1224,10 @@ func ValidateSetting(name, value string) (err error) { fallthrough case SettingNameV2DataEngine: fallthrough + case SettingNameAllowEmptyNodeSelectorVolume: + fallthrough + case SettingNameAllowEmptyDiskSelectorVolume: + fallthrough case SettingNameAllowCollectingLonghornUsage: if value != "true" && value != "false" { return fmt.Errorf("value %v of setting %v should be true or false", value, sName) diff --git a/types/types.go b/types/types.go index 953578af2c..9615a06963 100644 --- a/types/types.go +++ b/types/types.go @@ -995,12 +995,18 @@ func GetLHVolumeAttachmentNameFromVolumeName(volName string) string { // IsSelectorsInTags checks if all the selectors are present in the tags slice. // It returns true if all selectors are found, false otherwise. -func IsSelectorsInTags(tags, selectors []string) bool { +func IsSelectorsInTags(tags, selectors []string, allowEmptySelector bool) bool { if !sort.StringsAreSorted(tags) { logrus.Debug("BUG: Tags are not sorted, sorting now") sort.Strings(tags) } + if len(selectors) == 0 { + if !allowEmptySelector && len(tags) != 0 { + return false + } + } + for _, selector := range selectors { index := sort.SearchStrings(tags, selector) // If the selector is not found or the index is out of bounds, return false. diff --git a/types/types_test.go b/types/types_test.go index 020c9dd8ed..30558fd4a9 100644 --- a/types/types_test.go +++ b/types/types_test.go @@ -123,43 +123,55 @@ func (s *TestSuite) TestParseToleration(c *C) { func (s *TestSuite) TestIsSelectorsInTags(c *C) { type testCase struct { - inputTags []string - inputSelectors []string + inputTags []string + inputSelectors []string + allowEmptySelector bool expected bool } testCases := map[string]testCase{ "selectors exist": { - inputTags: []string{"aaa", "bbb", "ccc"}, - inputSelectors: []string{"aaa", "bbb", "ccc"}, - expected: true, + inputTags: []string{"aaa", "bbb", "ccc"}, + inputSelectors: []string{"aaa", "bbb", "ccc"}, + allowEmptySelector: true, + expected: true, }, "selectors mis-matched": { - inputTags: []string{"aaa", "bbb", "ccc"}, - inputSelectors: []string{"aaa", "b", "ccc"}, - expected: false, + inputTags: []string{"aaa", "bbb", "ccc"}, + inputSelectors: []string{"aaa", "b", "ccc"}, + allowEmptySelector: true, + expected: false, }, - "selectors empty": { - inputTags: []string{"aaa", "bbb", "ccc"}, - inputSelectors: []string{}, - expected: true, + "selectors empty and tolerate": { + inputTags: []string{"aaa", "bbb", "ccc"}, + inputSelectors: []string{}, + allowEmptySelector: true, + expected: true, + }, + "selectors empty and not tolerate": { + inputTags: []string{"aaa", "bbb", "ccc"}, + inputSelectors: []string{}, + allowEmptySelector: false, + expected: false, }, "tags unsorted": { - inputTags: []string{"bbb", "aaa", "ccc"}, - inputSelectors: []string{"aaa", "bbb", "ccc"}, - expected: true, + inputTags: []string{"bbb", "aaa", "ccc"}, + inputSelectors: []string{"aaa", "bbb", "ccc"}, + allowEmptySelector: true, + expected: true, }, "tags empty": { - inputTags: []string{}, - inputSelectors: []string{"aaa", "bbb", "ccc"}, - expected: false, + inputTags: []string{}, + inputSelectors: []string{"aaa", "bbb", "ccc"}, + allowEmptySelector: true, + expected: false, }, } for testName, testCase := range testCases { fmt.Printf("testing %v\n", testName) - actual := IsSelectorsInTags(testCase.inputTags, testCase.inputSelectors) + actual := IsSelectorsInTags(testCase.inputTags, testCase.inputSelectors, testCase.allowEmptySelector) c.Assert(actual, Equals, testCase.expected, Commentf(TestErrResultFmt, testName)) } }