feat(backingimage): backingimage ha eviction enhancement
ref: longhorn/longhorn 2856

Signed-off-by: Jack Lin <jack.lin@suse.com>
ChanYiLin committed Apr 26, 2024
1 parent f63611c commit f6f2738
Showing 11 changed files with 423 additions and 18 deletions.
187 changes: 185 additions & 2 deletions controller/backing_image_controller.go
@@ -110,6 +110,13 @@ func NewBackingImageController(
}
bic.cacheSyncs = append(bic.cacheSyncs, ds.ReplicaInformer.HasSynced)

if _, err = ds.NodeInformer.AddEventHandlerWithResyncPeriod(cache.ResourceEventHandlerFuncs{
UpdateFunc: bic.enqueueBackingImageForNodeUpdate,
}, 0); err != nil {
return nil, err
}
bic.cacheSyncs = append(bic.cacheSyncs, ds.NodeInformer.HasSynced)

return bic, nil
}

@@ -254,6 +261,12 @@ func (bic *BackingImageController) syncBackingImage(key string) (err error) {
if err != nil {
return
}
if !reflect.DeepEqual(existingBackingImage.Spec, backingImage.Spec) {
if _, err := bic.ds.UpdateBackingImage(backingImage); err != nil && apierrors.IsConflict(errors.Cause(err)) {
log.WithError(err).Debugf("Requeue %v due to conflict", key)
bic.enqueueBackingImage(backingImage)
}
}
if reflect.DeepEqual(existingBackingImage.Status, backingImage.Status) {
return
}
@@ -291,9 +304,123 @@ func (bic *BackingImageController) syncBackingImage(key string) (err error) {
return err
}

if err := bic.replenishBackingImageCopies(backingImage); err != nil {
return err
}

bic.cleanupEvictionRequestedBackingImages(backingImage)

return nil
}

func (bic *BackingImageController) replenishBackingImageCopies(bi *longhorn.BackingImage) (err error) {
bids, err := bic.ds.GetBackingImageDataSource(bi.Name)
if err != nil {
if apierrors.IsNotFound(err) {
return nil
}
return errors.Wrap(err, "failed to get the backing image data source")
}
// only maintain MinNumberOfCopies after the BackingImage has been transferred to the BackingImageManager
if !bids.Spec.FileTransferred {
return nil
}

logrus.Infof("[DEBUG] replenishBackingImageCopies")

nonFailedCopies := 0
usedDisks := map[string]bool{}
for diskUUID := range bi.Spec.Disks {
fileStatus, exists := bi.Status.DiskFileStatusMap[diskUUID]
if !exists || (fileStatus.State != longhorn.BackingImageStateFailed &&
fileStatus.State != longhorn.BackingImageStateFailedAndCleanUp &&
fileStatus.State != longhorn.BackingImageStateUnknown) {

// A file missing from the status may simply not have been synced from the backing image manager yet.
// Consider it a newly created copy and count it as non-failed,
// so we don't create an extra copy when handling copy evictions.
usedDisks[diskUUID] = true
nonFailedCopies += 1
}
}
logrus.Infof("[DEBUG] nonFailedCopies: %v", nonFailedCopies)

if nonFailedCopies == 0 {
return nil
} else if nonFailedCopies >= bi.Spec.MinNumberOfCopies {
if err := bic.handleBackingImageCopiesEvictions(nonFailedCopies, bi, usedDisks); err != nil {
return nil
}
} else { // nonFailedCopies < bi.Spec.MinNumberOfCopies
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, usedDisks)
if err != nil {
logrus.WithError(err).Warn("Failed to find a ready node and disk to replenish the backing image copy")
return nil
}
logrus.Infof("[DEBUG] replicate the copy to node: %v, disk: %v", readyNode.Name, readyDiskName)
// BackingImageManager will then sync the BackingImage to the disk
bi.Spec.Disks[readyNode.Status.DiskStatus[readyDiskName].DiskUUID] = ""
}

return nil
}
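For readability, here is a minimal standalone sketch of the replenishment decision in replenishBackingImageCopies, using simplified stand-in types rather than the real longhorn.BackingImage (fileState, backingImage, and countNonFailedCopies are illustrative names only): copies whose file status is missing or not failed count as non-failed, and a new copy is only scheduled when that count drops below MinNumberOfCopies.

```go
package main

import "fmt"

// Simplified stand-ins for the types used in the diff above; the field names
// mirror the real Spec/Status fields, but this is an illustration, not the
// actual longhorn.BackingImage API.
type fileState string

const (
	stateReady            fileState = "ready"
	stateFailed           fileState = "failed"
	stateFailedAndCleanUp fileState = "failed-and-cleanup"
	stateUnknown          fileState = "unknown"
)

type backingImage struct {
	minNumberOfCopies int
	disks             map[string]struct{}  // diskUUID -> copy requested in the spec
	fileStatus        map[string]fileState // diskUUID -> file state reported in the status
}

// countNonFailedCopies mirrors the loop in replenishBackingImageCopies: a copy
// with no status entry yet is treated as newly created, i.e. non-failed, so the
// controller does not schedule a redundant copy for it.
func countNonFailedCopies(bi backingImage) (int, map[string]bool) {
	usedDisks := map[string]bool{}
	nonFailed := 0
	for diskUUID := range bi.disks {
		state, exists := bi.fileStatus[diskUUID]
		if !exists || (state != stateFailed && state != stateFailedAndCleanUp && state != stateUnknown) {
			usedDisks[diskUUID] = true
			nonFailed++
		}
	}
	return nonFailed, usedDisks
}

func main() {
	bi := backingImage{
		minNumberOfCopies: 2,
		disks:             map[string]struct{}{"disk-a": {}, "disk-b": {}},
		fileStatus:        map[string]fileState{"disk-a": stateReady, "disk-b": stateFailed},
	}
	nonFailed, _ := countNonFailedCopies(bi)
	// Only one healthy copy remains, which is below the minimum of two, so the
	// controller would pick another ready disk and add it to Spec.Disks.
	fmt.Println("non-failed copies:", nonFailed, "replenish needed:", nonFailed < bi.minNumberOfCopies)
}
```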

// handleBackingImageCopiesEvictions creates one more backing image copy when eviction is requested and the remaining non-evicting copies would fall below MinNumberOfCopies
func (bic *BackingImageController) handleBackingImageCopiesEvictions(nonFailedCopies int, bi *longhorn.BackingImage, usedDisks map[string]bool) (err error) {
log := getLoggerForBackingImage(bic.logger, bi)
nonEvictingCount := nonFailedCopies

for _, fileStatus := range bi.Status.DiskFileStatusMap {
if fileStatus.EvictionRequested {
nonEvictingCount--
}
}

if nonEvictingCount < bi.Spec.MinNumberOfCopies {
log.Infof("[DEBUG] Creating one more backing image copy for eviction")
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, usedDisks)
if err != nil {
logrus.WithError(err).Warnf("[DEBUG] failed to create the backing image copy")
return nil
}
// BackingImageManager will then sync the BackingImage to the disk
bi.Spec.Disks[readyNode.Status.DiskStatus[readyDiskName].DiskUUID] = ""
}

return nil
}
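The eviction headroom check above boils down to a simple count; here is a hedged sketch with plain integers instead of the real status map (needExtraCopyForEviction is an illustrative name, not part of the codebase):

```go
package main

import "fmt"

// A replacement copy is created first when the copies NOT marked for eviction
// can no longer satisfy the configured minimum.
func needExtraCopyForEviction(nonFailedCopies, evictionRequestedCopies, minNumberOfCopies int) bool {
	nonEvicting := nonFailedCopies - evictionRequestedCopies
	return nonEvicting < minNumberOfCopies
}

func main() {
	// 2 healthy copies, 1 being evicted, minimum 2 -> create one more copy before evicting.
	fmt.Println(needExtraCopyForEviction(2, 1, 2)) // true
	// 3 healthy copies, 1 being evicted, minimum 2 -> eviction can proceed as-is.
	fmt.Println(needExtraCopyForEviction(3, 1, 2)) // false
}
```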

func (bic *BackingImageController) cleanupEvictionRequestedBackingImages(bi *longhorn.BackingImage) {
log := getLoggerForBackingImage(bic.logger, bi)

// If there is no non-evicting healthy backing image copy,
// Longhorn should retain one evicting healthy backing image copy for replenishing.
hasNonEvictingHealthyBackingImageCopy := false
evictingHealthyBackingImageCopyDiskUUID := ""
for diskUUID, fileStatus := range bi.Status.DiskFileStatusMap {
if fileStatus.State != longhorn.BackingImageStateReady {
continue
}
if !fileStatus.EvictionRequested {
hasNonEvictingHealthyBackingImageCopy = true
break
}
evictingHealthyBackingImageCopyDiskUUID = diskUUID
}

for diskUUID, fileStatus := range bi.Status.DiskFileStatusMap {
if !fileStatus.EvictionRequested {
continue
}
if !hasNonEvictingHealthyBackingImageCopy && diskUUID == evictingHealthyBackingImageCopyDiskUUID {
log.Warnf("[DEBUG] Failed to evict backing image copy on disk %v for now since there is no other healthy backing image copy", diskUUID)
continue
}
delete(bi.Spec.Disks, diskUUID)
log.Infof("[DEBUG] Evicted backing image copy on disk %v", diskUUID)
}
}
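The cleanup rule can be illustrated in isolation; the sketch below uses a simplified copyStatus stand-in (not the real Longhorn types) to show why the last healthy copy survives until a replacement exists:

```go
package main

import "fmt"

// Simplified stand-in for the per-disk file status above; illustration only.
type copyStatus struct {
	state             string // e.g. "ready", "failed"
	evictionRequested bool
}

// disksToEvict mirrors cleanupEvictionRequestedBackingImages: every copy that
// requested eviction is removed, except that the last healthy copy is kept when
// no non-evicting healthy copy exists, so replenishment still has a sync source.
func disksToEvict(status map[string]copyStatus) []string {
	hasNonEvictingHealthy := false
	keptDiskUUID := ""
	for diskUUID, s := range status {
		if s.state != "ready" {
			continue
		}
		if !s.evictionRequested {
			hasNonEvictingHealthy = true
			break
		}
		keptDiskUUID = diskUUID
	}

	evicted := []string{}
	for diskUUID, s := range status {
		if !s.evictionRequested {
			continue
		}
		if !hasNonEvictingHealthy && diskUUID == keptDiskUUID {
			continue // keep one healthy copy as the source for replenishing
		}
		evicted = append(evicted, diskUUID)
	}
	return evicted
}

func main() {
	status := map[string]copyStatus{
		"disk-a": {state: "ready", evictionRequested: true},
		"disk-b": {state: "failed", evictionRequested: true},
	}
	// disk-a is the only healthy copy, so only disk-b can be evicted for now.
	fmt.Println(disksToEvict(status))
}
```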

func (bic *BackingImageController) IsBackingImageDataSourceCleaned(bi *longhorn.BackingImage) (cleaned bool, err error) {
bids, err := bic.ds.GetBackingImageDataSource(bi.Name)
if err != nil {
Expand Down Expand Up @@ -396,8 +523,10 @@ func (bic *BackingImageController) handleBackingImageDataSource(bi *longhorn.Bac
}
}
}

// Choose a ready node and disk for the BackingImageDataSource
if !foundReadyDisk {
readyNode, readyDiskName, err := bic.ds.GetRandomReadyNodeDisk()
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, map[string]bool{})
if err != nil {
return err
}
Expand Down Expand Up @@ -528,7 +657,7 @@ func (bic *BackingImageController) handleBackingImageDataSource(bi *longhorn.Bac
changeNodeDisk := err != nil || node.Name != bids.Spec.NodeID || node.Spec.Disks[diskName].Path != bids.Spec.DiskPath || node.Status.DiskStatus[diskName].DiskUUID != bids.Spec.DiskUUID
if changeNodeDisk {
log.Warn("Backing image data source current node and disk is not ready, need to switch to another ready node and disk")
readyNode, readyDiskName, err := bic.ds.GetRandomReadyNodeDisk()
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, map[string]bool{})
if err != nil {
return err
}
Expand Down Expand Up @@ -827,6 +956,60 @@ func (bic *BackingImageController) enqueueBackingImageForBackingImageDataSource(
bic.enqueueBackingImage(obj)
}

func (bic *BackingImageController) enqueueBackingImageForNodeUpdate(oldObj, currObj interface{}) {
oldNode, ok := oldObj.(*longhorn.Node)
if !ok {
deletedState, ok := oldObj.(cache.DeletedFinalStateUnknown)
if !ok {
utilruntime.HandleError(fmt.Errorf("received unexpected obj: %#v", oldObj))
return
}

// use the last known state to enqueue the dependent objects
oldNode, ok = deletedState.Obj.(*longhorn.Node)
if !ok {
utilruntime.HandleError(fmt.Errorf("DeletedFinalStateUnknown contained invalid object: %#v", deletedState.Obj))
return
}
}

currNode, ok := currObj.(*longhorn.Node)
if !ok {
deletedState, ok := currObj.(cache.DeletedFinalStateUnknown)
if !ok {
utilruntime.HandleError(fmt.Errorf("received unexpected obj: %#v", currObj))
return
}

// use the last known state to enqueue the dependent objects
currNode, ok = deletedState.Obj.(*longhorn.Node)
if !ok {
utilruntime.HandleError(fmt.Errorf("DeletedFinalStateUnknown contained invalid object: %#v", deletedState.Obj))
return
}
}

diskBackingImageMap, err := bic.ds.GetDiskBackingImageMap(oldNode)
if err != nil {
utilruntime.HandleError(fmt.Errorf("failed to get disk backing image map when handling node update"))
return
}

// if a node or disk changes its EvictionRequested, enqueue all backing image copies on that node/disk
evictionRequestedChangeOnNodeLevel := currNode.Spec.EvictionRequested != oldNode.Spec.EvictionRequested
for diskName, newDiskSpec := range currNode.Spec.Disks {
oldDiskSpec, ok := oldNode.Spec.Disks[diskName]
evictionRequestedChangeOnDiskLevel := !ok || (newDiskSpec.EvictionRequested != oldDiskSpec.EvictionRequested)
if diskStatus, existed := currNode.Status.DiskStatus[diskName]; existed && (evictionRequestedChangeOnNodeLevel || evictionRequestedChangeOnDiskLevel) {
diskUUID := diskStatus.DiskUUID
for _, backingImage := range diskBackingImageMap[diskUUID] {
bic.enqueueBackingImage(backingImage)
}
}
}

}
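As a rough illustration of the change detection in this handler, the sketch below uses stripped-down nodeSpec/diskSpec structs (illustrative names only) in place of the Longhorn CRDs:

```go
package main

import "fmt"

// Stand-in structs for the node spec fields compared in the handler above;
// the real handler works on *longhorn.Node objects.
type diskSpec struct{ evictionRequested bool }

type nodeSpec struct {
	evictionRequested bool
	disks             map[string]diskSpec
}

// disksNeedingRequeue mirrors the change detection: when the node-level flag
// flips, every disk is affected; otherwise only disks whose own flag changed
// (or that are newly added) are, and the backing images on them get re-enqueued.
func disksNeedingRequeue(old, cur nodeSpec) []string {
	nodeLevelChange := old.evictionRequested != cur.evictionRequested
	affected := []string{}
	for name, curDisk := range cur.disks {
		oldDisk, ok := old.disks[name]
		diskLevelChange := !ok || oldDisk.evictionRequested != curDisk.evictionRequested
		if nodeLevelChange || diskLevelChange {
			affected = append(affected, name)
		}
	}
	return affected
}

func main() {
	old := nodeSpec{disks: map[string]diskSpec{"disk-a": {false}, "disk-b": {false}}}
	cur := nodeSpec{disks: map[string]diskSpec{"disk-a": {true}, "disk-b": {false}}}
	fmt.Println(disksNeedingRequeue(old, cur)) // only disk-a flipped its flag
}
```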

func (bic *BackingImageController) enqueueBackingImageForReplica(obj interface{}) {
replica, isReplica := obj.(*longhorn.Replica)
if !isReplica {
96 changes: 89 additions & 7 deletions controller/node_controller.go
@@ -577,6 +577,10 @@ func (nc *NodeController) syncNode(key string) (err error) {
return err
}

if err := nc.syncBackingImageEvictionRequested(node); err != nil {
return err
}

return nil
}

@@ -1187,7 +1191,7 @@ func (nc *NodeController) cleanUpBackingImagesInDisks(node *longhorn.Node) error
continue
}
existingBackingImage := bi.DeepCopy()
BackingImageDiskFileCleanup(node, bi, bids, waitInterval, 1)
BackingImageDiskFileCleanup(node, bi, bids, waitInterval, bi.Spec.MinNumberOfCopies)
if !reflect.DeepEqual(existingBackingImage.Spec, bi.Spec) {
if _, err := nc.ds.UpdateBackingImage(bi); err != nil {
log.WithError(err).Warn("Failed to update backing image when cleaning up the images in disks")
@@ -1201,13 +1205,13 @@ func (nc *NodeController) cleanUpBackingImagesInDisks(node *longhorn.Node) error
return nil
}

func BackingImageDiskFileCleanup(node *longhorn.Node, bi *longhorn.BackingImage, bids *longhorn.BackingImageDataSource, waitInterval time.Duration, haRequirement int) {
func BackingImageDiskFileCleanup(node *longhorn.Node, bi *longhorn.BackingImage, bids *longhorn.BackingImageDataSource, waitInterval time.Duration, minNumberOfCopies int) {
if bi.Spec.Disks == nil || bi.Status.DiskLastRefAtMap == nil {
return
}

if haRequirement < 1 {
haRequirement = 1
if minNumberOfCopies < 1 {
minNumberOfCopies = 1
}

var readyDiskFileCount, handlingDiskFileCount, failedDiskFileCount int
@@ -1263,17 +1267,17 @@ func BackingImageDiskFileCleanup(node *longhorn.Node, bi *longhorn.BackingImage,
}
switch fileStatus.State {
case longhorn.BackingImageStateFailed:
if haRequirement >= readyDiskFileCount+handlingDiskFileCount+failedDiskFileCount {
if minNumberOfCopies >= readyDiskFileCount+handlingDiskFileCount+failedDiskFileCount {
continue
}
failedDiskFileCount--
case longhorn.BackingImageStateReadyForTransfer, longhorn.BackingImageStateReady:
if haRequirement >= readyDiskFileCount {
if minNumberOfCopies >= readyDiskFileCount {
continue
}
readyDiskFileCount--
default:
if haRequirement >= readyDiskFileCount+handlingDiskFileCount {
if minNumberOfCopies >= readyDiskFileCount+handlingDiskFileCount {
continue
}
handlingDiskFileCount--
@@ -1584,6 +1588,70 @@ func (nc *NodeController) createSnapshotMonitor() (mon monitor.Monitor, err erro
return mon, nil
}

func (nc *NodeController) syncBackingImageEvictionRequested(node *longhorn.Node) error {
// skip early if no eviction is requested, to avoid periodically listing all backing images
if !isNodeOrDisksEvictionRequested(node) {
return nil
}
log := getLoggerForNode(nc.logger, node)

diskBackingImageMap, err := nc.ds.GetDiskBackingImageMap(node)
if err != nil {
return err
}

type backingImageToSync struct {
*longhorn.BackingImage
diskUUID string
evict bool
}
backingImagesToSync := []backingImageToSync{}

for diskName, diskSpec := range node.Spec.Disks {
diskStatus := node.Status.DiskStatus[diskName]
diskUUID := diskStatus.DiskUUID

if diskSpec.EvictionRequested || node.Spec.EvictionRequested {
for _, backingImage := range diskBackingImageMap[diskUUID] {
// trigger eviction request
backingImage.Status.DiskFileStatusMap[diskUUID].EvictionRequested = true
backingImagesToSync = append(backingImagesToSync, backingImageToSync{backingImage, diskUUID, true})
}
} else {
for _, backingImage := range diskBackingImageMap[diskUUID] {
if backingImage.Status.DiskFileStatusMap[diskUUID].EvictionRequested {
// if it is previously set to true, cancel the eviction request
backingImage.Status.DiskFileStatusMap[diskUUID].EvictionRequested = false
backingImagesToSync = append(backingImagesToSync, backingImageToSync{backingImage, diskUUID, false})
}
}
}
}

for _, backingImageToSync := range backingImagesToSync {
backingImageLog := log.WithField("backingimage", backingImageToSync.Name).WithField("disk", backingImageToSync.diskUUID)
if backingImageToSync.evict {
backingImageLog.Infof("Requesting backing image copy eviction")
if _, err := nc.ds.UpdateBackingImageStatus(backingImageToSync.BackingImage); err != nil {
backingImageLog.Warn("Failed to request backing image copy eviction, will enqueue then resync the node")
nc.enqueueNodeRateLimited(node)
continue
}
nc.eventRecorder.Eventf(backingImageToSync.BackingImage, corev1.EventTypeNormal, constant.EventReasonEvictionUserRequested, "Requesting backing image %v eviction from node %v and disk %v", backingImageToSync.Name, node.Spec.Name, backingImageToSync.diskUUID)
} else {
backingImageLog.Infof("Cancelling backing image copy eviction")
if _, err := nc.ds.UpdateBackingImageStatus(backingImageToSync.BackingImage); err != nil {
backingImageLog.Warn("Failed to cancel backing image copy eviction, will enqueue then resync the node")
nc.enqueueNodeRateLimited(node)
continue
}
nc.eventRecorder.Eventf(backingImageToSync.BackingImage, corev1.EventTypeNormal, constant.EventReasonEvictionCanceled, "Cancelling backing image %v eviction from node %v and disk %v", backingImageToSync.Name, node.Spec.Name, backingImageToSync.diskUUID)
}
}

return nil
}
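To summarize the interaction between the two controllers, here is a minimal sketch (plain bools instead of the Longhorn CRDs; desiredEvictionFlag is an illustrative name) of when a copy's EvictionRequested flag should be set by the node controller; the backing image controller then replenishes a copy elsewhere, if needed, before cleanupEvictionRequestedBackingImages drops the disk from Spec.Disks.

```go
package main

import "fmt"

// A backing image copy on a disk should have EvictionRequested set when either
// the node or that disk is being evicted, and cleared otherwise.
func desiredEvictionFlag(nodeEvictionRequested, diskEvictionRequested bool) bool {
	return nodeEvictionRequested || diskEvictionRequested
}

func main() {
	fmt.Println(desiredEvictionFlag(false, true))  // true: the disk is being drained
	fmt.Println(desiredEvictionFlag(true, false))  // true: the whole node is being drained
	fmt.Println(desiredEvictionFlag(false, false)) // false: cancel any earlier request
}
```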

func (nc *NodeController) syncReplicaEvictionRequested(node *longhorn.Node, kubeNode *corev1.Node) error {
log := getLoggerForNode(nc.logger, node)
node.Status.AutoEvicting = false
@@ -1690,3 +1758,17 @@ func (nc *NodeController) shouldEvictReplica(node *longhorn.Node, kubeNode *core

return false, constant.EventReasonEvictionCanceled, nil
}

func isNodeOrDisksEvictionRequested(node *longhorn.Node) bool {
if node.Spec.EvictionRequested {
return true
}

for _, diskSpec := range node.Spec.Disks {
if diskSpec.EvictionRequested {
return true
}
}

return false
}