Skip to content

Commit

Permalink
fix(BackingImage): use Spec.DiskFileSpecMap as useddisks when getting…
Browse files Browse the repository at this point in the history
… ready node and disk for BackingImage

ref: longhorn/longhorn 2856

Signed-off-by: Jack Lin <jack.lin@suse.com>
  • Loading branch information
ChanYiLin committed Jul 1, 2024
1 parent bbb473d commit c538153
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 64 deletions.
2 changes: 1 addition & 1 deletion client/generated_backing_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type BackingImage struct {

DeletionTimestamp string `json:"deletionTimestamp,omitempty" yaml:"deletion_timestamp,omitempty"`

DiskFileStatusMap map[string]interface{} `json:"diskFileStatusMap,omitempty" yaml:"disk_file_status_map,omitempty"`
DiskFileStatusMap map[string]BackingImageDiskFileStatus `json:"diskFileStatusMap,omitempty" yaml:"disk_file_status_map,omitempty"`

DiskSelector []string `json:"diskSelector,omitempty" yaml:"disk_selector,omitempty"`

Expand Down
68 changes: 24 additions & 44 deletions controller/backing_image_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,61 +337,40 @@ func (bic *BackingImageController) replenishBackingImageCopies(bi *longhorn.Back
}

nonFailedCopies := 0
usedDisks := map[string]bool{}
for diskUUID := range bi.Spec.DiskFileSpecMap {
fileStatus, exists := bi.Status.DiskFileStatusMap[diskUUID]
if !exists || (fileStatus.State != longhorn.BackingImageStateFailed &&
fileStatus.State != longhorn.BackingImageStateFailedAndCleanUp &&
fileStatus.State != longhorn.BackingImageStateUnknown) {

// Non-existing file in status could due to not being synced from backing image manager yet.
// Consider it as newly created copy and count it as non-failed copies.
// So we don't create extra copy when handling copies evictions.
usedDisks[diskUUID] = true
nonFailedCopies += 1
}
}

// Need to count the evicted copies in the nonFailedCopies then handle it in handleBackingImageCopiesEvictions
// so we can distinguish the case of "0 healthy copy" and "there is 1 copy but being evicted".
if nonFailedCopies == 0 {
return nil
} else if nonFailedCopies >= bi.Spec.MinNumberOfCopies {
if err := bic.handleBackingImageCopiesEvictions(nonFailedCopies, bi, usedDisks); err != nil {
return nil
}
} else { //nonFailedCopies < bi.Spec.MinNumberOfCopies
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, usedDisks)
logrus.Infof("replicate the copy to node: %v, disk: %v", readyNode, readyDiskName)
if err != nil {
logrus.WithError(err).Warnf("failed to create the backing image copy")
return nil
}
// BackingImageManager will then sync the BackingImage to the disk
bi.Spec.DiskFileSpecMap[readyNode.Status.DiskStatus[readyDiskName].DiskUUID] = &longhorn.BackingImageDiskFileSpec{}
}

return nil
}

// handleBackingImageCopiesEvictions do creating one more replica for eviction, if requested
func (bic *BackingImageController) handleBackingImageCopiesEvictions(nonFailedCopies int, bi *longhorn.BackingImage, usedDisks map[string]bool) (err error) {
log := getLoggerForBackingImage(bic.logger, bi)
NonEvictingCount := nonFailedCopies

for _, fileSpec := range bi.Spec.DiskFileSpecMap {
if fileSpec.EvictionRequested {
NonEvictingCount--
} else {

Check notice on line 356 in controller/backing_image_controller.go

View check run for this annotation

codefactor.io / CodeFactor

controller/backing_image_controller.go#L356

If block ends with a return statement, so drop this else and outdent its block. (indent-error-flow)
nonEvictingCount := nonFailedCopies
for _, fileSpec := range bi.Spec.DiskFileSpecMap {
if fileSpec.EvictionRequested {
nonEvictingCount--
}
}
}

if NonEvictingCount < bi.Spec.MinNumberOfCopies {
log.Infof("Creating one more backing image copy for eviction")
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, usedDisks)
if err != nil {
logrus.WithError(err).Warnf("failed to create the backing image copy")
return nil
if nonEvictingCount < bi.Spec.MinNumberOfCopies {
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi)
logrus.Infof("replicate the copy to node: %v, disk: %v", readyNode, readyDiskName)
if err != nil {
logrus.WithError(err).Warnf("failed to create the backing image copy")
return nil
}
// BackingImageManager will then sync the BackingImage to the disk
bi.Spec.DiskFileSpecMap[readyNode.Status.DiskStatus[readyDiskName].DiskUUID] = &longhorn.BackingImageDiskFileSpec{}
}
// BackingImageManager will then sync the BackingImage to the disk
bi.Spec.DiskFileSpecMap[readyNode.Status.DiskStatus[readyDiskName].DiskUUID] = &longhorn.BackingImageDiskFileSpec{}
}

return nil
Expand All @@ -405,8 +384,10 @@ func (bic *BackingImageController) cleanupEvictionRequestedBackingImageCopies(bi
hasNonEvictingHealthyBackingImageCopy := false
evictingHealthyBackingImageCopyDiskUUID := ""
for diskUUID, fileSpec := range bi.Spec.DiskFileSpecMap {
fileStatus, exists := bi.Status.DiskFileStatusMap[diskUUID]
if exists && fileStatus != nil {
fileStatus := bi.Status.DiskFileStatusMap[diskUUID]
if fileStatus == nil { // it is newly added, consider it as non healthy
continue
} else {

Check notice on line 390 in controller/backing_image_controller.go

View check run for this annotation

codefactor.io / CodeFactor

controller/backing_image_controller.go#L390

If block ends with a continue statement, so drop this else and outdent its block. (superfluous-else)
if fileStatus.State != longhorn.BackingImageStateReady {
continue
}
Expand Down Expand Up @@ -537,9 +518,8 @@ func (bic *BackingImageController) handleBackingImageDataSource(bi *longhorn.Bac
}
}

// JackLin: BackingIamge Data Source choose node/disk
if !foundReadyDisk {
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, map[string]bool{})
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi)
if err != nil {
return err
}
Expand Down Expand Up @@ -670,7 +650,7 @@ func (bic *BackingImageController) handleBackingImageDataSource(bi *longhorn.Bac
changeNodeDisk := err != nil || node.Name != bids.Spec.NodeID || node.Spec.Disks[diskName].Path != bids.Spec.DiskPath || node.Status.DiskStatus[diskName].DiskUUID != bids.Spec.DiskUUID
if changeNodeDisk {
log.Warn("Backing image data source current node and disk is not ready, need to switch to another ready node and disk")
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi, map[string]bool{})
readyNode, readyDiskName, err := bic.ds.GetReadyNodeDiskForBackingImage(bi)
if err != nil {
return err
}
Expand Down Expand Up @@ -1002,7 +982,7 @@ func (bic *BackingImageController) enqueueBackingImageForNodeUpdate(oldObj, curr
}
}

diskBackingImageMap, err := bic.ds.GetDiskBackingImageMap(oldNode)
diskBackingImageMap, err := bic.ds.GetDiskBackingImageMap()
if err != nil {
utilruntime.HandleError(fmt.Errorf("failed to get disk backing image map when handling node update"))
return
Expand Down
11 changes: 11 additions & 0 deletions controller/backing_image_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"reflect"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -360,6 +361,16 @@ func (c *BackingImageManagerController) cleanupBackingImageManager(bim *longhorn
}
}
}

// delete in progress record of this manager from the inProgressReplenishingMap
// each manager only controls only one disk
diskUUID := bim.Spec.DiskUUID
for biNameDiskID := range c.inProgressReplenishingMap {
if strings.Contains(biNameDiskID, diskUUID) {
delete(c.inProgressReplenishingMap, biNameDiskID)
}
}

if c.isMonitoring(bim.Name) {
c.stopMonitoring(bim.Name)
}
Expand Down
9 changes: 6 additions & 3 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,9 @@ func (nc *NodeController) cleanUpBackingImagesInDisks(node *longhorn.Node) error
log.WithError(err).Warn("Failed to get the backing image data source when cleaning up the images in disks")
continue
}
if bids == nil {
continue
}
existingBackingImage := bi.DeepCopy()
BackingImageDiskFileCleanup(node, bi, bids, waitInterval, bi.Spec.MinNumberOfCopies)
if !reflect.DeepEqual(existingBackingImage.Spec, bi.Spec) {
Expand Down Expand Up @@ -1591,7 +1594,7 @@ func (nc *NodeController) syncBackingImageEvictionRequested(node *longhorn.Node)
}
log := getLoggerForNode(nc.logger, node)

diskBackingImageMap, err := nc.ds.GetDiskBackingImageMap(node)
diskBackingImageMap, err := nc.ds.GetDiskBackingImageMap()
if err != nil {
return err
}
Expand Down Expand Up @@ -1628,15 +1631,15 @@ func (nc *NodeController) syncBackingImageEvictionRequested(node *longhorn.Node)
backingImageLog := log.WithField("backingimage", backingImageToSync.Name).WithField("disk", backingImageToSync.diskUUID)
if backingImageToSync.evict {
backingImageLog.Infof("Requesting backing image copy eviction")
if _, err := nc.ds.UpdateBackingImageStatus(backingImageToSync.BackingImage); err != nil {
if _, err := nc.ds.UpdateBackingImage(backingImageToSync.BackingImage); err != nil {
backingImageLog.Warn("Failed to request backing image copy eviction, will enqueue then resync the node")
nc.enqueueNodeRateLimited(node)
continue
}
nc.eventRecorder.Eventf(backingImageToSync.BackingImage, corev1.EventTypeNormal, constant.EventReasonEvictionUserRequested, "Requesting backing image %v eviction from node %v and disk %v", backingImageToSync.Name, node.Spec.Name, backingImageToSync.diskUUID)
} else {
backingImageLog.Infof("Cancelling backing image copy eviction")
if _, err := nc.ds.UpdateBackingImageStatus(backingImageToSync.BackingImage); err != nil {
if _, err := nc.ds.UpdateBackingImage(backingImageToSync.BackingImage); err != nil {
backingImageLog.Warn("Failed to cancel backing image copy eviction, will enqueue then resync the node")
nc.enqueueNodeRateLimited(node)
continue
Expand Down
3 changes: 1 addition & 2 deletions controller/replica_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,7 @@ func (rc *ReplicaController) GetBackingImagePathForReplicaStarting(r *longhorn.R
log.Warnf("Failed to check if backing image %v can be put on disk %v", bi.Name, r.Spec.DiskID)
}
if !res {
log.Warnf("The backing image %v can not be put on the disk %v", bi.Name, r.Spec.DiskID)
return "", nil
return "", fmt.Errorf("The backing image %v can not be put on the disk %v", bi.Name, r.Spec.DiskID)
}
bi.Spec.DiskFileSpecMap[r.Spec.DiskID] = &longhorn.BackingImageDiskFileSpec{}
log.Infof("Asking backing image %v to download file to node %v disk %v", bi.Name, r.Spec.NodeID, r.Spec.DiskID)
Expand Down
6 changes: 3 additions & 3 deletions datastore/longhorn.go
Original file line number Diff line number Diff line change
Expand Up @@ -2666,7 +2666,7 @@ func (s *DataStore) ListReadyNodesContainingEngineImageRO(image string) (map[str

// GetReadyNodeDiskForBackingImage a list of all Node the in the given namespace and
// returns the first Node && the first Disk of the Node marked with condition ready and allow scheduling
func (s *DataStore) GetReadyNodeDiskForBackingImage(backingImage *longhorn.BackingImage, usedDisks map[string]bool) (*longhorn.Node, string, error) {
func (s *DataStore) GetReadyNodeDiskForBackingImage(backingImage *longhorn.BackingImage) (*longhorn.Node, string, error) {
logrus.Info("Preparing to find a random ready node disk")
nodes, err := s.ListNodesRO()
if err != nil {
Expand Down Expand Up @@ -2704,7 +2704,7 @@ func (s *DataStore) GetReadyNodeDiskForBackingImage(backingImage *longhorn.Backi
if !types.IsSelectorsInTags(diskSpec.Tags, backingImage.Spec.DiskSelector, allowEmptyDiskSelectorVolume) {
continue
}
if _, exists := usedDisks[diskStatus.DiskUUID]; exists {
if _, exists := backingImage.Spec.DiskFileSpecMap[diskStatus.DiskUUID]; exists {
continue
}
// TODO: Jack add block type disk for spdk version BackingImage
Expand Down Expand Up @@ -5073,7 +5073,7 @@ func (s *DataStore) IsV2DataEngineDisabledForNode(nodeName string) (bool, error)
return false, nil
}

func (s *DataStore) GetDiskBackingImageMap(node *longhorn.Node) (map[string][]*longhorn.BackingImage, error) {
func (s *DataStore) GetDiskBackingImageMap() (map[string][]*longhorn.BackingImage, error) {
diskBackingImageMap := map[string][]*longhorn.BackingImage{}
backingImages, err := s.ListBackingImages()
if err != nil {
Expand Down
15 changes: 5 additions & 10 deletions manager/backingimage.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,16 +236,11 @@ func (m *VolumeManager) UpdateBackingImageMinNumberOfCopies(name string, minNumb
return nil, errors.Wrapf(err, "unable to get backing image %v", name)
}

existingBI := bi.DeepCopy()
defer func() {
if err == nil {
if !reflect.DeepEqual(bi.Spec, existingBI.Spec) {
bi, err = m.ds.UpdateBackingImage(bi)
return
}
}
}()

bi.Spec.MinNumberOfCopies = minNumberOfCopies
bi, err = m.ds.UpdateBackingImage(bi)
if err != nil {
return nil, errors.Wrapf(err, "failed to update backing image %v", name)
}

return bi, nil
}
2 changes: 1 addition & 1 deletion types/setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,7 @@ var (
Type: SettingTypeInt,
Required: true,
ReadOnly: false,
Default: "1",
Default: "3",
ValueIntRange: map[string]int{
ValueIntRangeMinimum: 1,
},
Expand Down

0 comments on commit c538153

Please sign in to comment.