From d8c4ad3484345e65b738d648b10babd520dec367 Mon Sep 17 00:00:00 2001 From: Tim Serong Date: Mon, 9 Sep 2024 18:24:10 +1000 Subject: [PATCH] feat: add Longhorn V2 provisioner This allows adding extra disks for use by Longhorn's V2 data engine. I've implemented this as a separate LonghornV2Provisioner for the sake of neatness, rather than grafting it into LonghornV1Provisioner with a bunch of `if` statements, but some of the implementation (the UnProvision() and Update() functions) still call back to the LonghornV1Provisioner to avoid excessive code duplication. The current implementation theoretically supports using virtio disks, except that NDM as a whole ignores those, as they don't have WWNs. This is something we need to address separately as part of https://github.com/harvester/harvester/issues/6034 Related issue: https://github.com/harvester/harvester/issues/5274 Signed-off-by: Tim Serong (cherry picked from commit 83b58ddb0400e629167cd0f6985b5306120ad9e9) --- .../crds/harvesterhci.io_blockdevices.yaml | 8 + .../crds/harvesterhci.io_blockdevices.yaml | 8 + .../harvesterhci.io/v1beta1/blockdevice.go | 5 + pkg/controller/blockdevice/controller.go | 13 +- pkg/provisioner/longhornv2.go | 188 ++++++++++++++++++ 5 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 pkg/provisioner/longhornv2.go diff --git a/deploy/charts/harvester-node-disk-manager/templates/crds/harvesterhci.io_blockdevices.yaml b/deploy/charts/harvester-node-disk-manager/templates/crds/harvesterhci.io_blockdevices.yaml index 6e40129a..c7fcf1a0 100644 --- a/deploy/charts/harvester-node-disk-manager/templates/crds/harvesterhci.io_blockdevices.yaml +++ b/deploy/charts/harvester-node-disk-manager/templates/crds/harvesterhci.io_blockdevices.yaml @@ -98,6 +98,14 @@ spec: description: a provisioner for provision Longhorn volume backend disk properties: + diskDriver: + description: specifies the driver to use for V2 data engine + disks + enum: + - "" + - auto + - aio + type: string 
engineVersion: description: a string with the engine version for the provisioner enum: diff --git a/manifests/crds/harvesterhci.io_blockdevices.yaml b/manifests/crds/harvesterhci.io_blockdevices.yaml index 6e40129a..c7fcf1a0 100644 --- a/manifests/crds/harvesterhci.io_blockdevices.yaml +++ b/manifests/crds/harvesterhci.io_blockdevices.yaml @@ -98,6 +98,14 @@ spec: description: a provisioner for provision Longhorn volume backend disk properties: + diskDriver: + description: specifies the driver to use for V2 data engine + disks + enum: + - "" + - auto + - aio + type: string engineVersion: description: a string with the engine version for the provisioner enum: diff --git a/pkg/apis/harvesterhci.io/v1beta1/blockdevice.go b/pkg/apis/harvesterhci.io/v1beta1/blockdevice.go index f3696cb0..43108d75 100644 --- a/pkg/apis/harvesterhci.io/v1beta1/blockdevice.go +++ b/pkg/apis/harvesterhci.io/v1beta1/blockdevice.go @@ -1,6 +1,7 @@ package v1beta1 import ( + longhornv1 "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2" "github.com/rancher/wrangler/v3/pkg/condition" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -95,6 +96,10 @@ type LonghornProvisionerInfo struct { // a string with the engine version for the provisioner // +kubebuilder:validation:Enum:=LonghornV1;LonghornV2 EngineVersion string `json:"engineVersion"` + // specifies the driver to use for V2 data engine disks + // +kubebuilder:validation:Enum="";auto;aio + // +optional + DiskDriver longhornv1.DiskDriver `json:"diskDriver,omitempty"` } type FilesystemInfo struct { diff --git a/pkg/controller/blockdevice/controller.go b/pkg/controller/blockdevice/controller.go index 2e902e47..0dfbdf2f 100644 --- a/pkg/controller/blockdevice/controller.go +++ b/pkg/controller/blockdevice/controller.go @@ -242,7 +242,7 @@ func (c *Controller) generateProvisioner(device *diskv1.BlockDevice) (provisione device.Spec.Provisioner = provisioner return c.generateLHv1Provisioner(device) case 
provisioner.TypeLonghornV2:
-		return nil, fmt.Errorf("TBD type %s", provisionerType)
+		return c.generateLHv2Provisioner(device)
 	case provisioner.TypeLVM:
 		return c.generateLVMProvisioner(device)
 	default:
@@ -266,6 +266,17 @@ func (c *Controller) generateLVMProvisioner(device *diskv1.BlockDevice) (provisi
 	return provisioner.NewLVMProvisioner(vgName, c.NodeName, c.LVMVgClient, device, c.BlockInfo)
 }
 
+func (c *Controller) generateLHv2Provisioner(device *diskv1.BlockDevice) (provisioner.Provisioner, error) {
+	node, err := c.NodeCache.Get(c.Namespace, c.NodeName)
+	if apierrors.IsNotFound(err) {
+		node, err = c.Nodes.Get(c.Namespace, c.NodeName, metav1.GetOptions{})
+	}
+	if err != nil {
+		return nil, err
+	}
+	return provisioner.NewLHV2Provisioner(device, c.BlockInfo, node, c.Nodes, c.NodeCache, CacheDiskTags)
+}
+
 func (c *Controller) updateDeviceStatus(device *diskv1.BlockDevice, devPath string) error {
 	var newStatus diskv1.DeviceStatus
 	var needAutoProvision bool
diff --git a/pkg/provisioner/longhornv2.go b/pkg/provisioner/longhornv2.go
new file mode 100644
index 00000000..15874ebf
--- /dev/null
+++ b/pkg/provisioner/longhornv2.go
@@ -0,0 +1,192 @@
+package provisioner
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"reflect"
+	"strings"
+
+	longhornv1 "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
+	"github.com/sirupsen/logrus"
+
+	diskv1 "github.com/harvester/node-disk-manager/pkg/apis/harvesterhci.io/v1beta1"
+	"github.com/harvester/node-disk-manager/pkg/block"
+	ctllonghornv1 "github.com/harvester/node-disk-manager/pkg/generated/controllers/longhorn.io/v1beta2"
+)
+
+// LonghornV2Provisioner provisions block devices for Longhorn's V2 (SPDK)
+// data engine. It embeds LonghornV1Provisioner so that the shared pieces
+// (UnProvision, tag sync) can be reused rather than duplicated.
+type LonghornV2Provisioner struct {
+	*LonghornV1Provisioner
+}
+
+// NewLHV2Provisioner returns a provisioner for adding the given block device
+// to the longhorn node as a V2 (block-type) disk. It fails if the disk tag
+// cache has not been initialized yet, because Update() relies on it.
+func NewLHV2Provisioner(
+	device *diskv1.BlockDevice,
+	block block.Info,
+	nodeObj *longhornv1.Node,
+	nodesClient ctllonghornv1.NodeClient,
+	nodesClientCache ctllonghornv1.NodeCache,
+	cacheDiskTags *DiskTags,
+) (Provisioner, error) {
+	if !cacheDiskTags.Initialized() {
+		return nil, errors.New(ErrorCacheDiskTagsNotInitialized)
+	}
+	baseProvisioner := &provisioner{
+		name:      TypeLonghornV2,
+		blockInfo: block,
+		device:    device,
+	}
+	return &LonghornV2Provisioner{
+		&LonghornV1Provisioner{
+			provisioner:      baseProvisioner,
+			nodeObj:          nodeObj,
+			nodesClient:      nodesClient,
+			nodesClientCache: nodesClientCache,
+			cacheDiskTags:    cacheDiskTags,
+			semaphoreObj:     nil,
+		},
+	}, nil
+}
+
+// Format is a no-op for V2 disks, but we still need to return
+// isFormatComplete == true to indicate the disk is ready for use.
+func (p *LonghornV2Provisioner) Format(string) (isFormatComplete, isRequeueNeeded bool, err error) {
+	isFormatComplete = true
+	return
+}
+
+// UnFormat is a no-op for V2 disks
+func (p *LonghornV2Provisioner) UnFormat() (isRequeueNeeded bool, err error) {
+	return
+}
+
+// Provision adds the block device to longhorn's list of disks. Longhorn's admission
+// webhook will deny the update if the V2 engine isn't enabled (you'll see something
+// like 'admission webhook "validator.longhorn.io" denied the request: update disk on
+// node harvester-node-0 error: The disk 989754e4e66edadfd3390974a1aba3f8(/dev/sda) is
+// a block device, but the SPDK feature is not enabled')
+func (p *LonghornV2Provisioner) Provision() (isRequeueNeeded bool, err error) {
+	logrus.WithFields(logrus.Fields{
+		"provisioner": p.name,
+		"device":      p.device.Name,
+	}).Info("Provisioning Longhorn block device")
+
+	nodeObjCpy := p.nodeObj.DeepCopy()
+	tags := []string{}
+	if p.device.Spec.Tags != nil {
+		tags = p.device.Spec.Tags
+	}
+
+	// Here we want either a BDF path, or a /dev/disk/by-id/ path (not the short
+	// path like /dev/sdx which might change)
+	devPath, err := resolveLonghornV2DevPath(p.device)
+	if err != nil {
+		// Probably no point requeuing if we can't resolve the device path,
+		// because really what's going to change between this call and the next...?
+		return false, err
+	}
+
+	// If diskDriver is an empty string, longhorn will map that to "auto" internally
+	diskDriver := p.device.Spec.Provisioner.Longhorn.DiskDriver
+
+	diskSpec := longhornv1.DiskSpec{
+		Type:              longhornv1.DiskTypeBlock,
+		Path:              devPath,
+		AllowScheduling:   true,
+		EvictionRequested: false,
+		StorageReserved:   0,
+		Tags:              tags,
+		DiskDriver:        diskDriver,
+	}
+
+	// We're intentionally not trying to sync disk tags from longhorn if the
+	// disk already exists, because the Harvester blockdevice CR is meant to
+	// be the source of truth.
+
+	nodeObjCpy.Spec.Disks[p.device.Name] = diskSpec
+	if !reflect.DeepEqual(p.nodeObj, nodeObjCpy) {
+		if _, err := p.nodesClient.Update(nodeObjCpy); err != nil {
+			return true, err
+		}
+	}
+
+	if !diskv1.DiskAddedToNode.IsTrue(p.device) {
+		msg := fmt.Sprintf("Added disk %s to longhorn node `%s` as an additional disk", p.device.Name, p.nodeObj.Name)
+		setCondDiskAddedToNodeTrue(p.device, msg, diskv1.ProvisionPhaseProvisioned)
+	}
+
+	// Cache the same normalized tag slice we just pushed to longhorn
+	// (p.device.Spec.Tags may be nil; tags never is).
+	p.cacheDiskTags.UpdateDiskTags(p.device.Name, tags)
+	return
+}
+
+func (p *LonghornV2Provisioner) UnProvision() (isRequeueNeeded bool, err error) {
+	// The LH v1 UnProvision function works just fine for unprovisioning
+	// a V2 volume. The only thing it does that's not necessary for V2
+	// is the potential call to unmountTheBrokenDisk(), but that doesn't
+	// do anything if there's no filesystem mounted, so it's a no-op.
+	return p.LonghornV1Provisioner.UnProvision()
+}
+
+func (p *LonghornV2Provisioner) Update() (isRequeueNeeded bool, err error) {
+	// Sync disk tags (we can just use the V1 implementation for this for now)
+	isRequeueNeeded, err = p.LonghornV1Provisioner.Update()
+	if err != nil {
+		return
+	}
+
+	// Sync disk driver, but only if it actually changed, so we don't hit
+	// the longhorn API with a no-op node update on every reconcile.
+	if targetDisk, found := p.nodeObj.Spec.Disks[p.device.Name]; found &&
+		targetDisk.DiskDriver != p.device.Spec.Provisioner.Longhorn.DiskDriver {
+		nodeCpy := p.nodeObj.DeepCopy()
+		targetDisk.DiskDriver = p.device.Spec.Provisioner.Longhorn.DiskDriver
+		nodeCpy.Spec.Disks[p.device.Name] = targetDisk
+		if _, err = p.nodesClient.Update(nodeCpy); err != nil {
+			isRequeueNeeded = true
+		}
+	}
+	return
+}
+
+// resolveLonghornV2DevPath will return a BDF path if possible for virtio or
+// NVMe devices, then will fall back to /dev/disk/by-id (which requires the
+// disk to have a WWN). For details on BDF pathing, see
+// https://longhorn.io/docs/1.7.1/v2-data-engine/features/node-disk-support/
+func resolveLonghornV2DevPath(device *diskv1.BlockDevice) (string, error) {
+	if device.Status.DeviceStatus.Details.DeviceType != diskv1.DeviceTypeDisk {
+		return "", fmt.Errorf("device type must be disk to resolve Longhorn V2 device path (type is %s)",
+			device.Status.DeviceStatus.Details.DeviceType)
+	}
+	devPath := ""
+	if device.Status.DeviceStatus.Details.StorageController == string(diskv1.StorageControllerVirtio) ||
+		device.Status.DeviceStatus.Details.StorageController == string(diskv1.StorageControllerNVMe) {
+		// In both of these cases, we should (hopefully!) be able to extract BDF from BusPath
+		if strings.HasPrefix(device.Status.DeviceStatus.Details.BusPath, "pci-") {
+			devPath = strings.Split(device.Status.DeviceStatus.Details.BusPath, "-")[1]
+		}
+		if len(devPath) > 0 {
+			return devPath, nil
+		}
+		logrus.WithFields(logrus.Fields{
+			"device":  device.Name,
+			"buspath": device.Status.DeviceStatus.Details.BusPath,
+		}).Warn("Unable to extract BDF from BusPath, falling back to WWN")
+	}
+	if wwn := device.Status.DeviceStatus.Details.WWN; valueExists(wwn) {
+		devPath = "/dev/disk/by-id/wwn-" + wwn
+		_, err := os.Stat(devPath)
+		if err == nil {
+			return devPath, nil
+		}
+		logrus.WithFields(logrus.Fields{
+			"device": device.Name,
+			"wwn":    device.Status.DeviceStatus.Details.WWN,
+		}).Warn("/dev/disk/by-id/wwn-* path does not exist for device")
+	}
+	// TODO: see if we can find something else under /dev/disk/by-id, for
+	// example maybe there's a serial number but no WWN. In the "no WWN"
+	// case, maybe it's sufficient to just take whatever path we can find
+	// under /dev/disk/by-id that links back to the device...?
+	return "", fmt.Errorf("unable to resolve Longhorn V2 device path; %s has no WWN and no BDF", device.Name)
+}