diff --git a/go.mod b/go.mod index 0acd7f783..fabb21a29 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/longhorn/go-common-libs v0.0.0-20240729132251-9e1e0e6045c6 github.com/longhorn/go-spdk-helper v0.0.0-20240809041416-d679e629f379 github.com/longhorn/longhorn-engine v1.7.0-rc3 - github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809042332-aeec19edc7a0 + github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809071348-015a1f25f838 github.com/longhorn/types v0.0.0-20240725040629-473d671316c4 github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.9.3 diff --git a/go.sum b/go.sum index f48fa1c87..a6800e1a6 100644 --- a/go.sum +++ b/go.sum @@ -101,8 +101,8 @@ github.com/longhorn/go-spdk-helper v0.0.0-20240809041416-d679e629f379 h1:FNO6HGG github.com/longhorn/go-spdk-helper v0.0.0-20240809041416-d679e629f379/go.mod h1:UauA7GIVFR0g580fjKsHsY14Yv/lv4s2q8m6wqkx6Fw= github.com/longhorn/longhorn-engine v1.7.0-rc3 h1:YTt++OeSrEOlifz++8VAOH/aJ4lGShD2TaJP1ZaQ3Uw= github.com/longhorn/longhorn-engine v1.7.0-rc3/go.mod h1:2Hq/3QzW4fF2yUg+kauiAT3ps5WCKLMkrwXW2Wyfj9o= -github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809042332-aeec19edc7a0 h1:HGykUpNNdGFOv/91yCdvYeugrCYXo3uiyGs3OnchZuI= -github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809042332-aeec19edc7a0/go.mod h1:WBRlBFJg8RMTugI5ansazFGDdC6Uwa4QcftgIB6R4fQ= +github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809071348-015a1f25f838 h1:Egi79M1XaJgW07h8DKMVhv2RTCTVHtZXEH8FqMNEhgM= +github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809071348-015a1f25f838/go.mod h1:WBRlBFJg8RMTugI5ansazFGDdC6Uwa4QcftgIB6R4fQ= github.com/longhorn/nsfilelock v0.0.0-20200723175406-fa7c83ad0003 h1:Jw9uANsGcHTxp6HcC++/vN17LfeuDmozHI2j6DoZf5E= github.com/longhorn/nsfilelock v0.0.0-20200723175406-fa7c83ad0003/go.mod h1:0CLeXlf59Lg6C0kjLSDf47ft73Dh37CwymYRKWwAn04= github.com/longhorn/sparse-tools v0.0.0-20240703010727-92451e38077a h1:+o63c0oh7ZNKeQdc0Hawfzz5vRa4LiDvLOtJYjegtnk= diff --git a/vendor/github.com/longhorn/longhorn-spdk-engine/pkg/spdk/engine.go b/vendor/github.com/longhorn/longhorn-spdk-engine/pkg/spdk/engine.go index de53bf12b..a5d6d909e 100644 --- a/vendor/github.com/longhorn/longhorn-spdk-engine/pkg/spdk/engine.go +++ b/vendor/github.com/longhorn/longhorn-spdk-engine/pkg/spdk/engine.go @@ -599,16 +599,20 @@ func (e *Engine) ValidateAndUpdate(spdkClient *spdkclient.Client) (err error) { containValidReplica := false for replicaName, bdevName := range e.ReplicaBdevNameMap { - if e.ReplicaModeMap[replicaName] == types.ModeERR || e.ReplicaModeMap[replicaName] == types.ModeWO { + if e.ReplicaModeMap[replicaName] == types.ModeERR { + continue + } + if e.ReplicaModeMap[replicaName] != types.ModeWO && e.ReplicaModeMap[replicaName] != types.ModeRW { + e.log.Errorf("Engine found replica %s invalid mode %v during ValidateAndUpdate", replicaName, e.ReplicaModeMap[replicaName]) + e.ReplicaModeMap[replicaName] = types.ModeERR + updateRequired = true continue } - mode, err := e.validateAndUpdateReplicaMode(replicaName, bdevMap[bdevName]) + mode, err := e.validateAndUpdateReplicaNvme(replicaName, bdevMap[bdevName]) if err != nil { - if e.ReplicaModeMap[replicaName] != types.ModeERR { - e.log.WithError(err).Errorf("Replica %v is invalid, will update the mode from %s to ERR during ValidateAndUpdate", replicaName, e.ReplicaModeMap[replicaName]) - e.ReplicaModeMap[replicaName] = types.ModeERR - updateRequired = true - } + e.log.WithError(err).Errorf("Engine found valid nvme for replica %v, will update the mode from %s to ERR during ValidateAndUpdate", replicaName, e.ReplicaModeMap[replicaName]) + e.ReplicaModeMap[replicaName] = types.ModeERR + updateRequired = true continue } if e.ReplicaModeMap[replicaName] != mode { @@ -652,6 +656,19 @@ func (e *Engine) checkAndUpdateInfoFromReplicaNoLock() { e.ReplicaModeMap[replicaName] = types.ModeERR continue } + if e.ReplicaModeMap[replicaName] == types.ModeWO { + shallowCopyStatus, err := replicaServiceCli.ReplicaRebuildingDstShallowCopyCheck(replicaName) + if err != nil { + e.log.WithError(err).Warnf("failed to get rebuilding replica %s shallow copy info, will skip this replica and continue info update from replica", replicaName) + continue + } + if shallowCopyStatus.TotalState == helpertypes.ShallowCopyStateError || shallowCopyStatus.Error != "" { + e.log.Errorf("Engine found rebuilding replica %s error %v during info update from replica, will mark the mode from WO to ERR and continue info update from replica", replicaName, shallowCopyStatus.Error) + e.ReplicaModeMap[replicaName] = types.ModeERR + } + // No need to do anything if `shallowCopyStatus.TotalState == helpertypes.ShallowCopyStateComplete`, engine should leave the rebuilding logic to update its mode + continue + } // The ancestor check sequence: the backing image, then the oldest snapshot, finally head // TODO: Check the backing image first @@ -811,43 +828,34 @@ func (e *Engine) validateAndUpdateFrontend(subsystemMap map[string]*spdktypes.Nv return nil } -func (e *Engine) validateAndUpdateReplicaMode(replicaName string, bdev *spdktypes.BdevInfo) (mode types.Mode, err error) { +func (e *Engine) validateAndUpdateReplicaNvme(replicaName string, bdev *spdktypes.BdevInfo) (mode types.Mode, err error) { if bdev == nil { return types.ModeERR, fmt.Errorf("cannot find a bdev for replica %s", replicaName) } + bdevSpecSize := bdev.NumBlocks * uint64(bdev.BlockSize) if e.SpecSize != bdevSpecSize { - return types.ModeERR, fmt.Errorf("found mismatching between replica %s bdev spec size %d and the engine spec size %d for engine %s", replicaName, bdevSpecSize, e.SpecSize, e.Name) + return types.ModeERR, fmt.Errorf("found mismatching between replica bdev %s spec size %d and the engine %s spec size %d during replica %s mode validation", bdev.Name, bdevSpecSize, e.Name, e.SpecSize, replicaName) } - switch spdktypes.GetBdevType(bdev) { - case spdktypes.BdevTypeLvol: - replicaIP, _, err := net.SplitHostPort(e.ReplicaAddressMap[replicaName]) - if err != nil { - return types.ModeERR, err - } - if e.IP != replicaIP { - return types.ModeERR, fmt.Errorf("found mismatching between replica %s IP %s and the engine IP %s for engine %s", replicaName, replicaIP, e.IP, e.Name) - } - case spdktypes.BdevTypeNvme: - if len(*bdev.DriverSpecific.Nvme) != 1 { - return types.ModeERR, fmt.Errorf("found zero or multiple nvme info in a remote nvme base bdev %v for replica %s", bdev.Name, replicaName) - } - nvmeInfo := (*bdev.DriverSpecific.Nvme)[0] - if !strings.EqualFold(string(nvmeInfo.Trid.Adrfam), string(spdktypes.NvmeAddressFamilyIPv4)) || - !strings.EqualFold(string(nvmeInfo.Trid.Trtype), string(spdktypes.NvmeTransportTypeTCP)) { - return types.ModeERR, fmt.Errorf("found invalid address family %s and transport type %s in a remote nvme base bdev %s for replica %s", nvmeInfo.Trid.Adrfam, nvmeInfo.Trid.Trtype, bdev.Name, replicaName) - } - bdevAddr := net.JoinHostPort(nvmeInfo.Trid.Traddr, nvmeInfo.Trid.Trsvcid) - if e.ReplicaAddressMap[replicaName] != bdevAddr { - return types.ModeERR, fmt.Errorf("found mismatching between replica %s bdev address %s and the nvme bdev actual address %s", replicaName, e.ReplicaAddressMap[replicaName], bdevAddr) - } - // TODO: Validate NVMe controller state - // TODO: Verify Mode WO - default: - return types.ModeERR, fmt.Errorf("found invalid bdev type %v for replica %s ", spdktypes.GetBdevType(bdev), replicaName) + + if spdktypes.GetBdevType(bdev) != spdktypes.BdevTypeNvme { + return types.ModeERR, fmt.Errorf("found bdev type %v rather than %v during replica %s mode validation", spdktypes.GetBdevType(bdev), spdktypes.BdevTypeNvme, replicaName) + } + if len(*bdev.DriverSpecific.Nvme) != 1 { + return types.ModeERR, fmt.Errorf("found zero or multiple nvme info in a nvme base bdev %v during replica %s mode validation", bdev.Name, replicaName) + } + nvmeInfo := (*bdev.DriverSpecific.Nvme)[0] + if !strings.EqualFold(string(nvmeInfo.Trid.Adrfam), string(spdktypes.NvmeAddressFamilyIPv4)) || + !strings.EqualFold(string(nvmeInfo.Trid.Trtype), string(spdktypes.NvmeTransportTypeTCP)) { + return types.ModeERR, fmt.Errorf("found invalid address family %s and transport type %s in a remote nvme base bdev %s during replica %s mode validation", nvmeInfo.Trid.Adrfam, nvmeInfo.Trid.Trtype, bdev.Name, replicaName) + } + bdevAddr := net.JoinHostPort(nvmeInfo.Trid.Traddr, nvmeInfo.Trid.Trsvcid) + if e.ReplicaAddressMap[replicaName] != bdevAddr { + return types.ModeERR, fmt.Errorf("found mismatching between replica bdev %s address %s and the nvme bdev actual address %s during replica %s mode validation", bdev.Name, e.ReplicaAddressMap[replicaName], bdevAddr, replicaName) } + // TODO: Validate NVMe controller state - return types.ModeRW, nil + return e.ReplicaModeMap[replicaName], nil } func (e *Engine) ReplicaAdd(spdkClient *spdkclient.Client, dstReplicaName, dstReplicaAddress string) (err error) { diff --git a/vendor/modules.txt b/vendor/modules.txt index b7d3bee7d..a8d571106 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -230,7 +230,7 @@ github.com/longhorn/longhorn-engine/pkg/sync github.com/longhorn/longhorn-engine/pkg/types github.com/longhorn/longhorn-engine/pkg/util github.com/longhorn/longhorn-engine/pkg/util/disk -# github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809042332-aeec19edc7a0 +# github.com/longhorn/longhorn-spdk-engine v0.0.0-20240809071348-015a1f25f838 ## explicit; go 1.22.0 github.com/longhorn/longhorn-spdk-engine/pkg/api github.com/longhorn/longhorn-spdk-engine/pkg/client