diff --git a/PendingReleaseNotes.md b/PendingReleaseNotes.md
index b5b24eda3d54..0cf183144e02 100644
--- a/PendingReleaseNotes.md
+++ b/PendingReleaseNotes.md
@@ -4,5 +4,6 @@
 
 - Updating Ceph COSI driver images, this impact existing COSI `Buckets` and `BucketAccesses`, please update the `BucketClass` and `BucketAccessClass` for resolving refer [here](https://github.com/rook/rook/discussions/14297)
+- During CephBlockPool updates, return an error if an invalid device class is specified. Pools with invalid device classes may start failing reconcile until the correct device class is specified. See #14057.
 
 ## Features
 
diff --git a/pkg/daemon/ceph/client/pool.go b/pkg/daemon/ceph/client/pool.go
index 95d778ec8077..82eae5e77eb9 100644
--- a/pkg/daemon/ceph/client/pool.go
+++ b/pkg/daemon/ceph/client/pool.go
@@ -253,16 +253,16 @@ func DeletePool(context *clusterd.Context, clusterInfo *ClusterInfo, name string
 	logger.Infof("purging pool %q (id=%d)", name, pool.Number)
 	args := []string{"osd", "pool", "delete", name, name, reallyConfirmFlag}
-	_, err = NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to delete pool %q", name)
+		return errors.Wrapf(err, "failed to delete pool %q. %s", name, string(output))
 	}
 
 	// remove the crush rule for this pool and ignore the error in case the rule is still in use or not found
 	args = []string{"osd", "crush", "rule", "rm", name}
-	_, err = NewCephCommand(context, clusterInfo, args).Run()
+	output, err = NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		logger.Errorf("failed to delete crush rule %q. %v", name, err)
+		logger.Errorf("failed to delete crush rule %q. %v. %s", name, err, string(output))
 	}
 
 	logger.Infof("purge completed for pool %q", name)
 
@@ -280,9 +280,9 @@ func givePoolAppTag(context *clusterd.Context, clusterInfo *ClusterInfo, poolNam
 	}
 
 	args := []string{"osd", "pool", "application", "enable", poolName, appName, confirmFlag}
-	_, err = NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to enable application %q on pool %q", appName, poolName)
+		return errors.Wrapf(err, "failed to enable application %q on pool %q. %s", appName, poolName, string(output))
 	}
 
 	return nil
@@ -456,7 +456,7 @@ func createReplicatedPoolForApp(context *clusterd.Context, clusterInfo *ClusterI
 
 	if checkFailureDomain || pool.PoolSpec.DeviceClass != "" {
 		if err = updatePoolCrushRule(context, clusterInfo, clusterSpec, pool); err != nil {
-			return nil
+			return errors.Wrapf(err, "failed to update crush rule for pool %q", pool.Name)
 		}
 	}
 	return nil
@@ -561,9 +561,9 @@ func extractPoolDetails(rule ruleSpec) (string, string) {
 
 func setCrushRule(context *clusterd.Context, clusterInfo *ClusterInfo, poolName, crushRule string) error {
 	args := []string{"osd", "pool", "set", poolName, "crush_rule", crushRule}
-	_, err := NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to set crush rule %q", crushRule)
+		return errors.Wrapf(err, "failed to set crush rule %q. %s", crushRule, string(output))
 	}
 	return nil
 }
@@ -714,9 +714,9 @@ func createReplicationCrushRule(context *clusterd.Context, clusterInfo *ClusterI
 		args = append(args, deviceClass)
 	}
 
-	_, err := NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to create crush rule %s", ruleName)
+		return errors.Wrapf(err, "failed to create crush rule %s. %s", ruleName, string(output))
 	}
 
 	return nil
@@ -726,9 +726,9 @@ func createReplicationCrushRule(context *clusterd.Context, clusterInfo *ClusterI
 func SetPoolProperty(context *clusterd.Context, clusterInfo *ClusterInfo, name, propName, propVal string) error {
 	args := []string{"osd", "pool", "set", name, propName, propVal}
 	logger.Infof("setting pool property %q to %q on pool %q", propName, propVal, name)
-	_, err := NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to set pool property %q on pool %q", propName, name)
+		return errors.Wrapf(err, "failed to set pool property %q on pool %q. %s", propName, name, string(output))
 	}
 	return nil
 }
@@ -737,9 +737,9 @@ func SetPoolProperty(context *clusterd.Context, clusterInfo *ClusterInfo, name,
 func setPoolQuota(context *clusterd.Context, clusterInfo *ClusterInfo, poolName, quotaType, quotaVal string) error {
 	args := []string{"osd", "pool", "set-quota", poolName, quotaType, quotaVal}
 	logger.Infof("setting quota %q=%q on pool %q", quotaType, quotaVal, poolName)
-	_, err := NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to set %q quota on pool %q", quotaType, poolName)
+		return errors.Wrapf(err, "failed to set %q quota on pool %q. %s", quotaType, poolName, string(output))
 	}
 	return nil
 }
@@ -752,9 +752,9 @@ func SetPoolReplicatedSizeProperty(context *clusterd.Context, clusterInfo *Clust
 		args = append(args, "--yes-i-really-mean-it")
 	}
 
-	_, err := NewCephCommand(context, clusterInfo, args).Run()
+	output, err := NewCephCommand(context, clusterInfo, args).Run()
 	if err != nil {
-		return errors.Wrapf(err, "failed to set pool property %q on pool %q", propName, poolName)
+		return errors.Wrapf(err, "failed to set pool property %q on pool %q. %s", propName, poolName, string(output))
 	}
 
 	return nil
diff --git a/pkg/daemon/ceph/client/pool_test.go b/pkg/daemon/ceph/client/pool_test.go
index 28bacae2109a..3afe66b820ca 100644
--- a/pkg/daemon/ceph/client/pool_test.go
+++ b/pkg/daemon/ceph/client/pool_test.go
@@ -179,6 +179,11 @@ func testCreateReplicaPool(t *testing.T, failureDomain, crushRoot, deviceClass,
 				assert.Equal(t, "12345", args[8])
 				return "", nil
 			}
+			if args[2] == "get" {
+				assert.Equal(t, "mypool", args[3])
+				assert.Equal(t, "all", args[4])
+				return `{"pool":"replicapool","pool_id":2,"size":1,"min_size":1,"crush_rule":"replicapool_osd"}`, nil
+			}
 			if args[2] == "set" {
 				assert.Equal(t, "mypool", args[3])
 				if args[4] == "size" {
@@ -203,8 +208,12 @@ func testCreateReplicaPool(t *testing.T, failureDomain, crushRoot, deviceClass,
 		if args[1] == "crush" {
 			crushRuleCreated = true
 			assert.Equal(t, "rule", args[2])
+			if args[3] == "dump" {
+				assert.Equal(t, "replicapool_osd", args[4])
+				return `{"rule_id": 3,"rule_name": "replicapool_osd","type": 1}`, nil
+			}
 			assert.Equal(t, "create-replicated", args[3])
-			assert.Equal(t, "mypool", args[4])
+			assert.Contains(t, args[4], "mypool")
 			if crushRoot == "" {
 				assert.Equal(t, "cluster-crush-root", args[5])
 			} else {
diff --git a/pkg/operator/ceph/pool/controller.go b/pkg/operator/ceph/pool/controller.go
index 58d0f13379cf..24d080ebf644 100644
--- a/pkg/operator/ceph/pool/controller.go
+++ b/pkg/operator/ceph/pool/controller.go
@@ -366,7 +366,7 @@ func (r *ReconcileCephBlockPool) reconcileCreatePool(clusterInfo *cephclient.Clu
 	poolSpec := cephBlockPool.ToNamedPoolSpec()
 	err := createPool(r.context, clusterInfo, cephCluster, &poolSpec)
 	if err != nil {
-		return opcontroller.ImmediateRetryResult, errors.Wrapf(err, "failed to create pool %q.", cephBlockPool.GetName())
+		return opcontroller.ImmediateRetryResult, errors.Wrapf(err, "failed to configure pool %q.", cephBlockPool.GetName())
 	}
 
 	// Let's return here so that on the initial creation we don't check for update right away
@@ -382,7 +382,7 @@ func createPool(context *clusterd.Context, clusterInfo *cephclient.ClusterInfo,
 	// create the pool
 	logger.Infof("creating pool %q in namespace %q", p.Name, clusterInfo.Namespace)
 	if err := cephclient.CreatePool(context, clusterInfo, clusterSpec, *p); err != nil {
-		return errors.Wrapf(err, "failed to create pool %q", p.Name)
+		return errors.Wrapf(err, "failed to configure pool %q", p.Name)
 	}
 
 	if p.Application != poolApplicationNameRBD {