Skip to content

Commit

Permalink
osd: wait for the pgs to be clean
Browse files Browse the repository at this point in the history
If migration of the OSDs is enabled, the operator should wait
for the PGs to be clean after all the OSDs have reconciled.
This helps apply exponential backoff to operator
restarts when OSD migration is needed but the PGs are still rebalancing
data after the last OSD migration.

Signed-off-by: sp98 <sapillai@redhat.com>
(cherry picked from commit 43d3bf1)
  • Loading branch information
sp98 committed Jan 23, 2024
1 parent 2c7f55c commit 6765bce
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions pkg/operator/ceph/cluster/osd/osd.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,20 +268,30 @@ func (c *Cluster) Start() error {
return errors.Wrapf(err, "failed to update ceph storage status")
}

if c.replaceOSD != nil {
delOpts := &k8sutil.DeleteOptions{MustDelete: true, WaitOptions: k8sutil.WaitOptions{Wait: true}}
err := k8sutil.DeleteConfigMap(c.clusterInfo.Context, c.context.Clientset, OSDReplaceConfigName, namespace, delOpts)
if err != nil {
return errors.Wrapf(err, "failed to delete the %q configmap", OSDReplaceConfigName)
if c.spec.Storage.Store.UpdateStore == OSDStoreUpdateConfirmation {
if c.replaceOSD != nil {
delOpts := &k8sutil.DeleteOptions{MustDelete: true, WaitOptions: k8sutil.WaitOptions{Wait: true}}
err := k8sutil.DeleteConfigMap(c.clusterInfo.Context, c.context.Clientset, OSDReplaceConfigName, namespace, delOpts)
if err != nil {
return errors.Wrapf(err, "failed to delete the %q configmap", OSDReplaceConfigName)
}
}

// Wait for PGs to be healthy before continuing the reconcile
_, err = c.waitForHealthyPGs()
// wait for the pgs to be healthy before attempting to migrate the next OSD
_, err := c.waitForHealthyPGs()
if err != nil {
return errors.Wrapf(err, "failed to wait for pgs to be healhty")
}

return errors.New("reconcile operator to replace OSDs that are pending migration")
// reconcile if migration of one or more OSD is pending.
osdsToReplace, err := c.getOSDWithNonMatchingStore()
if err != nil {
return errors.Wrapf(err, "failed to check if any OSD migration is pending")
}

if len(osdsToReplace) != 0 {
return errors.New("reconcile operator to replace OSDs that are pending migration")
}
}

logger.Infof("finished running OSDs in namespace %q", namespace)
Expand Down Expand Up @@ -846,7 +856,7 @@ func (c *Cluster) waitForHealthyPGs() (bool, error) {
if pgClean {
return true, nil
}
logger.Infof("waiting for PGs to be healthy after replacing an OSD, status: %q", pgHealthMsg)
logger.Infof("waiting for PGs to be healthy. PG status: %q", pgHealthMsg)
return false, nil
}

Expand Down

0 comments on commit 6765bce

Please sign in to comment.