From e479b051bca142ee3b2352159e22b574240a9acc Mon Sep 17 00:00:00 2001
From: Travis Nielsen
Date: Tue, 28 May 2024 17:20:43 -0600
Subject: [PATCH 1/2] osd: configure cluster full settings when osds fill up
When the cluster reaches the full, nearfull, or backfillfull thresholds,
Ceph raises health warnings and stops allowing IO or backfill, depending
on the threshold. These settings require special ceph commands instead
of the generic ceph config. Allow these settings to be set from the
CephCluster CR in the spec.storage section.
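For example (mirroring the sample added to deploy/examples/cluster.yaml),
the thresholds can be set under spec.storage in the CephCluster CR; the
values shown here are the Ceph defaults:

  storage:
    fullRatio: 0.95
    backfillFullRatio: 0.90
    nearFullRatio: 0.85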
Signed-off-by: Travis Nielsen
---
.../CRDs/Cluster/ceph-cluster-crd.md | 3 +
Documentation/CRDs/specification.md | 36 +++++++
.../charts/rook-ceph/templates/resources.yaml | 18 ++++
deploy/examples/cluster.yaml | 6 ++
deploy/examples/crds.yaml | 18 ++++
pkg/apis/ceph.rook.io/v1/types.go | 18 ++++
pkg/daemon/ceph/client/osd.go | 7 +-
pkg/operator/ceph/cluster/cluster.go | 64 +++++++++++++
pkg/operator/ceph/cluster/cluster_test.go | 95 +++++++++++++++++++
tests/framework/installer/ceph_manifests.go | 8 ++
10 files changed, 271 insertions(+), 2 deletions(-)
diff --git a/Documentation/CRDs/Cluster/ceph-cluster-crd.md b/Documentation/CRDs/Cluster/ceph-cluster-crd.md
index b0eaf8a7655c..168dcb1415cf 100755
--- a/Documentation/CRDs/Cluster/ceph-cluster-crd.md
+++ b/Documentation/CRDs/Cluster/ceph-cluster-crd.md
@@ -86,6 +86,9 @@ For more details on the mons and when to choose a number other than `3`, see the
* For non-PVCs: `placement.all` and `placement.osd`
* For PVCs: `placement.all` and inside the storageClassDeviceSets from the `placement` or `preparePlacement`
* `flappingRestartIntervalHours`: Defines the time for which an OSD pod will sleep before restarting, if it stopped due to flapping. Flapping occurs where OSDs are marked `down` by Ceph more than 5 times in 600 seconds. The OSDs will stay down when flapping since they likely have a bad disk or other issue that needs investigation. If the issue with the OSD is fixed manually, the OSD pod can be manually restarted. The sleep is disabled if this interval is set to 0.
+ * `fullRatio`: The ratio at which Ceph should block IO if the OSDs are too full. The default is 0.95.
+ * `backfillFullRatio`: The ratio at which Ceph should stop backfilling data if the OSDs are too full. The default is 0.90.
+ * `nearFullRatio`: The ratio at which Ceph should raise a health warning if the cluster is almost full. The default is 0.85.
* `disruptionManagement`: The section for configuring management of daemon disruptions
* `managePodBudgets`: if `true`, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the [design](https://github.com/rook/rook/blob/master/design/ceph/ceph-managed-disruptionbudgets.md). The operator will block eviction of OSDs by default and unblock them safely when drains are detected.
* `osdMaintenanceTimeout`: is a duration in minutes that determines how long an entire failureDomain like `region/zone/host` will be held in `noout` (in addition to the default DOWN/OUT interval) when it is draining. The default value is `30` minutes.
diff --git a/Documentation/CRDs/specification.md b/Documentation/CRDs/specification.md
index 61c329b27e06..f03bc8648ca2 100644
--- a/Documentation/CRDs/specification.md
+++ b/Documentation/CRDs/specification.md
@@ -12170,6 +12170,42 @@ User needs to manually restart the OSD pod if they manage to fix the underlying
The sleep will be disabled if this interval is set to 0.
+<tr>
+<td>
+<code>fullRatio</code><br/>
+<em>
+float64
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95.</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>nearFullRatio</code><br/>
+<em>
+float64
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85.</p>
+</td>
+</tr>
+<tr>
+<td>
+<code>backfillFullRatio</code><br/>
+<em>
+float64
+</em>
+</td>
+<td>
+<em>(Optional)</em>
+<p>BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90.</p>
+</td>
+</tr>
StoreType
diff --git a/deploy/charts/rook-ceph/templates/resources.yaml b/deploy/charts/rook-ceph/templates/resources.yaml
index c8c58dbc19a3..bac4fb8a1e21 100644
--- a/deploy/charts/rook-ceph/templates/resources.yaml
+++ b/deploy/charts/rook-ceph/templates/resources.yaml
@@ -3152,6 +3152,12 @@ spec:
description: A spec for available storage in the cluster and how it should be used
nullable: true
properties:
+ backfillFullRatio:
+ description: BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
config:
additionalProperties:
type: string
@@ -3192,6 +3198,18 @@ spec:
User needs to manually restart the OSD pod if they manage to fix the underlying OSD flapping issue before the restart interval.
The sleep will be disabled if this interval is set to 0.
type: integer
+ fullRatio:
+ description: FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
+ nearFullRatio:
+ description: NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
nodes:
items:
description: Node is a storage nodes
diff --git a/deploy/examples/cluster.yaml b/deploy/examples/cluster.yaml
index 1c0c68434075..902577817d3e 100644
--- a/deploy/examples/cluster.yaml
+++ b/deploy/examples/cluster.yaml
@@ -272,6 +272,12 @@ spec:
onlyApplyOSDPlacement: false
# Time for which an OSD pod will sleep before restarting, if it stopped due to flapping
# flappingRestartIntervalHours: 24
+ # The ratio at which Ceph should block IO if the OSDs are too full. The default is 0.95.
+ # fullRatio: 0.95
+ # The ratio at which Ceph should stop backfilling data if the OSDs are too full. The default is 0.90.
+ # backfillFullRatio: 0.90
+ # The ratio at which Ceph should raise a health warning if the OSDs are almost full. The default is 0.85.
+ # nearFullRatio: 0.85
# The section for configuring management of daemon disruptions during upgrade or fencing.
disruptionManagement:
# If true, the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically
diff --git a/deploy/examples/crds.yaml b/deploy/examples/crds.yaml
index a2bfaa060c8b..0a510790c7e6 100644
--- a/deploy/examples/crds.yaml
+++ b/deploy/examples/crds.yaml
@@ -3150,6 +3150,12 @@ spec:
description: A spec for available storage in the cluster and how it should be used
nullable: true
properties:
+ backfillFullRatio:
+ description: BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
config:
additionalProperties:
type: string
@@ -3190,6 +3196,18 @@ spec:
User needs to manually restart the OSD pod if they manage to fix the underlying OSD flapping issue before the restart interval.
The sleep will be disabled if this interval is set to 0.
type: integer
+ fullRatio:
+ description: FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
+ nearFullRatio:
+ description: NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85.
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
nodes:
items:
description: Node is a storage nodes
diff --git a/pkg/apis/ceph.rook.io/v1/types.go b/pkg/apis/ceph.rook.io/v1/types.go
index 306b4078a895..f305f95530a0 100755
--- a/pkg/apis/ceph.rook.io/v1/types.go
+++ b/pkg/apis/ceph.rook.io/v1/types.go
@@ -2839,6 +2839,24 @@ type StorageScopeSpec struct {
// User needs to manually restart the OSD pod if they manage to fix the underlying OSD flapping issue before the restart interval.
// The sleep will be disabled if this interval is set to 0.
FlappingRestartIntervalHours int `json:"flappingRestartIntervalHours"`
+ // FullRatio is the ratio at which the cluster is considered full and ceph will stop accepting writes. Default is 0.95.
+ // +kubebuilder:validation:Minimum=0.0
+ // +kubebuilder:validation:Maximum=1.0
+ // +optional
+ // +nullable
+ FullRatio *float64 `json:"fullRatio,omitempty"`
+ // NearFullRatio is the ratio at which the cluster is considered nearly full and will raise a ceph health warning. Default is 0.85.
+ // +kubebuilder:validation:Minimum=0.0
+ // +kubebuilder:validation:Maximum=1.0
+ // +optional
+ // +nullable
+ NearFullRatio *float64 `json:"nearFullRatio,omitempty"`
+ // BackfillFullRatio is the ratio at which the cluster is too full for backfill. Backfill will be disabled if above this threshold. Default is 0.90.
+ // +kubebuilder:validation:Minimum=0.0
+ // +kubebuilder:validation:Maximum=1.0
+ // +optional
+ // +nullable
+ BackfillFullRatio *float64 `json:"backfillFullRatio,omitempty"`
}
// OSDStore is the backend storage type used for creating the OSDs
diff --git a/pkg/daemon/ceph/client/osd.go b/pkg/daemon/ceph/client/osd.go
index bfdb7da4b617..65c266d2d434 100644
--- a/pkg/daemon/ceph/client/osd.go
+++ b/pkg/daemon/ceph/client/osd.go
@@ -65,8 +65,11 @@ type OSDDump struct {
Up json.Number `json:"up"`
In json.Number `json:"in"`
} `json:"osds"`
- Flags string `json:"flags"`
- CrushNodeFlags map[string][]string `json:"crush_node_flags"`
+ Flags string `json:"flags"`
+ CrushNodeFlags map[string][]string `json:"crush_node_flags"`
+ FullRatio float64 `json:"full_ratio"`
+ BackfillFullRatio float64 `json:"backfillfull_ratio"`
+ NearFullRatio float64 `json:"nearfull_ratio"`
}
// IsFlagSet checks if an OSD flag is set
diff --git a/pkg/operator/ceph/cluster/cluster.go b/pkg/operator/ceph/cluster/cluster.go
index 344da2b4e163..e3c6e8df710c 100755
--- a/pkg/operator/ceph/cluster/cluster.go
+++ b/pkg/operator/ceph/cluster/cluster.go
@@ -20,6 +20,7 @@ package cluster
import (
"context"
"fmt"
+ "math"
"os"
"os/exec"
"path"
@@ -474,6 +475,10 @@ func (c *cluster) postMonStartupActions() error {
return errors.Wrap(err, "")
}
+ if err := c.configureStorageSettings(); err != nil {
+ return errors.Wrap(err, "failed to configure storage settings")
+ }
+
crushRoot := client.GetCrushRootFromSpec(c.Spec)
if crushRoot != "default" {
// Remove the root=default and replicated_rule which are created by
@@ -492,6 +497,65 @@ func (c *cluster) postMonStartupActions() error {
return nil
}
+func (c *cluster) configureStorageSettings() error {
+ if !c.shouldSetClusterFullSettings() {
+ return nil
+ }
+ osdDump, err := client.GetOSDDump(c.context, c.ClusterInfo)
+ if err != nil {
+ return errors.Wrap(err, "failed to get osd dump for setting cluster full settings")
+ }
+
+ if err := c.setClusterFullRatio("set-full-ratio", c.Spec.Storage.FullRatio, osdDump.FullRatio); err != nil {
+ return err
+ }
+
+ if err := c.setClusterFullRatio("set-backfillfull-ratio", c.Spec.Storage.BackfillFullRatio, osdDump.BackfillFullRatio); err != nil {
+ return err
+ }
+
+ if err := c.setClusterFullRatio("set-nearfull-ratio", c.Spec.Storage.NearFullRatio, osdDump.NearFullRatio); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (c *cluster) setClusterFullRatio(ratioCommand string, desiredRatio *float64, actualRatio float64) error {
+ if !shouldUpdateFloatSetting(desiredRatio, actualRatio) {
+ if desiredRatio != nil {
+ logger.Infof("desired value %s=%.2f is already set", ratioCommand, *desiredRatio)
+ }
+ return nil
+ }
+ desiredStringVal := fmt.Sprintf("%.2f", *desiredRatio)
+ logger.Infof("updating %s from %.2f to %s", ratioCommand, actualRatio, desiredStringVal)
+ args := []string{"osd", ratioCommand, desiredStringVal}
+ cephCmd := client.NewCephCommand(c.context, c.ClusterInfo, args)
+ output, err := cephCmd.Run()
+ if err != nil {
+ return errors.Wrapf(err, "failed to update %s to %q. %s", ratioCommand, desiredStringVal, output)
+ }
+ return nil
+}
+
+func shouldUpdateFloatSetting(desired *float64, actual float64) bool {
+ if desired == nil {
+ return false
+ }
+ if *desired == actual {
+ return false
+ }
+ if actual != 0 && math.Abs(*desired-actual)/actual > 0.01 {
+ return true
+ }
+ return false
+}
+
+func (c *cluster) shouldSetClusterFullSettings() bool {
+ return c.Spec.Storage.FullRatio != nil || c.Spec.Storage.BackfillFullRatio != nil || c.Spec.Storage.NearFullRatio != nil
+}
+
func (c *cluster) updateConfigStoreFromCRD() error {
monStore := config.GetMonStore(c.context, c.ClusterInfo)
return monStore.SetAllMultiple(c.Spec.CephConfig)
diff --git a/pkg/operator/ceph/cluster/cluster_test.go b/pkg/operator/ceph/cluster/cluster_test.go
index 84f86e29d1ab..6656ee1bb641 100644
--- a/pkg/operator/ceph/cluster/cluster_test.go
+++ b/pkg/operator/ceph/cluster/cluster_test.go
@@ -335,3 +335,98 @@ func TestTelemetry(t *testing.T) {
c.reportTelemetry()
})
}
+func TestClusterFullSettings(t *testing.T) {
+ actualFullRatio := 0.95
+ actualBackfillFullRatio := 0.90
+ actualNearFullRatio := 0.85
+ setFullRatio := false
+ setBackfillFullRatio := false
+ setNearFullRatio := false
+ clientset := testop.New(t, 1)
+ context := &clusterd.Context{Clientset: clientset}
+ c := cluster{
+ context: context,
+ ClusterInfo: cephclient.AdminTestClusterInfo("cluster"),
+ Spec: &cephv1.ClusterSpec{},
+ }
+ context.Executor = &exectest.MockExecutor{
+ MockExecuteCommandWithOutput: func(command string, args ...string) (string, error) {
+ logger.Infof("Command: %s %v", command, args)
+ if args[0] == "osd" {
+ if args[1] == "dump" {
+ return fmt.Sprintf(
+ `{ "full_ratio": %.2f,
+ "backfillfull_ratio": %.2f,
+ "nearfull_ratio": %.2f}`, actualFullRatio, actualBackfillFullRatio, actualNearFullRatio), nil
+ }
+ if args[1] == "set-full-ratio" {
+ assert.Equal(t, fmt.Sprintf("%.2f", *c.Spec.Storage.FullRatio), args[2])
+ setFullRatio = true
+ return "", nil
+ }
+ if args[1] == "set-nearfull-ratio" {
+ assert.Equal(t, fmt.Sprintf("%.2f", *c.Spec.Storage.NearFullRatio), args[2])
+ setNearFullRatio = true
+ return "", nil
+ }
+ if args[1] == "set-backfillfull-ratio" {
+ assert.Equal(t, fmt.Sprintf("%.2f", *c.Spec.Storage.BackfillFullRatio), args[2])
+ setBackfillFullRatio = true
+ return "", nil
+ }
+ }
+ return "", errors.New("mock error to simulate failure of mon store config")
+ },
+ }
+ t.Run("no settings", func(t *testing.T) {
+ err := c.configureStorageSettings()
+ assert.NoError(t, err)
+ assert.False(t, setFullRatio)
+ assert.False(t, setNearFullRatio)
+ assert.False(t, setBackfillFullRatio)
+ })
+
+ val91 := 0.91
+ val90 := 0.90
+ val85 := 0.85
+ val80 := 0.80
+
+ t.Run("all settings applied", func(t *testing.T) {
+ c.Spec.Storage.FullRatio = &val90
+ c.Spec.Storage.NearFullRatio = &val80
+ c.Spec.Storage.BackfillFullRatio = &val85
+ err := c.configureStorageSettings()
+ assert.NoError(t, err)
+ assert.True(t, setFullRatio)
+ assert.True(t, setNearFullRatio)
+ assert.True(t, setBackfillFullRatio)
+ })
+
+ t.Run("no settings changed", func(t *testing.T) {
+ setFullRatio = false
+ setBackfillFullRatio = false
+ setNearFullRatio = false
+ c.Spec.Storage.FullRatio = &actualFullRatio
+ c.Spec.Storage.NearFullRatio = &actualNearFullRatio
+ c.Spec.Storage.BackfillFullRatio = &actualBackfillFullRatio
+ err := c.configureStorageSettings()
+ assert.NoError(t, err)
+ assert.False(t, setFullRatio)
+ assert.False(t, setNearFullRatio)
+ assert.False(t, setBackfillFullRatio)
+ })
+
+ t.Run("one setting applied", func(t *testing.T) {
+ setFullRatio = false
+ setBackfillFullRatio = false
+ setNearFullRatio = false
+ c.Spec.Storage.FullRatio = &val91
+ c.Spec.Storage.NearFullRatio = nil
+ c.Spec.Storage.BackfillFullRatio = nil
+ err := c.configureStorageSettings()
+ assert.NoError(t, err)
+ assert.True(t, setFullRatio)
+ assert.False(t, setNearFullRatio)
+ assert.False(t, setBackfillFullRatio)
+ })
+}
diff --git a/tests/framework/installer/ceph_manifests.go b/tests/framework/installer/ceph_manifests.go
index bb8fb0175ae2..7aeb28bd1018 100644
--- a/tests/framework/installer/ceph_manifests.go
+++ b/tests/framework/installer/ceph_manifests.go
@@ -238,6 +238,14 @@ spec:
config:
databaseSizeMB: "1024"
`
+ // Append the storage settings if it's not an upgrade from 1.13 where the settings do not exist
+ if m.settings.RookVersion != Version1_13 {
+ clusterSpec += `
+ fullRatio: 0.96
+ backfillFullRatio: 0.91
+ nearFullRatio: 0.88
+`
+ }
}
if m.settings.ConnectionsEncrypted {
From 9824dec3bd1207ec623cfec64690bee8ae874687 Mon Sep 17 00:00:00 2001
From: Ceph Jenkins
Date: Fri, 31 May 2024 04:02:19 -0400
Subject: [PATCH 2/2] csv: add additional csv changes that other commits bring
add generated csv changes
Signed-off-by: Ceph Jenkins
---
build/csv/ceph/ceph.rook.io_cephclusters.yaml | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/build/csv/ceph/ceph.rook.io_cephclusters.yaml b/build/csv/ceph/ceph.rook.io_cephclusters.yaml
index 6843cd9197cf..4e7b1f2b4d1e 100644
--- a/build/csv/ceph/ceph.rook.io_cephclusters.yaml
+++ b/build/csv/ceph/ceph.rook.io_cephclusters.yaml
@@ -1502,6 +1502,11 @@ spec:
storage:
nullable: true
properties:
+ backfillFullRatio:
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
config:
additionalProperties:
type: string
@@ -1531,6 +1536,16 @@ spec:
x-kubernetes-preserve-unknown-fields: true
flappingRestartIntervalHours:
type: integer
+ fullRatio:
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
+ nearFullRatio:
+ maximum: 1
+ minimum: 0
+ nullable: true
+ type: number
nodes:
items:
properties: