From 52ca68d46e3f0ef3e1df8393ad8760ab5cf01efa Mon Sep 17 00:00:00 2001 From: Mathias Chapelain Date: Fri, 23 Aug 2024 15:32:56 +0200 Subject: [PATCH 1/8] osd: discover metadata and wal devices for raw device cleanup When doing the clean-up of raw device OSDs, it was not taking metadata and wal devices into account. This commit discover them via the output of `ceph-volume raw list`, which is returning `device_db` and/or `device_wal` if they are present. After discovering the metadata and wal device it also clean them and close the encrypted device if any. Signed-off-by: Mathias Chapelain --- pkg/daemon/ceph/cleanup/disk.go | 30 +++++++++++++++++++----------- pkg/daemon/ceph/osd/volume.go | 22 +++++++++++++++------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/pkg/daemon/ceph/cleanup/disk.go b/pkg/daemon/ceph/cleanup/disk.go index 484c48bf9557..e6d148a8cd85 100644 --- a/pkg/daemon/ceph/cleanup/disk.go +++ b/pkg/daemon/ceph/cleanup/disk.go @@ -179,21 +179,29 @@ func (s *DiskSanitizer) executeSanitizeCommand(osdInfo oposd.OSDInfo, wg *sync.W // On return, notify the WaitGroup that we’re done defer wg.Done() - output, err := s.context.Executor.ExecuteCommandWithCombinedOutput(shredUtility, s.buildShredArgs(osdInfo.BlockPath)...) - if err != nil { - logger.Errorf("failed to sanitize osd disk %q. %s. %v", osdInfo.BlockPath, output, err) - } + for _, device := range []string{osdInfo.BlockPath, osdInfo.MetadataPath, osdInfo.WalPath} { + if device == "" { + continue + } - logger.Infof("%s\n", output) - logger.Infof("successfully sanitized osd disk %q", osdInfo.BlockPath) + output, err := s.context.Executor.ExecuteCommandWithCombinedOutput(shredUtility, s.buildShredArgs(device)...) + + logger.Infof("%s\n", output) - // If the device is encrypted let's close it after sanitizing its content - if osdInfo.Encrypted { - err := osd.CloseEncryptedDevice(s.context, osdInfo.BlockPath) if err != nil { - logger.Errorf("failed to close encrypted osd disk %q. %v", osdInfo.BlockPath, err) + logger.Errorf("failed to sanitize osd disk %q. %s. %v", device, output, err) } else { - logger.Infof("successfully closed encrypted osd disk %q", osdInfo.BlockPath) + logger.Infof("successfully sanitized osd disk %q", device) + } + + // If the device is encrypted let's close it after sanitizing its content + if osdInfo.Encrypted { + err := osd.CloseEncryptedDevice(s.context, device) + if err != nil { + logger.Errorf("failed to close encrypted osd disk %q. 
%v", device, err) + } else { + logger.Infof("successfully closed encrypted osd disk %q", device) + } } } } diff --git a/pkg/daemon/ceph/osd/volume.go b/pkg/daemon/ceph/osd/volume.go index a01b67055e84..75029082b193 100644 --- a/pkg/daemon/ceph/osd/volume.go +++ b/pkg/daemon/ceph/osd/volume.go @@ -64,11 +64,13 @@ var ( ) type osdInfoBlock struct { - CephFsid string `json:"ceph_fsid"` - Device string `json:"device"` - OsdID int `json:"osd_id"` - OsdUUID string `json:"osd_uuid"` - Type string `json:"type"` + CephFsid string `json:"ceph_fsid"` + Device string `json:"device"` + DeviceDb string `json:"device_db"` + DeviceWal string `json:"device_wal"` + OsdID int `json:"osd_id"` + OsdUUID string `json:"osd_uuid"` + Type string `json:"type"` } type osdInfo struct { @@ -993,6 +995,8 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster // blockPath represents the path of the OSD block // it can be the one passed from the function's call or discovered by the c-v list command var blockPath string + var blockMetadataPath string + var blockWalPath string // If block is passed, check if it's an encrypted device, this is needed to get the correct // device path and populate the OSDInfo for that OSD @@ -1123,8 +1127,12 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster // If no block is specified let's take the one we discovered if setDevicePathFromList { blockPath = osdInfo.Device + blockMetadataPath = osdInfo.DeviceDb + blockWalPath = osdInfo.DeviceWal } else { blockPath = block + blockMetadataPath = metadataBlock + blockWalPath = walBlock } osdStore := osdInfo.Type @@ -1139,8 +1147,8 @@ func GetCephVolumeRawOSDs(context *clusterd.Context, clusterInfo *client.Cluster // Thus in the activation sequence we might activate the wrong OSD and have OSDInfo messed up // Hence, let's use the PVC name instead which will always remain consistent BlockPath: blockPath, - MetadataPath: metadataBlock, - WalPath: walBlock, + MetadataPath: blockMetadataPath, + WalPath: blockWalPath, SkipLVRelease: true, LVBackedPV: lvBackedPV, CVMode: cvMode, From 1ee58cc5bc7ddbdd121edd46202c41ade411af07 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:13:48 +0000 Subject: [PATCH 2/8] build(deps): bump actions/setup-python from 5.1.1 to 5.2.0 Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5.1.1 to 5.2.0. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/39cd14951b08e74b54015e9e001cdefcf80e669f...f677139bbe7f9c59b41e40162b753c062f5d49a3) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/docs-check.yml | 2 +- .github/workflows/helm-lint.yaml | 2 +- .github/workflows/linters.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml index b443286fdc85..c060ce5d4b74 100644 --- a/.github/workflows/docs-check.yml +++ b/.github/workflows/docs-check.yml @@ -32,7 +32,7 @@ jobs: with: go-version: "1.22" - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.9 diff --git a/.github/workflows/helm-lint.yaml b/.github/workflows/helm-lint.yaml index f06e9edd970c..013074f2ec48 100644 --- a/.github/workflows/helm-lint.yaml +++ b/.github/workflows/helm-lint.yaml @@ -35,7 +35,7 @@ jobs: with: version: v3.6.2 - - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.9 diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index 3ce34e4ef14a..d995b051c75e 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -28,7 +28,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.9 @@ -46,7 +46,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.9 From fc9cea164e05ce9eaf4b29d41fe6ab4bdac3241e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:13:57 +0000 Subject: [PATCH 3/8] build(deps): bump actions/upload-artifact from 4.3.6 to 4.4.0 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.6 to 4.4.0. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/834a144ee995460fba8ed112a2fc961b36a5ec5a...50769540e7f4bd5e21e526ee35c689e35e0d6874) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/daily-nightly-jobs.yml | 20 +++++++++---------- .../integration-test-helm-suite.yaml | 2 +- .../integration-test-keystone-auth-suite.yaml | 2 +- .../workflows/integration-test-mgr-suite.yaml | 2 +- .../integration-test-multi-cluster-suite.yaml | 2 +- .../integration-test-object-suite.yaml | 2 +- .../integration-test-smoke-suite.yaml | 2 +- .../integration-test-upgrade-suite.yaml | 4 ++-- .../integration-tests-on-release.yaml | 12 +++++------ .github/workflows/scorecards.yml | 2 +- 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/.github/workflows/daily-nightly-jobs.yml b/.github/workflows/daily-nightly-jobs.yml index 735129fa4c77..af1676170776 100644 --- a/.github/workflows/daily-nightly-jobs.yml +++ b/.github/workflows/daily-nightly-jobs.yml @@ -112,7 +112,7 @@ jobs: name: canary-arm64 - name: upload canary test result - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: always() with: name: canary-arm64 @@ -152,7 +152,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-quincy-artifact @@ -192,7 +192,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-reef-artifact @@ -232,7 +232,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-squid-artifact @@ -272,7 +272,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-master-artifact @@ -312,7 +312,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-object-suite-quincy-artifact @@ -352,7 +352,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-object-suite-master-artifact @@ -392,7 +392,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-squid-artifact @@ -433,7 +433,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-reef-artifact @@ -473,7 +473,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: 
actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-quincy-artifact diff --git a/.github/workflows/integration-test-helm-suite.yaml b/.github/workflows/integration-test-helm-suite.yaml index 334d5992a277..8b9d7e42682d 100644 --- a/.github/workflows/integration-test-helm-suite.yaml +++ b/.github/workflows/integration-test-helm-suite.yaml @@ -64,7 +64,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-helm-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-keystone-auth-suite.yaml b/.github/workflows/integration-test-keystone-auth-suite.yaml index e52f7932636d..dada9abca9cf 100644 --- a/.github/workflows/integration-test-keystone-auth-suite.yaml +++ b/.github/workflows/integration-test-keystone-auth-suite.yaml @@ -62,7 +62,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-keystone-auth-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-mgr-suite.yaml b/.github/workflows/integration-test-mgr-suite.yaml index 6389f2a3c764..d88e14f67321 100644 --- a/.github/workflows/integration-test-mgr-suite.yaml +++ b/.github/workflows/integration-test-mgr-suite.yaml @@ -59,7 +59,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-mgr-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-multi-cluster-suite.yaml b/.github/workflows/integration-test-multi-cluster-suite.yaml index 874074875fee..101fbf2e3841 100644 --- a/.github/workflows/integration-test-multi-cluster-suite.yaml +++ b/.github/workflows/integration-test-multi-cluster-suite.yaml @@ -63,7 +63,7 @@ jobs: CLUSTER_NAMESPACE="multi-external" tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-multi-cluster-deploy-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-object-suite.yaml b/.github/workflows/integration-test-object-suite.yaml index 7513b7c144fd..b50e26f52c39 100644 --- a/.github/workflows/integration-test-object-suite.yaml +++ b/.github/workflows/integration-test-object-suite.yaml @@ -60,7 +60,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-object-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-smoke-suite.yaml b/.github/workflows/integration-test-smoke-suite.yaml index 8b1d18fba568..829f1a14459a 100644 --- a/.github/workflows/integration-test-smoke-suite.yaml +++ 
b/.github/workflows/integration-test-smoke-suite.yaml @@ -60,7 +60,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-test-upgrade-suite.yaml b/.github/workflows/integration-test-upgrade-suite.yaml index 694d60fe7de3..26c6e2fbf7dc 100644 --- a/.github/workflows/integration-test-upgrade-suite.yaml +++ b/.github/workflows/integration-test-upgrade-suite.yaml @@ -60,7 +60,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }} @@ -107,7 +107,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-helm-suite-artifact-${{ matrix.kubernetes-versions }} diff --git a/.github/workflows/integration-tests-on-release.yaml b/.github/workflows/integration-tests-on-release.yaml index 3a0ec705212d..acc16b6987b9 100644 --- a/.github/workflows/integration-tests-on-release.yaml +++ b/.github/workflows/integration-tests-on-release.yaml @@ -50,7 +50,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-helm-suite-artifact-${{ matrix.kubernetes-versions }} @@ -91,7 +91,7 @@ jobs: CLUSTER_NAMESPACE="multi-external" tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-multi-cluster-deploy-suite-artifact-${{ matrix.kubernetes-versions }} @@ -129,7 +129,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-smoke-suite-artifact-${{ matrix.kubernetes-versions }} @@ -167,7 +167,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }} @@ -208,7 +208,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }} @@ -246,7 +246,7 @@ jobs: tests/scripts/collect-logs.sh - name: Artifact - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 if: failure() with: name: ceph-object-suite-artifact-${{ 
matrix.kubernetes-versions }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 00f556c37324..f21a5f2d275b 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -55,7 +55,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: name: SARIF file path: results.sarif From 4458eae5c858cf321fbb48801c7ed850730f5aff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:14:04 +0000 Subject: [PATCH 4/8] build(deps): bump github/codeql-action from 3.26.5 to 3.26.6 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.5 to 3.26.6. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/2c779ab0d087cd7fe7b826087247c2c81f27bfa6...4dd16135b69a43b6c8efb853346f8437d92d3c93) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 00f556c37324..09d2301d4241 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -64,6 +64,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 + uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 with: sarif_file: results.sarif From bf99a14c71b962082464f49ce06633f060e877b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Sep 2024 12:31:43 +0000 Subject: [PATCH 5/8] build(deps): bump github.com/csi-addons/kubernetes-csi-addons Bumps the github-dependencies group with 1 update: [github.com/csi-addons/kubernetes-csi-addons](https://github.com/csi-addons/kubernetes-csi-addons). Updates `github.com/csi-addons/kubernetes-csi-addons` from 0.9.0 to 0.9.1 - [Release notes](https://github.com/csi-addons/kubernetes-csi-addons/releases) - [Commits](https://github.com/csi-addons/kubernetes-csi-addons/compare/v0.9.0...v0.9.1) --- updated-dependencies: - dependency-name: github.com/csi-addons/kubernetes-csi-addons dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-dependencies ... 
Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e770421d9085..076aa298e388 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/ceph/ceph-csi-operator/api v0.0.0-20240819112305-88e6db254d6c github.com/ceph/go-ceph v0.29.0 github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8 - github.com/csi-addons/kubernetes-csi-addons v0.9.0 + github.com/csi-addons/kubernetes-csi-addons v0.9.1 github.com/gemalto/kmip-go v0.0.10 github.com/go-ini/ini v1.67.0 github.com/google/go-cmp v0.6.0 diff --git a/go.sum b/go.sum index 87ef32262286..1faf19988f81 100644 --- a/go.sum +++ b/go.sum @@ -213,8 +213,8 @@ github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7Do github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/csi-addons/kubernetes-csi-addons v0.9.0 h1:Hhb44WcrxtbzmpLY+uqX+DBWCI6HgA/rwQMPyvsyCc8= -github.com/csi-addons/kubernetes-csi-addons v0.9.0/go.mod h1:/YROZDdEi1N/1Ls9rdU5W2VNjm8MK7HHApl8W4Sqt9s= +github.com/csi-addons/kubernetes-csi-addons v0.9.1 h1:2m8/Pls7Ws3ld1zr/w6lL6BWwsXqqLg2JnW0jP8AX6I= +github.com/csi-addons/kubernetes-csi-addons v0.9.1/go.mod h1:32kTa/Ngp7hMK2GEjx+Zk8yfKupR5WG4JG+oRzkM1TM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= From 05d579b607226b35ccdb83033ef79caff5e68ebf Mon Sep 17 00:00:00 2001 From: Madhu Rajanna Date: Mon, 2 Sep 2024 12:20:04 +0200 Subject: [PATCH 6/8] csi: update csi-addons to v0.9.1 updating csi-addons to latest v0.9.1 release. Signed-off-by: Madhu Rajanna --- Documentation/Helm-Charts/operator-chart.md | 2 +- .../Ceph-CSI/ceph-csi-drivers.md | 20 +++++++++---------- .../Ceph-CSI/custom-images.md | 2 +- deploy/charts/rook-ceph/values.yaml | 2 +- deploy/examples/images.txt | 2 +- deploy/examples/operator-openshift.yaml | 2 +- deploy/examples/operator.yaml | 2 +- go.mod | 2 +- go.sum | 4 ++-- pkg/operator/ceph/csi/spec.go | 2 +- tests/scripts/csiaddons.sh | 2 +- 11 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Documentation/Helm-Charts/operator-chart.md b/Documentation/Helm-Charts/operator-chart.md index 2a7173c73949..f3816c76c320 100644 --- a/Documentation/Helm-Charts/operator-chart.md +++ b/Documentation/Helm-Charts/operator-chart.md @@ -67,7 +67,7 @@ The following table lists the configurable parameters of the rook-operator chart | `csi.clusterName` | Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. 
This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster | `nil` | | `csi.csiAddons.enabled` | Enable CSIAddons | `false` | | `csi.csiAddons.repository` | CSIAddons sidecar image repository | `"quay.io/csiaddons/k8s-sidecar"` | -| `csi.csiAddons.tag` | CSIAddons sidecar image tag | `"v0.9.0"` | +| `csi.csiAddons.tag` | CSIAddons sidecar image tag | `"v0.9.1"` | | `csi.csiAddonsPort` | CSI Addons server port | `9070` | | `csi.csiCephFSPluginResource` | CEPH CSI CephFS plugin resource requirement list | see values.yaml | | `csi.csiCephFSPluginVolume` | The volume of the CephCSI CephFS plugin DaemonSet | `nil` | diff --git a/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md b/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md index 9b6e986cd36e..b904d2da8ab5 100644 --- a/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md +++ b/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md @@ -166,9 +166,9 @@ that the controller inspects and forwards to one or more CSI-Addons sidecars for Deploy the controller by running the following commands: ```console -kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.0/crds.yaml -kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.0/rbac.yaml -kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.0/setup-controller.yaml +kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.1/crds.yaml +kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.1/rbac.yaml +kubectl create -f https://github.com/csi-addons/kubernetes-csi-addons/releases/download/v0.9.1/setup-controller.yaml ``` This creates the required CRDs and configures permissions. 
@@ -196,15 +196,15 @@ Execute the following to enable the CSI-Addons sidecars: CSI-Addons supports the following operations: * Reclaim Space - * [Creating a ReclaimSpaceJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/reclaimspace.md#reclaimspacejob) - * [Creating a ReclaimSpaceCronJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/reclaimspace.md#reclaimspacecronjob) - * [Annotating PersistentVolumeClaims](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/reclaimspace.md#annotating-perstentvolumeclaims) - * [Annotating Namespace](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/reclaimspace.md#annotating-namespace) + * [Creating a ReclaimSpaceJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/reclaimspace.md#reclaimspacejob) + * [Creating a ReclaimSpaceCronJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/reclaimspace.md#reclaimspacecronjob) + * [Annotating PersistentVolumeClaims](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/reclaimspace.md#annotating-perstentvolumeclaims) + * [Annotating Namespace](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/reclaimspace.md#annotating-namespace) * Network Fencing - * [Creating a NetworkFence](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/networkfence.md) + * [Creating a NetworkFence](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/networkfence.md) * Volume Replication - * [Creating VolumeReplicationClass](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/volumereplicationclass.md) - * [Creating VolumeReplication CR](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.0/docs/volumereplication.md) + * [Creating VolumeReplicationClass](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/volumereplicationclass.md) + * [Creating VolumeReplication CR](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.9.1/docs/volumereplication.md) ## Enable RBD and CephFS Encryption Support diff --git a/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md b/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md index 59c0575c5a9a..e3dedf6351c6 100644 --- a/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md +++ b/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md @@ -24,7 +24,7 @@ ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v5.0.1" ROOK_CSI_ATTACHER_IMAGE: "registry.k8s.io/sig-storage/csi-attacher:v4.6.1" ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.11.1" ROOK_CSI_SNAPSHOTTER_IMAGE: "registry.k8s.io/sig-storage/csi-snapshotter:v8.0.1" -ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.0" +ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.1" ``` ### **Use private repository** diff --git a/deploy/charts/rook-ceph/values.yaml b/deploy/charts/rook-ceph/values.yaml index f93548f38619..1b931a492693 100644 --- a/deploy/charts/rook-ceph/values.yaml +++ b/deploy/charts/rook-ceph/values.yaml @@ -537,7 +537,7 @@ csi: # -- CSIAddons sidecar image repository repository: quay.io/csiaddons/k8s-sidecar # -- CSIAddons sidecar image tag - tag: v0.9.0 + tag: v0.9.1 nfs: # -- Enable the nfs csi driver diff --git a/deploy/examples/images.txt b/deploy/examples/images.txt index 88a25cdd0df7..12ccd7dfd6ee 100644 --- a/deploy/examples/images.txt +++ b/deploy/examples/images.txt @@ -3,7 +3,7 @@ quay.io/ceph/ceph:v18.2.4 
quay.io/ceph/cosi:v0.1.2 quay.io/cephcsi/cephcsi:v3.12.0 - quay.io/csiaddons/k8s-sidecar:v0.9.0 + quay.io/csiaddons/k8s-sidecar:v0.9.1 registry.k8s.io/sig-storage/csi-attacher:v4.6.1 registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.11.1 registry.k8s.io/sig-storage/csi-provisioner:v5.0.1 diff --git a/deploy/examples/operator-openshift.yaml b/deploy/examples/operator-openshift.yaml index 84e4aeae7383..7d0c052d59e2 100644 --- a/deploy/examples/operator-openshift.yaml +++ b/deploy/examples/operator-openshift.yaml @@ -558,7 +558,7 @@ data: CSI_ENABLE_CSIADDONS: "false" # Enable watch for faster recovery from rbd rwo node loss ROOK_WATCH_FOR_NODE_FAILURE: "true" - # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.0" + # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.1" # The GCSI RPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it's default to 150. CSI_GRPC_TIMEOUT_SECONDS: "150" diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml index 454380848316..7f86068f50d9 100644 --- a/deploy/examples/operator.yaml +++ b/deploy/examples/operator.yaml @@ -508,7 +508,7 @@ data: CSI_ENABLE_CSIADDONS: "false" # Enable watch for faster recovery from rbd rwo node loss ROOK_WATCH_FOR_NODE_FAILURE: "true" - # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.0" + # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.9.1" # The CSI GRPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it's default to 150. CSI_GRPC_TIMEOUT_SECONDS: "150" diff --git a/go.mod b/go.mod index e770421d9085..076aa298e388 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/ceph/ceph-csi-operator/api v0.0.0-20240819112305-88e6db254d6c github.com/ceph/go-ceph v0.29.0 github.com/coreos/pkg v0.0.0-20230601102743-20bbbf26f4d8 - github.com/csi-addons/kubernetes-csi-addons v0.9.0 + github.com/csi-addons/kubernetes-csi-addons v0.9.1 github.com/gemalto/kmip-go v0.0.10 github.com/go-ini/ini v1.67.0 github.com/google/go-cmp v0.6.0 diff --git a/go.sum b/go.sum index 87ef32262286..1faf19988f81 100644 --- a/go.sum +++ b/go.sum @@ -213,8 +213,8 @@ github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7Do github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/csi-addons/kubernetes-csi-addons v0.9.0 h1:Hhb44WcrxtbzmpLY+uqX+DBWCI6HgA/rwQMPyvsyCc8= -github.com/csi-addons/kubernetes-csi-addons v0.9.0/go.mod h1:/YROZDdEi1N/1Ls9rdU5W2VNjm8MK7HHApl8W4Sqt9s= +github.com/csi-addons/kubernetes-csi-addons v0.9.1 h1:2m8/Pls7Ws3ld1zr/w6lL6BWwsXqqLg2JnW0jP8AX6I= +github.com/csi-addons/kubernetes-csi-addons v0.9.1/go.mod h1:32kTa/Ngp7hMK2GEjx+Zk8yfKupR5WG4JG+oRzkM1TM= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go index 631b0446f01c..4be362ed25bb 100644 --- a/pkg/operator/ceph/csi/spec.go +++ b/pkg/operator/ceph/csi/spec.go @@ -157,7 +157,7 @@ var ( DefaultAttacherImage = "registry.k8s.io/sig-storage/csi-attacher:v4.6.1" DefaultSnapshotterImage = 
"registry.k8s.io/sig-storage/csi-snapshotter:v8.0.1" DefaultResizerImage = "registry.k8s.io/sig-storage/csi-resizer:v1.11.1" - DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.9.0" + DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.9.1" // image pull policy DefaultCSIImagePullPolicy = string(corev1.PullIfNotPresent) diff --git a/tests/scripts/csiaddons.sh b/tests/scripts/csiaddons.sh index 401061da3305..60daf61e217e 100755 --- a/tests/scripts/csiaddons.sh +++ b/tests/scripts/csiaddons.sh @@ -16,7 +16,7 @@ set -xEo pipefail -CSIADDONS_VERSION="v0.9.0" +CSIADDONS_VERSION="v0.9.1" CSIADDONS_CRD_NAME="csiaddonsnodes.csiaddons.openshift.io" CSIADDONS_CONTAINER_NAME="csi-addons" From 8987d26a6d89d1be9cb68097579238a6d32798b7 Mon Sep 17 00:00:00 2001 From: Travis Nielsen Date: Wed, 28 Aug 2024 15:20:47 -0600 Subject: [PATCH 7/8] mon: remove extra mon from quorum before taking down pod When removing a mon from quorum, there is a race condition that can result in mon quorum going being lost at least temporarily. The mon pod was being deleted first, and then the mon removed from quorum. If any other mon went down between the time the pod of the bad mon was deleted and when the mon was removed from quorum, there may not be sufficient quorum to complete the action of removing the mon from quorum and the operator would be stuck. For example, there could be 4 mons temporarily due to timing of upgrading K8s nodes where mons may be taken down for some number of minutes. Say a new mon is started while the down mon also comes back up. Now the operator sees it can remove the 4th mon from quorum, so it starts to remove it. Now say another mon goes down on another node that is being updated or otherwise drained. Since the 4th mon pod was deleted and another mon is down, there are only two mons remaining in quorum, but 3 mons are required in quorum when there are 4 mons. Therefore, the quorum is stuck until the third mon comes back up. The solution is to first remove the extra mon from quorum before taking down the mon pod. Signed-off-by: Travis Nielsen --- pkg/operator/ceph/cluster/mon/health.go | 56 +++++++++++++---------- pkg/operator/ceph/cluster/mon/mon.go | 34 +++++++++++++- pkg/operator/ceph/cluster/mon/mon_test.go | 44 ++++++++++++++++++ 3 files changed, 108 insertions(+), 26 deletions(-) diff --git a/pkg/operator/ceph/cluster/mon/health.go b/pkg/operator/ceph/cluster/mon/health.go index df8de52878a0..a8fe149fe02c 100644 --- a/pkg/operator/ceph/cluster/mon/health.go +++ b/pkg/operator/ceph/cluster/mon/health.go @@ -730,20 +730,6 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro } logger.Infof("ensuring removal of unhealthy monitor %s", daemonName) - resourceName := resourceName(daemonName) - - // Remove the mon pod if it is still there - var gracePeriod int64 - propagation := metav1.DeletePropagationForeground - options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation} - if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil { - if kerrors.IsNotFound(err) { - logger.Infof("dead mon %s was already gone", resourceName) - } else { - logger.Errorf("failed to remove dead mon deployment %q. 
%v", resourceName, err) - } - } - // Remove the bad monitor from quorum if shouldRemoveFromQuorum { if err := c.removeMonitorFromQuorum(daemonName); err != nil { @@ -751,10 +737,33 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro } } delete(c.ClusterInfo.Monitors, daemonName) - delete(c.mapping.Schedule, daemonName) + if err := c.saveMonConfig(); err != nil { + return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName) + } + + // Update cluster-wide RBD bootstrap peer token since Monitors have changed + _, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo) + if err != nil { + return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token") + } + + // When the mon is removed from quorum, it is possible that the operator will be restarted + // before the mon pod is deleted. In this case, the operator will need to delete the mon + // during the next reconcile. If the reconcile finds an extra mon pod, it will be removed + // at that later reconcile. Thus, we delete the mon pod last during the failover + // and in case the failover is interrupted, the operator can detect the resources to finish the cleanup. + c.removeMonResources(daemonName) + return nil +} + +func (c *Cluster) removeMonResources(daemonName string) { // Remove the service endpoint + resourceName := resourceName(daemonName) + var gracePeriod int64 + propagation := metav1.DeletePropagationForeground + options := &metav1.DeleteOptions{GracePeriodSeconds: &gracePeriod, PropagationPolicy: &propagation} if err := c.context.Clientset.CoreV1().Services(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil { if kerrors.IsNotFound(err) { logger.Infof("dead mon service %s was already gone", resourceName) @@ -772,17 +781,14 @@ func (c *Cluster) removeMonWithOptionalQuorum(daemonName string, shouldRemoveFro } } - if err := c.saveMonConfig(); err != nil { - return errors.Wrapf(err, "failed to save mon config after failing over mon %s", daemonName) - } - - // Update cluster-wide RBD bootstrap peer token since Monitors have changed - _, err := controller.CreateBootstrapPeerSecret(c.context, c.ClusterInfo, &cephv1.CephCluster{ObjectMeta: metav1.ObjectMeta{Name: c.ClusterInfo.NamespacedName().Name, Namespace: c.Namespace}}, c.ownerInfo) - if err != nil { - return errors.Wrap(err, "failed to update cluster rbd bootstrap peer token") + // Remove the mon pod if it is still there + if err := c.context.Clientset.AppsV1().Deployments(c.Namespace).Delete(c.ClusterInfo.Context, resourceName, *options); err != nil { + if kerrors.IsNotFound(err) { + logger.Infof("dead mon %s was already gone", resourceName) + } else { + logger.Errorf("failed to remove dead mon deployment %q. 
%v", resourceName, err) + } } - - return nil } func (c *Cluster) removeMonitorFromQuorum(name string) error { diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go index d80839d65c2c..17258a5322d5 100644 --- a/pkg/operator/ceph/cluster/mon/mon.go +++ b/pkg/operator/ceph/cluster/mon/mon.go @@ -1060,7 +1060,39 @@ func (c *Cluster) startDeployments(mons []*monConfig, requireAllInQuorum bool) e requireAllInQuorum = true } } - return c.waitForMonsToJoin(mons, requireAllInQuorum) + err = c.waitForMonsToJoin(mons, requireAllInQuorum) + + // Check for the rare case of an extra mon deployment that needs to be cleaned up + c.checkForExtraMonResources(mons, deployments.Items) + return err +} + +func (c *Cluster) checkForExtraMonResources(mons []*monConfig, deployments []apps.Deployment) string { + if len(deployments) <= c.spec.Mon.Count || len(deployments) <= len(mons) { + return "" + } + + // If there are more deployments than expected mons from the ceph quorum, + // find the extra mon deployment and clean it up. + logger.Infof("there is an extra mon deployment that is not needed and not in quorum") + for _, deploy := range deployments { + monName := deploy.Labels[controller.DaemonIDLabel] + found := false + // Search for the mon in the list of mons expected in quorum + for _, monDaemon := range mons { + if monName == monDaemon.DaemonName { + found = true + break + } + } + if !found { + logger.Infof("deleting extra mon deployment %q", deploy.Name) + c.removeMonResources(monName) + return monName + } + } + + return "" } func (c *Cluster) waitForMonsToJoin(mons []*monConfig, requireAllInQuorum bool) error { diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go index fb8643d8c251..9d27be4014c9 100644 --- a/pkg/operator/ceph/cluster/mon/mon_test.go +++ b/pkg/operator/ceph/cluster/mon/mon_test.go @@ -676,6 +676,50 @@ func TestMonVolumeClaimTemplate(t *testing.T) { }) } } + +func TestRemoveExtraMonDeployments(t *testing.T) { + namespace := "ns" + context, err := newTestStartCluster(t, namespace) + assert.NoError(t, err) + c := newCluster(context, namespace, true, v1.ResourceRequirements{}) + c.ClusterInfo = clienttest.CreateTestClusterInfo(1) + + // Nothing to remove when the mons match the deployment + mons := []*monConfig{ + {ResourceName: "rook-ceph-mon-a", DaemonName: "a"}, + } + deployments := []apps.Deployment{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "rook-ceph-mon-a", + Labels: map[string]string{"ceph_daemon_id": "a"}, + }, + }, + } + c.spec.Mon.Count = 1 + removed := c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "", removed) + + // Remove an extra mon deployment + deployments = append(deployments, apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rook-ceph-mon-b", + Labels: map[string]string{"ceph_daemon_id": "b"}, + }}) + removed = c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "b", removed) + + // Nothing to remove when there are not enough deployments for the expected mons + mons = []*monConfig{ + {ResourceName: "rook-ceph-mon-a", DaemonName: "a"}, + {ResourceName: "rook-ceph-mon-b", DaemonName: "b"}, + {ResourceName: "rook-ceph-mon-c", DaemonName: "c"}, + } + c.spec.Mon.Count = 3 + removed = c.checkForExtraMonResources(mons, deployments) + assert.Equal(t, "", removed) +} + func TestStretchMonVolumeClaimTemplate(t *testing.T) { generalSC := "generalSC" zoneSC := "zoneSC" From e3785883591ca6710a04e89bd4f0f88079775d81 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: 
Fri, 16 Aug 2024 19:04:01 +0200 Subject: [PATCH 8/8] network: add a new operator config setting ROOK_ENFORCE_HOSTNETWORK This new setting is of Boolean type and defaults to "false". When set to "true", it changes the behavior of the rook operator to nable host network on all pods created by the cephcluster controller new method to check the setting: opcontroller.EnForceHostNetwork() Signed-off-by: Michael Adam --- pkg/apis/ceph.rook.io/v1/network.go | 17 ++++++++++-- pkg/apis/ceph.rook.io/v1/network_test.go | 24 ++++++++++++++++- pkg/operator/ceph/cluster/cleanup.go | 2 ++ .../ceph/cluster/osd/provision_spec.go | 3 ++- pkg/operator/ceph/controller.go | 1 + .../ceph/controller/controller_utils.go | 20 ++++++++++++-- .../ceph/controller/controller_utils_test.go | 27 +++++++++++++++++++ pkg/operator/ceph/csi/spec.go | 3 +++ pkg/operator/ceph/object/cosi/spec.go | 2 ++ pkg/operator/discover/discover.go | 3 ++- .../k8sutil/cmdreporter/cmdreporter.go | 2 ++ 11 files changed, 97 insertions(+), 7 deletions(-) diff --git a/pkg/apis/ceph.rook.io/v1/network.go b/pkg/apis/ceph.rook.io/v1/network.go index c950eed22b43..debe0d39f7c5 100644 --- a/pkg/apis/ceph.rook.io/v1/network.go +++ b/pkg/apis/ceph.rook.io/v1/network.go @@ -27,6 +27,11 @@ import ( "github.com/pkg/errors" ) +// enforceHostNetwork is a private package variable that can be set via the rook-operator-config +// setting "ROOK_ENFORCE_HOST_NETWORK". when set to "true", it lets rook create all pods with host network enabled. +// This can be used, for example, to run Rook in k8s clusters with no CNI where host networking is required +var enforceHostNetwork bool = false + // IsMultus get whether to use multus network provider func (n *NetworkSpec) IsMultus() bool { return n.Provider == NetworkProviderMultus @@ -40,7 +45,7 @@ func (n *NetworkSpec) IsMultus() bool { // together with an empty or unset network provider has the same effect as // network.Provider set to "host" func (n *NetworkSpec) IsHost() bool { - return (n.HostNetwork && n.Provider == NetworkProviderDefault) || n.Provider == NetworkProviderHost + return enforceHostNetwork || (n.HostNetwork && n.Provider == NetworkProviderDefault) || n.Provider == NetworkProviderHost } func ValidateNetworkSpec(clusterNamespace string, spec NetworkSpec) error { @@ -62,7 +67,7 @@ func ValidateNetworkSpec(clusterNamespace string, spec NetworkSpec) error { if !spec.AddressRanges.IsEmpty() { if !spec.IsMultus() && !spec.IsHost() { - // TODO: be sure to update docs that AddressRanges can be specified for host networking as + // TODO: be sure to update docs that AddressRanges can be specified for host networking as // well as multus so that the override configmap doesn't need to be set return errors.Errorf("network ranges can only be specified for %q and %q network providers", NetworkProviderHost, NetworkProviderMultus) } @@ -181,3 +186,11 @@ func (l *CIDRList) String() string { } return strings.Join(sl, ", ") } + +func SetEnforceHostNetwork(val bool) { + enforceHostNetwork = val +} + +func EnforceHostNetwork() bool { + return enforceHostNetwork +} diff --git a/pkg/apis/ceph.rook.io/v1/network_test.go b/pkg/apis/ceph.rook.io/v1/network_test.go index 6c6980f2ce8b..7269a86df979 100644 --- a/pkg/apis/ceph.rook.io/v1/network_test.go +++ b/pkg/apis/ceph.rook.io/v1/network_test.go @@ -75,6 +75,8 @@ func TestValidateNetworkSpec(t *testing.T) { // test the NetworkSpec.IsHost method with different network providers // Also test it in combination with the legacy // "HostNetwork" setting. 
+// Also test the effect of the operator config setting +// ROOK_ENFORCE_HOST_NETWORK. func TestNetworkCephIsHost(t *testing.T) { net := NetworkSpec{HostNetwork: false} @@ -85,6 +87,14 @@ func TestNetworkCephIsHost(t *testing.T) { net.HostNetwork = true assert.True(t, net.IsHost()) + // enforcing does not change the result if host network is selected + // anyway in the cluster. + SetEnforceHostNetwork(true) + assert.True(t, net.IsHost()) + + SetEnforceHostNetwork(false) + assert.True(t, net.IsHost()) + net = NetworkSpec{} net.Provider = NetworkProviderDefault net.HostNetwork = false @@ -95,16 +105,28 @@ func TestNetworkCephIsHost(t *testing.T) { net.HostNetwork = false assert.False(t, net.IsHost()) + // test that not enforcing does not change the result. + SetEnforceHostNetwork(false) + assert.False(t, net.IsHost()) + + // test enforcing of host network + SetEnforceHostNetwork(true) + assert.True(t, net.IsHost()) + + SetEnforceHostNetwork(false) net = NetworkSpec{} net.Provider = NetworkProviderMultus net.HostNetwork = true assert.False(t, net.IsHost()) - // test with nonempty but invalid provider + // test with nonempty but invalid provider net = NetworkSpec{} net.HostNetwork = true net.Provider = "foo" + SetEnforceHostNetwork(false) assert.False(t, net.IsHost()) + SetEnforceHostNetwork(true) + assert.True(t, net.IsHost()) } diff --git a/pkg/operator/ceph/cluster/cleanup.go b/pkg/operator/ceph/cluster/cleanup.go index 92b2933591ab..eb50d1a3354d 100644 --- a/pkg/operator/ceph/cluster/cleanup.go +++ b/pkg/operator/ceph/cluster/cleanup.go @@ -30,6 +30,7 @@ import ( "github.com/rook/rook/pkg/operator/ceph/cluster/osd" "github.com/rook/rook/pkg/operator/ceph/cluster/rbd" "github.com/rook/rook/pkg/operator/ceph/controller" + opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" "github.com/rook/rook/pkg/operator/ceph/file/mds" "github.com/rook/rook/pkg/operator/ceph/file/mirror" "github.com/rook/rook/pkg/operator/ceph/object" @@ -162,6 +163,7 @@ func (c *ClusterController) cleanUpJobTemplateSpec(cluster *cephv1.CephCluster, RestartPolicy: v1.RestartPolicyOnFailure, PriorityClassName: cephv1.GetCleanupPriorityClassName(cluster.Spec.PriorityClassNames), ServiceAccountName: k8sutil.DefaultServiceAccount, + HostNetwork: opcontroller.EnforceHostNetwork(), }, } diff --git a/pkg/operator/ceph/cluster/osd/provision_spec.go b/pkg/operator/ceph/cluster/osd/provision_spec.go index 9e3673dfbb0b..4839ea6a230b 100644 --- a/pkg/operator/ceph/cluster/osd/provision_spec.go +++ b/pkg/operator/ceph/cluster/osd/provision_spec.go @@ -28,6 +28,7 @@ import ( "github.com/rook/rook/pkg/operator/ceph/cluster/mon" "github.com/rook/rook/pkg/operator/ceph/cluster/osd/config" "github.com/rook/rook/pkg/operator/ceph/controller" + opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" "github.com/rook/rook/pkg/operator/k8sutil" batch "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" @@ -154,7 +155,7 @@ func (c *Cluster) provisionPodTemplateSpec(osdProps osdProperties, restart v1.Re }, RestartPolicy: restart, Volumes: volumes, - HostNetwork: c.spec.Network.IsHost(), + HostNetwork: opcontroller.EnforceHostNetwork(), PriorityClassName: cephv1.GetOSDPriorityClassName(c.spec.PriorityClassNames), SchedulerName: osdProps.schedulerName, } diff --git a/pkg/operator/ceph/controller.go b/pkg/operator/ceph/controller.go index 0e32a898a1a5..238dd29bd43b 100644 --- a/pkg/operator/ceph/controller.go +++ b/pkg/operator/ceph/controller.go @@ -132,6 +132,7 @@ func (r *ReconcileConfig) reconcile(request reconcile.Request) 
(reconcile.Result } opcontroller.SetAllowLoopDevices(r.config.Parameters) + opcontroller.SetEnforceHostNetwork(r.config.Parameters) logger.Infof("%s done reconciling", controllerName) return reconcile.Result{}, nil diff --git a/pkg/operator/ceph/controller/controller_utils.go b/pkg/operator/ceph/controller/controller_utils.go index b726e2e97aed..f5cc1ca1da27 100644 --- a/pkg/operator/ceph/controller/controller_utils.go +++ b/pkg/operator/ceph/controller/controller_utils.go @@ -50,7 +50,9 @@ type ClusterHealth struct { const ( // OperatorSettingConfigMapName refers to ConfigMap that configures rook ceph operator - OperatorSettingConfigMapName string = "rook-ceph-operator-config" + OperatorSettingConfigMapName string = "rook-ceph-operator-config" + enforceHostNetworkSettingName string = "ROOK_ENFORCE_HOST_NETWORK" + enforceHostNetworkDefaultValue string = "false" // UninitializedCephConfigError refers to the error message printed by the Ceph CLI when there is no ceph configuration file // This typically is raised when the operator has not finished initializing @@ -116,6 +118,21 @@ func LoopDevicesAllowed() bool { return loopDevicesAllowed } +func SetEnforceHostNetwork(data map[string]string) { + strval := k8sutil.GetValue(data, enforceHostNetworkSettingName, enforceHostNetworkDefaultValue) + val, err := strconv.ParseBool(strval) + if err != nil { + logger.Warningf("failed to parse value %q for %q. assuming false value", strval, enforceHostNetworkSettingName) + cephv1.SetEnforceHostNetwork(false) + return + } + cephv1.SetEnforceHostNetwork(val) +} + +func EnforceHostNetwork() bool { + return cephv1.EnforceHostNetwork() +} + // canIgnoreHealthErrStatusInReconcile determines whether a status of HEALTH_ERR in the CephCluster can be ignored safely. func canIgnoreHealthErrStatusInReconcile(cephCluster cephv1.CephCluster, controllerName string) bool { // Get a list of all the keys causing the HEALTH_ERR status. 
@@ -153,7 +170,6 @@ func IsReadyToReconcile(ctx context.Context, c client.Client, namespacedName typ return cephCluster, false, cephClusterExists, WaitForRequeueIfCephClusterNotReady } cephCluster = clusterList.Items[0] - // If the cluster has a cleanup policy to destroy the cluster and it has been marked for deletion, treat it as if it does not exist if cephCluster.Spec.CleanupPolicy.HasDataDirCleanPolicy() && !cephCluster.DeletionTimestamp.IsZero() { logger.Infof("%q: CephCluster has a destructive cleanup policy, allowing %q to be deleted", controllerName, namespacedName) diff --git a/pkg/operator/ceph/controller/controller_utils_test.go b/pkg/operator/ceph/controller/controller_utils_test.go index eadcb2376c40..532cc0bcbecc 100644 --- a/pkg/operator/ceph/controller/controller_utils_test.go +++ b/pkg/operator/ceph/controller/controller_utils_test.go @@ -106,6 +106,33 @@ func TestSetAllowLoopDevices(t *testing.T) { assert.True(t, LoopDevicesAllowed()) } +func TestSetEnforceHostNetwork(t *testing.T) { + logger.Infof("testing default value for %v", enforceHostNetworkSettingName) + opConfig := map[string]string{} + SetEnforceHostNetwork(opConfig) + assert.False(t, EnforceHostNetwork()) + + // test invalid setting + var value string = "foo" + logger.Infof("testing invalid value'%v' for %v", value, enforceHostNetworkSettingName) + opConfig[enforceHostNetworkSettingName] = value + SetEnforceHostNetwork(opConfig) + assert.False(t, EnforceHostNetwork()) + + // test valid settings + value = "true" + logger.Infof("testing valid value'%v' for %v", value, enforceHostNetworkSettingName) + opConfig[enforceHostNetworkSettingName] = value + SetEnforceHostNetwork(opConfig) + assert.True(t, EnforceHostNetwork()) + + value = "false" + logger.Infof("testing valid value'%v' for %v", value, enforceHostNetworkSettingName) + opConfig[enforceHostNetworkSettingName] = value + SetEnforceHostNetwork(opConfig) + assert.False(t, EnforceHostNetwork()) +} + func TestIsReadyToReconcile(t *testing.T) { scheme := scheme.Scheme scheme.AddKnownTypes(cephv1.SchemeGroupVersion, &cephv1.CephCluster{}, &cephv1.CephClusterList{}) diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go index 4be362ed25bb..d63e7bc9da2b 100644 --- a/pkg/operator/ceph/csi/spec.go +++ b/pkg/operator/ceph/csi/spec.go @@ -382,6 +382,7 @@ func (r *ReconcileCSI) startDrivers(ver *version.Info, ownerInfo *k8sutil.OwnerI if tp.CSILogRotation { applyLogrotateSidecar(&rbdProvisionerDeployment.Spec.Template, "csi-rbd-deployment-log-collector", LogrotateTemplatePath, tp) } + rbdProvisionerDeployment.Spec.Template.Spec.HostNetwork = opcontroller.EnforceHostNetwork() // Create service if either liveness or GRPC metrics are enabled. if CSIParam.EnableLiveness { @@ -419,6 +420,7 @@ func (r *ReconcileCSI) startDrivers(ver *version.Info, ownerInfo *k8sutil.OwnerI if tp.CSILogRotation { applyLogrotateSidecar(&cephfsProvisionerDeployment.Spec.Template, "csi-cephfs-deployment-log-collector", LogrotateTemplatePath, tp) } + cephfsProvisionerDeployment.Spec.Template.Spec.HostNetwork = opcontroller.EnforceHostNetwork() // Create service if either liveness or GRPC metrics are enabled. 
if CSIParam.EnableLiveness { @@ -457,6 +459,7 @@ func (r *ReconcileCSI) startDrivers(ver *version.Info, ownerInfo *k8sutil.OwnerI if tp.CSILogRotation { applyLogrotateSidecar(&nfsProvisionerDeployment.Spec.Template, "csi-nfs-deployment-log-collector", LogrotateTemplatePath, tp) } + nfsProvisionerDeployment.Spec.Template.Spec.HostNetwork = opcontroller.EnforceHostNetwork() enabledDrivers = append(enabledDrivers, driverDetails{ name: NFSDriverShortName, diff --git a/pkg/operator/ceph/object/cosi/spec.go b/pkg/operator/ceph/object/cosi/spec.go index f01ebabbd662..50bf737c5bab 100644 --- a/pkg/operator/ceph/object/cosi/spec.go +++ b/pkg/operator/ceph/object/cosi/spec.go @@ -20,6 +20,7 @@ import ( "github.com/pkg/errors" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" "github.com/rook/rook/pkg/operator/ceph/controller" + opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -83,6 +84,7 @@ func createCOSIPodSpec(cephCOSIDriver *cephv1.CephCOSIDriver) (corev1.PodTemplat cosiSideCarContainer := createCOSISideCarContainer(cephCOSIDriver) podSpec := corev1.PodSpec{ + HostNetwork: opcontroller.EnforceHostNetwork(), Containers: []corev1.Container{ cosiDriverContainer, cosiSideCarContainer, diff --git a/pkg/operator/discover/discover.go b/pkg/operator/discover/discover.go index bf543524b328..ee4cdb3f6369 100644 --- a/pkg/operator/discover/discover.go +++ b/pkg/operator/discover/discover.go @@ -31,6 +31,7 @@ import ( "github.com/rook/rook/pkg/clusterd" discoverDaemon "github.com/rook/rook/pkg/daemon/discover" "github.com/rook/rook/pkg/operator/ceph/controller" + opcontroller "github.com/rook/rook/pkg/operator/ceph/controller" k8sutil "github.com/rook/rook/pkg/operator/k8sutil" "github.com/rook/rook/pkg/util/sys" @@ -173,7 +174,7 @@ func (d *Discover) createDiscoverDaemonSet(ctx context.Context, namespace, disco }, }, }, - HostNetwork: false, + HostNetwork: opcontroller.EnforceHostNetwork(), PriorityClassName: k8sutil.GetValue(data, discoverDaemonsetPriorityClassNameEnv, ""), }, }, diff --git a/pkg/operator/k8sutil/cmdreporter/cmdreporter.go b/pkg/operator/k8sutil/cmdreporter/cmdreporter.go index affc87558f9e..b651def6d24a 100644 --- a/pkg/operator/k8sutil/cmdreporter/cmdreporter.go +++ b/pkg/operator/k8sutil/cmdreporter/cmdreporter.go @@ -25,6 +25,7 @@ import ( "github.com/coreos/pkg/capnslog" "github.com/pkg/errors" + cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" "github.com/rook/rook/pkg/daemon/util" "github.com/rook/rook/pkg/operator/k8sutil" batch "k8s.io/api/batch/v1" @@ -302,6 +303,7 @@ func (cr *cmdReporterCfg) initJobSpec() (*batch.Job, error) { }, RestartPolicy: v1.RestartPolicyOnFailure, ServiceAccountName: k8sutil.DefaultServiceAccount, + HostNetwork: cephv1.EnforceHostNetwork(), } copyBinsVol, _ := copyBinariesVolAndMount() podSpec.Volumes = []v1.Volume{copyBinsVol}