diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml
index a5d7b8b5264b..49d4f7e81f37 100644
--- a/.github/workflows/canary-integration-test.yml
+++ b/.github/workflows/canary-integration-test.yml
@@ -1334,7 +1334,7 @@ jobs:
# ceph-image: # use default
- name: upload test result
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: always()
with:
name: rgw-multisite-testing
@@ -1366,7 +1366,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: upload test result
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: always()
with:
name: encryption-pvc-kms-ibm-kp
diff --git a/.github/workflows/canary-test-config/action.yaml b/.github/workflows/canary-test-config/action.yaml
index c13eb4570ef6..24ae8ae9f18d 100644
--- a/.github/workflows/canary-test-config/action.yaml
+++ b/.github/workflows/canary-test-config/action.yaml
@@ -23,7 +23,7 @@ runs:
- name: Setup Minikube
shell: bash --noprofile --norc -eo pipefail -x {0}
run: |
- tests/scripts/github-action-helper.sh install_minikube_with_none_driver v1.28.4
+ tests/scripts/github-action-helper.sh install_minikube_with_none_driver v1.29.0
- name: install deps
shell: bash --noprofile --norc -eo pipefail -x {0}
diff --git a/.github/workflows/daily-nightly-jobs.yml b/.github/workflows/daily-nightly-jobs.yml
index 886fd3ffa0b1..07fefa5ddd88 100644
--- a/.github/workflows/daily-nightly-jobs.yml
+++ b/.github/workflows/daily-nightly-jobs.yml
@@ -107,7 +107,7 @@ jobs:
name: canary-arm64
- name: upload canary test result
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: always()
with:
name: canary-arm64
@@ -147,7 +147,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-smoke-suite-quincy-artifact
@@ -187,7 +187,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-smoke-suite-reef-artifact
@@ -227,7 +227,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-smoke-suite-master-artifact
@@ -267,7 +267,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-object-suite-quincy-artifact
@@ -307,7 +307,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-object-suite-master-artifact
@@ -347,7 +347,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-suite-reef-artifact
@@ -387,7 +387,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-suite-quincy-artifact
@@ -418,7 +418,7 @@ jobs:
ceph-image: quay.io/ceph/daemon-base:${{ matrix.ceph-image-tag }}
- name: upload test result
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: always()
with:
name: rgw-multisite-testing-ceph-${{ matrix.ceph-image-tag }}
@@ -449,7 +449,7 @@ jobs:
github-token: ${{ secrets.GITHUB_TOKEN }}
- name: upload test result
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: always()
with:
name: encryption-pvc-kms-ibm-kp
diff --git a/.github/workflows/integration-test-helm-suite.yaml b/.github/workflows/integration-test-helm-suite.yaml
index 2640df307e4c..0e5a9a41095d 100644
--- a/.github/workflows/integration-test-helm-suite.yaml
+++ b/.github/workflows/integration-test-helm-suite.yaml
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -59,11 +59,10 @@ jobs:
run: |
export LOG_DIR="/home/runner/work/rook/rook/tests/integration/_output/tests/"
export CLUSTER_NAMESPACE="helm-ns"
- export OPERATOR_NAMESPACE="helm-ns-system"
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-helm-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-test-mgr-suite.yaml b/.github/workflows/integration-test-mgr-suite.yaml
index 6eee2317dfcb..d0bea34a95f5 100644
--- a/.github/workflows/integration-test-mgr-suite.yaml
+++ b/.github/workflows/integration-test-mgr-suite.yaml
@@ -24,7 +24,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.28.4"]
+ kubernetes-versions: ["v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -58,7 +58,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-mgr-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-test-multi-cluster-suite.yaml b/.github/workflows/integration-test-multi-cluster-suite.yaml
index f552df5d8b9c..b826fe4d22b0 100644
--- a/.github/workflows/integration-test-multi-cluster-suite.yaml
+++ b/.github/workflows/integration-test-multi-cluster-suite.yaml
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.28.4"]
+ kubernetes-versions: ["v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -59,7 +59,7 @@ jobs:
CLUSTER_NAMESPACE="multi-external" tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-multi-cluster-deploy-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-test-object-suite.yaml b/.github/workflows/integration-test-object-suite.yaml
index ccecbeb7f83c..9e4c1c24a3ad 100644
--- a/.github/workflows/integration-test-object-suite.yaml
+++ b/.github/workflows/integration-test-object-suite.yaml
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -58,7 +58,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-object-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-test-smoke-suite.yaml b/.github/workflows/integration-test-smoke-suite.yaml
index c6bb24bc959e..418a9ad6a496 100644
--- a/.github/workflows/integration-test-smoke-suite.yaml
+++ b/.github/workflows/integration-test-smoke-suite.yaml
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -58,7 +58,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-smoke-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-test-upgrade-suite.yaml b/.github/workflows/integration-test-upgrade-suite.yaml
index 10122b68bce9..03d46b4b2dd2 100644
--- a/.github/workflows/integration-test-upgrade-suite.yaml
+++ b/.github/workflows/integration-test-upgrade-suite.yaml
@@ -25,7 +25,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -58,7 +58,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -70,7 +70,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -106,7 +106,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-helm-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/.github/workflows/integration-tests-on-release.yaml b/.github/workflows/integration-tests-on-release.yaml
index 9f714501b1d1..83aa41341c95 100644
--- a/.github/workflows/integration-tests-on-release.yaml
+++ b/.github/workflows/integration-tests-on-release.yaml
@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.24.17", "v1.26.11", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.25.16", "v1.27.8", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -48,7 +48,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-helm-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -59,7 +59,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.24.17", "v1.26.11", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.25.16", "v1.27.8", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -88,7 +88,7 @@ jobs:
CLUSTER_NAMESPACE="multi-external" tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-multi-cluster-deploy-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -99,7 +99,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.24.17", "v1.26.11", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.25.16", "v1.27.8", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -127,7 +127,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-smoke-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -138,7 +138,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.24.17", "v1.26.11", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.25.16", "v1.27.8", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -166,7 +166,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -177,7 +177,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.25.16", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.26.11", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -208,7 +208,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-upgrade-suite-artifact-${{ matrix.kubernetes-versions }}
@@ -219,7 +219,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- kubernetes-versions: ["v1.23.17", "v1.28.4"]
+ kubernetes-versions: ["v1.23.17", "v1.29.0"]
steps:
- name: checkout
uses: actions/checkout@v4
@@ -247,7 +247,7 @@ jobs:
tests/scripts/collect-logs.sh
- name: Artifact
- uses: actions/upload-artifact@v3
+ uses: actions/upload-artifact@v4
if: failure()
with:
name: ceph-object-suite-artifact-${{ matrix.kubernetes-versions }}
diff --git a/Documentation/CRDs/specification.md b/Documentation/CRDs/specification.md
index e7592e645de3..f946897f7e4c 100644
--- a/Documentation/CRDs/specification.md
+++ b/Documentation/CRDs/specification.md
@@ -8420,7 +8420,8 @@ NetworkProviderType
(Optional)
- Provider is what provides network connectivity to the cluster e.g. “host” or “multus”
+Provider is what provides network connectivity to the cluster e.g. “host” or “multus”.
+If the Provider is updated from being empty to “host” on a running cluster, then the operator will automatically fail over all the mons to apply the “host” network settings.
|
@@ -8492,7 +8493,9 @@ bool
(Optional)
- HostNetwork to enable host network
+HostNetwork to enable host network.
+If host networking is enabled or disabled on a running cluster, then the operator will automatically fail over all the mons to
+apply the new network settings.
|
diff --git a/Documentation/Getting-Started/quickstart.md b/Documentation/Getting-Started/quickstart.md
index 42b528dfa519..1fc735fd88eb 100644
--- a/Documentation/Getting-Started/quickstart.md
+++ b/Documentation/Getting-Started/quickstart.md
@@ -36,7 +36,7 @@ To configure the Ceph storage cluster, at least one of these local storage optio
A simple Rook cluster is created for Kubernetes with the following `kubectl` commands and [example manifests](https://github.com/rook/rook/blob/master/deploy/examples).
```console
-$ git clone --single-branch --branch v1.13.0 https://github.com/rook/rook.git
+$ git clone --single-branch --branch v1.13.1 https://github.com/rook/rook.git
cd rook/deploy/examples
kubectl create -f crds.yaml -f common.yaml -f operator.yaml
kubectl create -f cluster.yaml
diff --git a/Documentation/Helm-Charts/operator-chart.md b/Documentation/Helm-Charts/operator-chart.md
index 4aa24f5bfe69..ec825d8dd5ef 100644
--- a/Documentation/Helm-Charts/operator-chart.md
+++ b/Documentation/Helm-Charts/operator-chart.md
@@ -59,12 +59,12 @@ The following table lists the configurable parameters of the rook-operator chart
| `csi.cephFSKernelMountOptions` | Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options. Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR | `nil` |
| `csi.cephFSPluginUpdateStrategy` | CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate | `RollingUpdate` |
| `csi.cephFSPluginUpdateStrategyMaxUnavailable` | A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. | `1` |
-| `csi.cephcsi.image` | Ceph CSI image | `quay.io/cephcsi/cephcsi:v3.10.0` |
+| `csi.cephcsi.image` | Ceph CSI image | `quay.io/cephcsi/cephcsi:v3.10.1` |
| `csi.cephfsLivenessMetricsPort` | CSI CephFS driver metrics port | `9081` |
| `csi.cephfsPodLabels` | Labels to add to the CSI CephFS Deployments and DaemonSets Pods | `nil` |
| `csi.clusterName` | Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster | `nil` |
| `csi.csiAddons.enabled` | Enable CSIAddons | `false` |
-| `csi.csiAddons.image` | CSIAddons Sidecar image | `"quay.io/csiaddons/k8s-sidecar:v0.7.0"` |
+| `csi.csiAddons.image` | CSIAddons Sidecar image | `"quay.io/csiaddons/k8s-sidecar:v0.8.0"` |
| `csi.csiAddonsPort` | CSI Addons server port | `9070` |
| `csi.csiCephFSPluginResource` | CEPH CSI CephFS plugin resource requirement list | see values.yaml |
| `csi.csiCephFSPluginVolume` | The volume of the CephCSI CephFS plugin DaemonSet | `nil` |
diff --git a/Documentation/Storage-Configuration/Advanced/ceph-mon-health.md b/Documentation/Storage-Configuration/Advanced/ceph-mon-health.md
index 169c78b6efa2..ce80aa6fc355 100644
--- a/Documentation/Storage-Configuration/Advanced/ceph-mon-health.md
+++ b/Documentation/Storage-Configuration/Advanced/ceph-mon-health.md
@@ -115,3 +115,10 @@ $ ceph -s
osd: 3 osds: 3 up (since 10m), 3 in (since 10m)
[...]
```
+
+## Automatic Monitor Failover
+
+Rook will automatically fail over the mons when the following settings are updated in the CephCluster CR:
+- `spec.network.hostNetwork`: When enabled or disabled, Rook fails over all monitors, configuring them to enable or disable host networking.
+- `spec.network.provider`: When updated from empty to "host", Rook fails over all monitors, configuring them to use host networking.
+- `spec.network.multiClusterService`: When enabled or disabled, Rook fails over all monitors, configuring them to start (or stop) using service IPs compatible with the multi-cluster service.
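For reference, a minimal sketch (not part of this diff) of the CephCluster network settings whose change triggers the automatic mon failover documented above; the metadata names are the usual example values:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  network:
    # Switching provider from "" to "host" on a running cluster makes the
    # operator fail over each mon so it is re-created with a host IP address.
    provider: host
    # Toggling the legacy hostNetwork flag has the same failover effect.
    # hostNetwork: true
```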
diff --git a/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md b/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md
index ba0d33440bd2..5ce95ead3c4d 100644
--- a/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md
+++ b/Documentation/Storage-Configuration/Block-Storage-RBD/block-storage.md
@@ -204,9 +204,9 @@ If a node goes down where a pod is running where a RBD RWO volume is mounted, th
Deploy the csi-addons manifests:
```console
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/crds.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/rbac.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/setup-controller.yaml
```
Enable the `csi-addons` sidecar in the Rook operator configuration.
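As a sketch of that step (the key names are taken from operator.yaml elsewhere in this diff; the namespace and the "true" value are illustrative), the sidecar is enabled through the operator ConfigMap:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-ceph-operator-config
  namespace: rook-ceph
data:
  # Start the CSI-Addons sidecar alongside the CSI provisioner and plugin pods.
  CSI_ENABLE_CSIADDONS: "true"
  # Optional image override; defaults to the version bundled with the operator.
  ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.8.0"
```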
diff --git a/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md b/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md
index 38ee97104994..fd8a79f25014 100644
--- a/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md
+++ b/Documentation/Storage-Configuration/Ceph-CSI/ceph-csi-drivers.md
@@ -127,9 +127,9 @@ that the controller inspects and forwards to one or more CSI-Addons sidecars for
Deploy the controller by running the following commands:
```console
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml
-kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/crds.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/rbac.yaml
+kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.8.0/deploy/controller/setup-controller.yaml
```
This creates the required CRDs and configures permissions.
@@ -157,15 +157,15 @@ will start automatically in the RBD CSI provisioner and nodeplugin pods.
CSI-Addons supports the following operations:
* Reclaim Space
- * [Creating a ReclaimSpaceJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/reclaimspace.md#reclaimspacejob)
- * [Creating a ReclaimSpaceCronJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/reclaimspace.md#reclaimspacecronjob)
- * [Annotating PersistentVolumeClaims](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/reclaimspace.md#annotating-perstentvolumeclaims)
- * [Annotating Namespace](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/reclaimspace.md#annotating-namespace)
+ * [Creating a ReclaimSpaceJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/reclaimspace.md#reclaimspacejob)
+ * [Creating a ReclaimSpaceCronJob](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/reclaimspace.md#reclaimspacecronjob)
+ * [Annotating PersistentVolumeClaims](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/reclaimspace.md#annotating-perstentvolumeclaims)
+ * [Annotating Namespace](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/reclaimspace.md#annotating-namespace)
* Network Fencing
- * [Creating a NetworkFence](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/networkfence.md)
+ * [Creating a NetworkFence](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/networkfence.md)
* Volume Replication
- * [Creating VolumeReplicationClass](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/volumereplicationclass.md)
- * [Creating VolumeReplication CR](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.7.0/docs/volumereplication.md)
+ * [Creating VolumeReplicationClass](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/volumereplicationclass.md)
+ * [Creating VolumeReplication CR](https://github.com/csi-addons/kubernetes-csi-addons/blob/v0.8.0/docs/volumereplication.md)
## Enable RBD Encryption Support
diff --git a/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md b/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md
index a45b5e04eded..cf805fe30f95 100644
--- a/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md
+++ b/Documentation/Storage-Configuration/Ceph-CSI/custom-images.md
@@ -18,13 +18,13 @@ kubectl -n $ROOK_OPERATOR_NAMESPACE edit configmap rook-ceph-operator-config
The default upstream images are included below, which you can change to your desired images.
```yaml
-ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.0"
+ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.1"
ROOK_CSI_REGISTRAR_IMAGE: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.1"
ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v3.6.2"
ROOK_CSI_ATTACHER_IMAGE: "registry.k8s.io/sig-storage/csi-attacher:v4.4.2"
ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.9.2"
ROOK_CSI_SNAPSHOTTER_IMAGE: "registry.k8s.io/sig-storage/csi-snapshotter:v6.3.2"
-ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.7.0"
+ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.8.0"
```
### **Use private repository**
@@ -32,7 +32,7 @@ ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.7.0"
If image version is not passed along with the image name in any of the variables above,
Rook will add the corresponding default version to that image.
Example: if `ROOK_CSI_CEPH_IMAGE: "quay.io/private-repo/cephcsi"` is passed,
-Rook will add internal default version and consume it as `"quay.io/private-repo/cephcsi:v3.10.0"`.
+Rook will add internal default version and consume it as `"quay.io/private-repo/cephcsi:v3.10.1"`.
### **Use default images**
diff --git a/Documentation/Storage-Configuration/Monitoring/ceph-monitoring.md b/Documentation/Storage-Configuration/Monitoring/ceph-monitoring.md
index fead6dc6962d..750dea9c72a7 100644
--- a/Documentation/Storage-Configuration/Monitoring/ceph-monitoring.md
+++ b/Documentation/Storage-Configuration/Monitoring/ceph-monitoring.md
@@ -44,7 +44,7 @@ There are two sources for metrics collection:
From the root of your locally cloned Rook repo, go the monitoring directory:
```console
-$ git clone --single-branch --branch v1.13.0 https://github.com/rook/rook.git
+$ git clone --single-branch --branch v1.13.1 https://github.com/rook/rook.git
cd rook/deploy/examples/monitoring
```
diff --git a/Documentation/Upgrade/rook-upgrade.md b/Documentation/Upgrade/rook-upgrade.md
index 98659fa7125b..06ab0fdf199c 100644
--- a/Documentation/Upgrade/rook-upgrade.md
+++ b/Documentation/Upgrade/rook-upgrade.md
@@ -128,8 +128,8 @@ In order to successfully upgrade a Rook cluster, the following prerequisites mus
## Rook Operator Upgrade
-The examples given in this guide upgrade a live Rook cluster running `v1.12.9` to
-the version `v1.13.0`. This upgrade should work from any official patch release of Rook v1.12 to any
+The examples given in this guide upgrade a live Rook cluster running `v1.12.10` to
+the version `v1.13.1`. This upgrade should work from any official patch release of Rook v1.12 to any
official patch release of v1.13.
Let's get started!
@@ -156,7 +156,7 @@ by the Operator. Also update the Custom Resource Definitions (CRDs).
Get the latest common resources manifests that contain the latest changes.
```console
-git clone --single-branch --depth=1 --branch v1.13.0 https://github.com/rook/rook.git
+git clone --single-branch --depth=1 --branch v1.13.1 https://github.com/rook/rook.git
cd rook/deploy/examples
```
@@ -195,7 +195,7 @@ The largest portion of the upgrade is triggered when the operator's image is upd
When the operator is updated, it will proceed to update all of the Ceph daemons.
```console
-kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.13.0
+kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.13.1
```
### **3. Update Ceph CSI**
@@ -225,18 +225,18 @@ watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=
```
As an example, this cluster is midway through updating the OSDs. When all deployments report `1/1/1`
-availability and `rook-version=v1.13.0`, the Ceph cluster's core components are fully updated.
+availability and `rook-version=v1.13.1`, the Ceph cluster's core components are fully updated.
```console
Every 2.0s: kubectl -n rook-ceph get deployment -o j...
-rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.13.0
-rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.13.0
-rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.13.0
-rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.13.0
-rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.13.0
-rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.12.9
-rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.12.9
+rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.13.1
+rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.13.1
+rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.13.1
+rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.13.1
+rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.13.1
+rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.12.10
+rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.12.10
```
An easy check to see if the upgrade is totally finished is to check that there is only one
@@ -245,14 +245,14 @@ An easy check to see if the upgrade is totally finished is to check that there i
```console
# kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{"rook-version="}{.metadata.labels.rook-version}{"\n"}{end}' | sort | uniq
This cluster is not yet finished:
- rook-version=v1.12.9
- rook-version=v1.13.0
+ rook-version=v1.12.10
+ rook-version=v1.13.1
This cluster is finished:
- rook-version=v1.13.0
+ rook-version=v1.13.1
```
### **5. Verify the updated cluster**
-At this point, the Rook operator should be running version `rook/ceph:v1.13.0`.
+At this point, the Rook operator should be running version `rook/ceph:v1.13.1`.
Verify the CephCluster health using the [health verification doc](health-verification.md).
diff --git a/deploy/charts/rook-ceph-cluster/templates/cephcluster.yaml b/deploy/charts/rook-ceph-cluster/templates/cephcluster.yaml
index 3a568e22a383..4f5c78ca1e07 100644
--- a/deploy/charts/rook-ceph-cluster/templates/cephcluster.yaml
+++ b/deploy/charts/rook-ceph-cluster/templates/cephcluster.yaml
@@ -15,6 +15,9 @@ spec:
{{- if .Values.monitoring.externalMgrPrometheusPort }}
externalMgrPrometheusPort: {{ toYaml .Values.monitoring.externalMgrPrometheusPort }}
{{- end }}
+{{- if .Values.monitoring.interval }}
+ interval: {{ .Values.monitoring.interval }}
+{{- end }}
{{- end }}
{{ toYaml .Values.cephClusterSpec | indent 2 }}
diff --git a/deploy/charts/rook-ceph-cluster/values.yaml b/deploy/charts/rook-ceph-cluster/values.yaml
index 67ed48f4bc41..fb2499c47221 100644
--- a/deploy/charts/rook-ceph-cluster/values.yaml
+++ b/deploy/charts/rook-ceph-cluster/values.yaml
@@ -61,6 +61,8 @@ monitoring:
# Monitoring settings for external clusters:
# externalMgrEndpoints:
# externalMgrPrometheusPort:
+ # Scrape interval for prometheus
+ # interval: 5s
# allow adding custom labels and annotations to the prometheus rule
prometheusRule:
# -- Labels applied to PrometheusRule
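A minimal sketch of Helm values exercising the new setting (the 30s value and the enabled flag are illustrative; other monitoring values are omitted). The template change above renders it into `spec.monitoring.interval` of the generated CephCluster:

```yaml
monitoring:
  enabled: true
  # Prometheus scrape interval passed through to the CephCluster monitoring spec
  interval: 30s
```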
diff --git a/deploy/charts/rook-ceph/templates/deployment.yaml b/deploy/charts/rook-ceph/templates/deployment.yaml
index e7feeb64b8a7..a0d2be74fb91 100644
--- a/deploy/charts/rook-ceph/templates/deployment.yaml
+++ b/deploy/charts/rook-ceph/templates/deployment.yaml
@@ -32,6 +32,9 @@ spec:
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 5
+{{- if .Values.tolerations }}
+{{ toYaml .Values.tolerations | indent 8 }}
+{{- end }}
containers:
- name: rook-ceph-operator
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
@@ -110,10 +113,6 @@ spec:
nodeSelector:
{{ toYaml .Values.nodeSelector | indent 8 }}
{{- end }}
-{{- if .Values.tolerations }}
- tolerations:
-{{ toYaml .Values.tolerations | indent 8 }}
-{{- end }}
{{- if .Values.rbacEnable }}
serviceAccountName: rook-ceph-system
{{- end }}
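The template change above appends user-supplied tolerations under the existing `tolerations:` key, which already carries default NoExecute entries such as the `node.kubernetes.io/unreachable` toleration visible in this hunk, rather than rendering a second `tolerations:` block further down the pod spec, which duplicated the key whenever `.Values.tolerations` was set. A sketch of the rendered result for an illustrative custom toleration:

```yaml
tolerations:
  - effect: NoExecute
    key: node.kubernetes.io/unreachable
    operator: Exists
    tolerationSeconds: 5
  # appended from .Values.tolerations (illustrative entry)
  - key: storage-node
    operator: Exists
    effect: NoSchedule
```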
diff --git a/deploy/charts/rook-ceph/templates/resources.yaml b/deploy/charts/rook-ceph/templates/resources.yaml
index dae49adf1ede..c188bca43c04 100644
--- a/deploy/charts/rook-ceph/templates/resources.yaml
+++ b/deploy/charts/rook-ceph/templates/resources.yaml
@@ -2232,7 +2232,7 @@ spec:
description: DualStack determines whether Ceph daemons should listen on both IPv4 and IPv6
type: boolean
hostNetwork:
- description: HostNetwork to enable host network
+ description: HostNetwork to enable host network. If host networking is enabled or disabled on a running cluster, then the operator will automatically fail over all the mons to apply the new network settings.
type: boolean
ipFamily:
description: IPFamily is the single stack IPv6 or IPv4 protocol
@@ -2252,7 +2252,7 @@ spec:
type: boolean
type: object
provider:
- description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus"
+ description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus". If the Provider is updated from being empty to "host" on a running cluster, then the operator will automatically fail over all the mons to apply the "host" network settings.
enum:
- ""
- host
diff --git a/deploy/charts/rook-ceph/values.yaml b/deploy/charts/rook-ceph/values.yaml
index 28d29289dee2..e06dcc4cdc66 100644
--- a/deploy/charts/rook-ceph/values.yaml
+++ b/deploy/charts/rook-ceph/values.yaml
@@ -479,7 +479,7 @@ csi:
cephcsi:
# -- Ceph CSI image
- # @default -- `quay.io/cephcsi/cephcsi:v3.10.0`
+ # @default -- `quay.io/cephcsi/cephcsi:v3.10.1`
image:
registrar:
@@ -523,7 +523,7 @@ csi:
# -- Enable CSIAddons
enabled: false
# -- CSIAddons Sidecar image
- image: "quay.io/csiaddons/k8s-sidecar:v0.7.0"
+ image: "quay.io/csiaddons/k8s-sidecar:v0.8.0"
nfs:
# -- Enable the nfs csi driver
diff --git a/deploy/examples/cluster-external-management.yaml b/deploy/examples/cluster-external-management.yaml
index d0a79b088b98..8201e3b9e7ad 100644
--- a/deploy/examples/cluster-external-management.yaml
+++ b/deploy/examples/cluster-external-management.yaml
@@ -19,4 +19,4 @@ spec:
dataDirHostPath: /var/lib/rook
# providing an image is required, if you want to create other CRs (rgw, mds, nfs)
cephVersion:
- image: quay.io/ceph/ceph:v18.2.0 # Should match external cluster version
+ image: quay.io/ceph/ceph:v18.2.1 # Should match external cluster version
diff --git a/deploy/examples/cluster-on-local-pvc.yaml b/deploy/examples/cluster-on-local-pvc.yaml
index 0418c7e79d69..96f6ac6c0345 100644
--- a/deploy/examples/cluster-on-local-pvc.yaml
+++ b/deploy/examples/cluster-on-local-pvc.yaml
@@ -173,7 +173,7 @@ spec:
requests:
storage: 10Gi
cephVersion:
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
allowUnsupported: false
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
diff --git a/deploy/examples/cluster-on-pvc.yaml b/deploy/examples/cluster-on-pvc.yaml
index 1c5bde156b1e..a55773d6de79 100644
--- a/deploy/examples/cluster-on-pvc.yaml
+++ b/deploy/examples/cluster-on-pvc.yaml
@@ -33,7 +33,7 @@ spec:
requests:
storage: 10Gi
cephVersion:
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
allowUnsupported: false
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
diff --git a/deploy/examples/cluster-stretched-aws.yaml b/deploy/examples/cluster-stretched-aws.yaml
index 1414586685eb..20a3a1f9fb4a 100644
--- a/deploy/examples/cluster-stretched-aws.yaml
+++ b/deploy/examples/cluster-stretched-aws.yaml
@@ -44,7 +44,7 @@ spec:
mgr:
count: 2
cephVersion:
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
allowUnsupported: true
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
diff --git a/deploy/examples/cluster-stretched.yaml b/deploy/examples/cluster-stretched.yaml
index 9feed4a742b2..adb19a347c3f 100644
--- a/deploy/examples/cluster-stretched.yaml
+++ b/deploy/examples/cluster-stretched.yaml
@@ -38,7 +38,7 @@ spec:
mgr:
count: 2
cephVersion:
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
allowUnsupported: true
skipUpgradeChecks: false
continueUpgradeAfterChecksEvenIfNotHealthy: false
diff --git a/deploy/examples/cluster.yaml b/deploy/examples/cluster.yaml
index 1045a283c5e9..9b3451112fab 100644
--- a/deploy/examples/cluster.yaml
+++ b/deploy/examples/cluster.yaml
@@ -21,7 +21,7 @@ spec:
# versions running within the cluster. See tags available at https://hub.docker.com/r/ceph/ceph/tags/.
# If you want to be more precise, you can always use a timestamp tag such as quay.io/ceph/ceph:v17.2.6-20231027
# This tag might not contain a new Ceph version, just security fixes from the underlying operating system, which will reduce vulnerabilities
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
# Whether to allow unsupported versions of Ceph. Currently `quincy` and `reef` are supported.
# Future versions such as `squid` (v19) would require this to be set to `true`.
# Do not set to true in production.
diff --git a/deploy/examples/crds.yaml b/deploy/examples/crds.yaml
index fa232557c08a..f381e9c3b604 100644
--- a/deploy/examples/crds.yaml
+++ b/deploy/examples/crds.yaml
@@ -2230,7 +2230,7 @@ spec:
description: DualStack determines whether Ceph daemons should listen on both IPv4 and IPv6
type: boolean
hostNetwork:
- description: HostNetwork to enable host network
+ description: HostNetwork to enable host network. If host networking is enabled or disabled on a running cluster, then the operator will automatically fail over all the mons to apply the new network settings.
type: boolean
ipFamily:
description: IPFamily is the single stack IPv6 or IPv4 protocol
@@ -2250,7 +2250,7 @@ spec:
type: boolean
type: object
provider:
- description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus"
+ description: Provider is what provides network connectivity to the cluster e.g. "host" or "multus". If the Provider is updated from being empty to "host" on a running cluster, then the operator will automatically fail over all the mons to apply the "host" network settings.
enum:
- ""
- host
diff --git a/deploy/examples/direct-mount.yaml b/deploy/examples/direct-mount.yaml
index 90cae9979a88..6cc56827bd0f 100644
--- a/deploy/examples/direct-mount.yaml
+++ b/deploy/examples/direct-mount.yaml
@@ -18,7 +18,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: rook-direct-mount
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
command: ["/bin/bash"]
args: ["-m", "-c", "/usr/local/bin/toolbox.sh"]
imagePullPolicy: IfNotPresent
diff --git a/deploy/examples/images.txt b/deploy/examples/images.txt
index de7ad9f7eb4b..63d10c8be30b 100644
--- a/deploy/examples/images.txt
+++ b/deploy/examples/images.txt
@@ -1,11 +1,11 @@
gcr.io/k8s-staging-sig-storage/objectstorage-sidecar/objectstorage-sidecar:v20230130-v0.1.0-24-gc0cf995
- quay.io/ceph/ceph:v18.2.0
+ quay.io/ceph/ceph:v18.2.1
quay.io/ceph/cosi:v0.1.1
- quay.io/cephcsi/cephcsi:v3.10.0
- quay.io/csiaddons/k8s-sidecar:v0.7.0
+ quay.io/cephcsi/cephcsi:v3.10.1
+ quay.io/csiaddons/k8s-sidecar:v0.8.0
registry.k8s.io/sig-storage/csi-attacher:v4.4.2
registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.1
registry.k8s.io/sig-storage/csi-provisioner:v3.6.2
registry.k8s.io/sig-storage/csi-resizer:v1.9.2
registry.k8s.io/sig-storage/csi-snapshotter:v6.3.2
- rook/ceph:v1.13.0
+ rook/ceph:v1.13.1
diff --git a/deploy/examples/multus-validation.yaml b/deploy/examples/multus-validation.yaml
index c99160fb6153..febd364850b0 100644
--- a/deploy/examples/multus-validation.yaml
+++ b/deploy/examples/multus-validation.yaml
@@ -101,7 +101,7 @@ spec:
serviceAccountName: rook-ceph-multus-validation
containers:
- name: multus-validation
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
command: ["rook"]
args:
- "multus"
diff --git a/deploy/examples/operator-openshift.yaml b/deploy/examples/operator-openshift.yaml
index a66924f8d134..0d2f912fb3df 100644
--- a/deploy/examples/operator-openshift.yaml
+++ b/deploy/examples/operator-openshift.yaml
@@ -190,7 +190,7 @@ data:
# The default version of CSI supported by Rook will be started. To change the version
# of the CSI driver to something other than what is officially supported, change
# these images to the desired release of the CSI driver.
- # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.0"
+ # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.1"
# ROOK_CSI_REGISTRAR_IMAGE: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.1"
# ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.9.2"
# ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v3.6.2"
@@ -572,7 +572,7 @@ data:
CSI_ENABLE_CSIADDONS: "false"
# Enable watch for faster recovery from rbd rwo node loss
ROOK_WATCH_FOR_NODE_FAILURE: "true"
- # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.5.0"
+ # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.8.0"
# The GCSI RPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it's default to 150.
CSI_GRPC_TIMEOUT_SECONDS: "150"
@@ -672,7 +672,7 @@ spec:
serviceAccountName: rook-ceph-system
containers:
- name: rook-ceph-operator
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
args: ["ceph", "operator"]
securityContext:
runAsNonRoot: true
diff --git a/deploy/examples/operator.yaml b/deploy/examples/operator.yaml
index 76a10d5479af..94169106693d 100644
--- a/deploy/examples/operator.yaml
+++ b/deploy/examples/operator.yaml
@@ -106,7 +106,7 @@ data:
# The default version of CSI supported by Rook will be started. To change the version
# of the CSI driver to something other than what is officially supported, change
# these images to the desired release of the CSI driver.
- # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.0"
+ # ROOK_CSI_CEPH_IMAGE: "quay.io/cephcsi/cephcsi:v3.10.1"
# ROOK_CSI_REGISTRAR_IMAGE: "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.1"
# ROOK_CSI_RESIZER_IMAGE: "registry.k8s.io/sig-storage/csi-resizer:v1.9.2"
# ROOK_CSI_PROVISIONER_IMAGE: "registry.k8s.io/sig-storage/csi-provisioner:v3.6.2"
@@ -499,7 +499,7 @@ data:
CSI_ENABLE_CSIADDONS: "false"
# Enable watch for faster recovery from rbd rwo node loss
ROOK_WATCH_FOR_NODE_FAILURE: "true"
- # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.7.0"
+ # ROOK_CSIADDONS_IMAGE: "quay.io/csiaddons/k8s-sidecar:v0.8.0"
# The CSI GRPC timeout value (in seconds). It should be >= 120. If this variable is not set or is an invalid value, it's default to 150.
CSI_GRPC_TIMEOUT_SECONDS: "150"
@@ -598,7 +598,7 @@ spec:
serviceAccountName: rook-ceph-system
containers:
- name: rook-ceph-operator
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
args: ["ceph", "operator"]
securityContext:
runAsNonRoot: true
diff --git a/deploy/examples/osd-purge.yaml b/deploy/examples/osd-purge.yaml
index aeda0e13f990..f5fd57942b6d 100644
--- a/deploy/examples/osd-purge.yaml
+++ b/deploy/examples/osd-purge.yaml
@@ -28,7 +28,7 @@ spec:
serviceAccountName: rook-ceph-purge-osd
containers:
- name: osd-removal
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
# TODO: Insert the OSD ID in the last parameter that is to be removed
# The OSD IDs are a comma-separated list. For example: "0" or "0,2".
# If you want to preserve the OSD PVCs, set `--preserve-pvc true`.
diff --git a/deploy/examples/toolbox-job.yaml b/deploy/examples/toolbox-job.yaml
index 10afd44b2cc0..e5778b132205 100644
--- a/deploy/examples/toolbox-job.yaml
+++ b/deploy/examples/toolbox-job.yaml
@@ -10,7 +10,7 @@ spec:
spec:
initContainers:
- name: config-init
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
command: ["/usr/local/bin/toolbox.sh"]
args: ["--skip-watch"]
imagePullPolicy: IfNotPresent
@@ -29,7 +29,7 @@ spec:
mountPath: /var/lib/rook-ceph-mon
containers:
- name: script
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
volumeMounts:
- mountPath: /etc/ceph
name: ceph-config
diff --git a/deploy/examples/toolbox-operator-image.yaml b/deploy/examples/toolbox-operator-image.yaml
index 21d693bcea88..07fc123d8bd5 100644
--- a/deploy/examples/toolbox-operator-image.yaml
+++ b/deploy/examples/toolbox-operator-image.yaml
@@ -24,7 +24,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: rook-ceph-tools-operator-image
- image: rook/ceph:v1.13.0
+ image: rook/ceph:v1.13.1
command:
- /bin/bash
- -c
diff --git a/deploy/examples/toolbox.yaml b/deploy/examples/toolbox.yaml
index fe9350f29e5a..d90bb52c94fd 100644
--- a/deploy/examples/toolbox.yaml
+++ b/deploy/examples/toolbox.yaml
@@ -18,7 +18,7 @@ spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: rook-ceph-tools
- image: quay.io/ceph/ceph:v18.2.0
+ image: quay.io/ceph/ceph:v18.2.1
command:
- /bin/bash
- -c
diff --git a/images/ceph/Makefile b/images/ceph/Makefile
index 992fb1e4e747..495fc037f87e 100755
--- a/images/ceph/Makefile
+++ b/images/ceph/Makefile
@@ -18,9 +18,9 @@ include ../image.mk
# Image Build Options
ifeq ($(GOARCH),amd64)
-CEPH_VERSION ?= v18.2.0-20231018
+CEPH_VERSION ?= v18.2.1-20231215
else
-CEPH_VERSION ?= v18.2.0-20231018
+CEPH_VERSION ?= v18.2.1-20231215
endif
REGISTRY_NAME = quay.io
BASEIMAGE = $(REGISTRY_NAME)/ceph/ceph-$(GOARCH):$(CEPH_VERSION)
diff --git a/pkg/apis/ceph.rook.io/v1/types.go b/pkg/apis/ceph.rook.io/v1/types.go
index 050977f420f5..70806eb7e7af 100755
--- a/pkg/apis/ceph.rook.io/v1/types.go
+++ b/pkg/apis/ceph.rook.io/v1/types.go
@@ -2317,7 +2317,8 @@ type SSSDSidecarAdditionalFile struct {
// NetworkSpec for Ceph includes backward compatibility code
// +kubebuilder:validation:XValidation:message="at least one network selector must be specified when using multus",rule="!has(self.provider) || (self.provider != 'multus' || (self.provider == 'multus' && size(self.selectors) > 0))"
type NetworkSpec struct {
- // Provider is what provides network connectivity to the cluster e.g. "host" or "multus"
+ // Provider is what provides network connectivity to the cluster e.g. "host" or "multus".
+ // If the Provider is updated from being empty to "host" on a running cluster, then the operator will automatically fail over all the mons to apply the "host" network settings.
// +kubebuilder:validation:XValidation:message="network provider must be disabled (reverted to empty string) before a new provider is enabled",rule="self == '' || self == oldSelf"
// +nullable
// +optional
@@ -2363,7 +2364,9 @@ type NetworkSpec struct {
// +optional
Connections *ConnectionsSpec `json:"connections,omitempty"`
- // HostNetwork to enable host network
+ // HostNetwork to enable host network.
+ // If host networking is enabled or disabled on a running cluster, then the operator will automatically fail over all the mons to
+ // apply the new network settings.
// +optional
HostNetwork bool `json:"hostNetwork,omitempty"`
diff --git a/pkg/operator/ceph/cluster/mon/mon.go b/pkg/operator/ceph/cluster/mon/mon.go
index f3012d668cff..a5961793aa11 100644
--- a/pkg/operator/ceph/cluster/mon/mon.go
+++ b/pkg/operator/ceph/cluster/mon/mon.go
@@ -1343,6 +1343,12 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
return nil
}
+	// skip the update if a mon failover is required due to a change in the host network settings
+ if isMonIPUpdateRequiredForHostNetwork(m.DaemonName, m.UseHostNetwork, &c.spec.Network) {
+ c.monsToFailover.Insert(m.DaemonName)
+ return nil
+ }
+
// the existing deployment may have a node selector. if the cluster
// isn't using host networking and the deployment is using pvc storage,
// then the node selector can be removed. this may happen after
@@ -1408,6 +1414,19 @@ func (c *Cluster) startMon(m *monConfig, schedule *controller.MonScheduleInfo) e
return nil
}
+func isMonIPUpdateRequiredForHostNetwork(mon string, isMonUsingHostNetwork bool, network *cephv1.NetworkSpec) bool {
+ isHostNetworkEnabledInSpec := network.IsHost()
+ if isHostNetworkEnabledInSpec && !isMonUsingHostNetwork {
+ logger.Infof("host network is enabled for the cluster but mon %q is not running on host IP address", mon)
+ return true
+ } else if !isHostNetworkEnabledInSpec && isMonUsingHostNetwork {
+ logger.Infof("host network is disabled for the cluster but mon %q is still running on host IP address", mon)
+ return true
+ }
+
+ return false
+}
+
func hasMonPathChanged(d *apps.Deployment, claim *v1.PersistentVolumeClaim) bool {
if d.Labels["pvc_name"] == "" && claim != nil {
logger.Infof("skipping update for mon %q where path has changed from hostPath to pvc", d.Name)
diff --git a/pkg/operator/ceph/cluster/mon/mon_test.go b/pkg/operator/ceph/cluster/mon/mon_test.go
index 49be715a0bae..c23d130e36bc 100644
--- a/pkg/operator/ceph/cluster/mon/mon_test.go
+++ b/pkg/operator/ceph/cluster/mon/mon_test.go
@@ -44,6 +44,7 @@ import (
apps "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/sets"
)
// generate a standard mon config from a mon id w/ default port and IP 2.4.6.{1,2,3,...}
@@ -120,7 +121,8 @@ func newCluster(context *clusterd.Context, namespace string, allowMultiplePerNod
mapping: &opcontroller.Mapping{
Schedule: map[string]*opcontroller.MonScheduleInfo{},
},
- ownerInfo: ownerInfo,
+ ownerInfo: ownerInfo,
+ monsToFailover: sets.New[string](),
}
}
@@ -917,3 +919,40 @@ func TestHasMonPathChanged(t *testing.T) {
assert.False(t, hasMonPathChanged(monDeployment, nil))
})
}
+
+func TestIsMonIPUpdateRequiredForHostNetwork(t *testing.T) {
+ t.Run("both cluster and mon are set to use host network", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{HostNetwork: true}
+ monUsingHostNetwork := true
+ assert.False(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+
+ t.Run("both cluster and mon are not set for host network", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{}
+ monUsingHostNetwork := false
+ assert.False(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+ t.Run("cluster is set for host networking but mon pod is not", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{HostNetwork: true}
+ monUsingHostNetwork := false
+ assert.True(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+
+	t.Run("mon is using host networking but cluster is updated to not use host network", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{}
+ monUsingHostNetwork := true
+ assert.True(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+
+	t.Run("mon is using host networking and cluster is set to host network via NetworkProviderHost", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{Provider: cephv1.NetworkProviderHost}
+ monUsingHostNetwork := true
+ assert.False(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+
+	t.Run("mon is not using host networking but cluster is updated to use host network via NetworkProviderHost", func(t *testing.T) {
+ hostNetwork := &cephv1.NetworkSpec{Provider: cephv1.NetworkProviderHost}
+ monUsingHostNetwork := false
+ assert.True(t, isMonIPUpdateRequiredForHostNetwork("a", monUsingHostNetwork, hostNetwork))
+ })
+}
diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go
index da55e20ce641..25edc67d074d 100644
--- a/pkg/operator/ceph/cluster/watcher.go
+++ b/pkg/operator/ceph/cluster/watcher.go
@@ -201,47 +201,79 @@ func (c *clientCluster) fenceNode(ctx context.Context, node *corev1.Node, cluste
}
logger.Debugf("volumesInuse %s", volumesInuse)
- rbdVolumesInUse := getCephVolumesInUse(cluster, volumesInuse)
- if len(rbdVolumesInUse) == 0 {
- logger.Debugf("no rbd volumes in use for out of service node %q", node.Name)
+ rbdVolumesInUse, cephFSVolumeInUse := getCephVolumesInUse(cluster, volumesInuse)
+ if len(rbdVolumesInUse) == 0 && len(cephFSVolumeInUse) == 0 {
+ logger.Debugf("no rbd or cephFS volumes in use for out of service node %q", node.Name)
return nil
}
- logger.Infof("node %q require fencing, found rbd volumes in use", node.Name)
listPVs, err := c.context.Clientset.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
if err != nil {
return pkgerror.Wrapf(err, "failed to list PV")
}
- rbdPVList := listRBDPV(listPVs, cluster, rbdVolumesInUse)
- if len(rbdPVList) == 0 {
- logger.Debug("No rbd PVs found on the node")
- return nil
- }
+ if len(rbdVolumesInUse) != 0 {
+ rbdPVList := listRBDPV(listPVs, cluster, rbdVolumesInUse)
+ if len(rbdPVList) == 0 {
+ logger.Debug("No rbd PVs found on the node")
+ } else {
+			logger.Infof("node %q requires fencing, found rbd volumes in use", node.Name)
+ clusterInfo, _, _, err := opcontroller.LoadClusterInfo(c.context, ctx, cluster.Namespace, &cluster.Spec)
+ if err != nil {
+ return pkgerror.Wrapf(err, "Failed to load cluster info.")
+ }
- clusterInfo, _, _, err := opcontroller.LoadClusterInfo(c.context, ctx, cluster.Namespace, &cluster.Spec)
- if err != nil {
- return pkgerror.Wrapf(err, "Failed to load cluster info.")
+ for i := range rbdPVList {
+ err = c.fenceRbdImage(ctx, node, cluster, clusterInfo, rbdPVList[i])
+ // We only need to create the network fence for any one of rbd pv.
+ if err == nil {
+ break
+ }
+
+ if i == len(rbdPVList)-1 {
+ return pkgerror.Wrapf(err, "failed to fence rbd volumes")
+ }
+ logger.Errorf("failed to fence rbd volumes %q, trying next rbd volume", rbdPVList[i].Name)
+ }
+ }
}
- for i := range rbdPVList {
- err = c.fenceRbdImage(ctx, node, cluster, clusterInfo, rbdPVList[i])
- // We only need to create the network fence for any one of rbd pv.
- if err == nil {
- break
+ if len(cephFSVolumeInUse) != 0 {
+ cephFSVolumeInUseMap := make(map[string]struct{})
+ for _, vol := range cephFSVolumeInUse {
+ cephFSVolumeInUseMap[vol] = struct{}{}
}
+ cephFSPVList := listRWOCephFSPV(listPVs, cluster, cephFSVolumeInUseMap)
+ if len(cephFSPVList) == 0 {
+ logger.Debug("No cephFS PVs found on the node")
+ return nil
+ }
+		logger.Infof("node %q requires fencing, found cephFS volumes in use", node.Name)
+ clusterInfo, _, _, err := opcontroller.LoadClusterInfo(c.context, ctx, cluster.Namespace, &cluster.Spec)
+ if err != nil {
+ return pkgerror.Wrapf(err, "Failed to load cluster info.")
+ }
+
+ for i := range cephFSPVList {
+ err = c.fenceCephFSVolume(ctx, node, cluster, clusterInfo, cephFSPVList[i])
+ // We only need to create the network fence for any one of cephFS pv.
+ if err == nil {
+ break
+ }
- if i == len(rbdPVList)-1 {
- return pkgerror.Wrapf(err, "failed to fence rbd volumes")
+ if i == len(cephFSPVList)-1 {
+ return pkgerror.Wrapf(err, "failed to fence cephFS volumes")
+ }
+ logger.Errorf("failed to fence cephFS volumes %q, trying next cephFS volume", cephFSPVList[i].Name)
}
- logger.Errorf("failed to fence rbd volumes %q, trying next rbd volume", rbdPVList[i].Name)
+
}
return nil
}
-func getCephVolumesInUse(cluster *cephv1.CephCluster, volumesInUse []corev1.UniqueVolumeName) []string {
- var rbdVolumesInUse []string
+func getCephVolumesInUse(cluster *cephv1.CephCluster, volumesInUse []corev1.UniqueVolumeName) ([]string, []string) {
+ var rbdVolumesInUse, cephFSVolumeInUse []string
for _, volume := range volumesInUse {
splitVolumeInUseBased := trimeVolumeInUse(volume)
@@ -250,8 +282,13 @@ func getCephVolumesInUse(cluster *cephv1.CephCluster, volumesInUse []corev1.Uniq
if len(splitVolumeInUseBased) == 2 && splitVolumeInUseBased[0] == fmt.Sprintf("%s.rbd.csi.ceph.com", cluster.Namespace) {
rbdVolumesInUse = append(rbdVolumesInUse, splitVolumeInUseBased[1])
}
+
+ if len(splitVolumeInUseBased) == 2 && splitVolumeInUseBased[0] == fmt.Sprintf("%s.cephfs.csi.ceph.com", cluster.Namespace) {
+ cephFSVolumeInUse = append(cephFSVolumeInUse, splitVolumeInUseBased[1])
+ }
}
- return rbdVolumesInUse
+
+ return rbdVolumesInUse, cephFSVolumeInUse
}
func trimeVolumeInUse(volume corev1.UniqueVolumeName) []string {
@@ -290,6 +327,36 @@ func listRBDPV(listPVs *corev1.PersistentVolumeList, cluster *cephv1.CephCluster
return listRbdPV
}
+func listRWOCephFSPV(listPVs *corev1.PersistentVolumeList, cluster *cephv1.CephCluster, cephFSVolumesInUse map[string]struct{}) []corev1.PersistentVolume {
+ var listCephFSPV []corev1.PersistentVolume
+
+ for _, pv := range listPVs.Items {
+ // Skip if pv is not provisioned by CSI
+ if pv.Spec.CSI == nil {
+ logger.Debugf("pv %q is not provisioned by CSI", pv.Name)
+ continue
+ }
+
+ if pv.Spec.CSI.Driver == fmt.Sprintf("%s.cephfs.csi.ceph.com", cluster.Namespace) {
+ // Ignore PVs that support multinode access (RWX, ROX), since they can be mounted on multiple nodes.
+ if pvSupportsMultiNodeAccess(pv.Spec.AccessModes) {
+ continue
+ }
+
+ if pv.Spec.CSI.VolumeAttributes["staticVolume"] == "true" || pv.Spec.CSI.VolumeAttributes["pool"] == "" {
+ logger.Debugf("skipping, static pv %q", pv.Name)
+ continue
+ }
+ // Check if the volume is in use
+ if _, exists := cephFSVolumesInUse[pv.Spec.CSI.VolumeHandle]; exists {
+ listCephFSPV = append(listCephFSPV, pv)
+ }
+ }
+
+ }
+ return listCephFSPV
+}
+
// pvSupportsMultiNodeAccess returns true if the PV access modes contain ReadWriteMany or ReadOnlyMany.
func pvSupportsMultiNodeAccess(accessModes []corev1.PersistentVolumeAccessMode) bool {
for _, accessMode := range accessModes {
@@ -330,6 +397,80 @@ func (c *clientCluster) fenceRbdImage(
return nil
}
+func (c *clientCluster) fenceCephFSVolume(
+ ctx context.Context, node *corev1.Node, cluster *cephv1.CephCluster,
+ clusterInfo *cephclient.ClusterInfo, cephFSPV corev1.PersistentVolume) error {
+
+ logger.Infof("fencing cephfs volume %q on node %q", cephFSPV.Name, node.Name)
+
+ status, err := cephclient.StatusWithUser(c.context, clusterInfo)
+ if err != nil {
+ return fmt.Errorf("failed to get ceph status for check active mds. %v", err)
+ }
+
+ var activeMDS string
+ for _, fsRank := range status.Fsmap.ByRank {
+ if fsRank.Status == "up:active" {
+ activeMDS = fsRank.Name
+ }
+ }
+
+ args := []string{"tell", fmt.Sprintf("mds.%s", activeMDS), "client", "ls", "--format", "json"}
+ cmd := cephclient.NewCephCommand(c.context, clusterInfo, args)
+ cmd.JsonOutput = true
+
+ buf, err := cmd.Run()
+ if err != nil {
+		return fmt.Errorf("failed to list watchers for cephfs pool/subvolumeName %s/%s. %v", cephFSPV.Spec.CSI.VolumeAttributes["pool"], cephFSPV.Spec.CSI.VolumeAttributes["subvolumeName"], err)
+ }
+ ips, err := cephFSMDSClientMarshal(buf, cephFSPV)
+ if err != nil || ips == nil {
+ return fmt.Errorf("failed to unmarshal cephfs mds output. %v", err)
+ }
+
+ err = c.createNetworkFence(ctx, cephFSPV, node, cluster, ips)
+ if err != nil {
+ return fmt.Errorf("failed to create network fence for node %q. %v", node.Name, err)
+ }
+
+ return nil
+}
+
+func cephFSMDSClientMarshal(output []byte, cephFSPV corev1.PersistentVolume) ([]string, error) {
+ type entity struct {
+ Addr struct {
+ Addr string `json:"addr"`
+ Nonce int `json:"nonce"`
+ } `json:"addr"`
+ }
+
+ type clientMetadata struct {
+ Root string `json:"root"`
+ }
+
+ type cephFSData struct {
+ Entity entity `json:"entity"`
+ ClientMetadata clientMetadata `json:"client_metadata"`
+ }
+
+ var data []cephFSData
+ err := json.Unmarshal([]byte(output), &data)
+ if err != nil {
+ return []string{}, pkgerror.Wrapf(err, "failed to unmarshal cephFS data output")
+ }
+
+ watcherIPlist := []string{}
+ for _, d := range data {
+ if cephFSPV.Spec.CSI.VolumeAttributes["subvolumePath"] == d.ClientMetadata.Root {
+ logger.Infof("cephfs mds client ips to fence %v", d.Entity.Addr)
+ watcherIP := concatenateWatcherIp(d.Entity.Addr.Addr)
+ watcherIPlist = append(watcherIPlist, watcherIP)
+ }
+ }
+
+ return watcherIPlist, nil
+}
+
func rbdStatusUnMarshal(output []byte) ([]string, error) {
type rbdStatus struct {
Watchers []struct {
@@ -352,9 +493,10 @@ func rbdStatusUnMarshal(output []byte) ([]string, error) {
}
func concatenateWatcherIp(address string) string {
- // address is in format `10.63.0.5:0/1254753579`
- // split with separation ':0/' to remove nounce and concatenating `/32` to define a network with only one IP address
- watcherIP := strings.Split(address, ":0/")[0] + "/32"
+ // address is in the format `10.63.0.5:0/1254753579` for rbd and
+ // `10.244.0.12:0` for cephfs.
+ // Split on `:0` to drop the nonce and append `/32` to define a network containing only this one IP address.
+ watcherIP := strings.Split(address, ":0")[0] + "/32"
return watcherIP
}
diff --git a/pkg/operator/ceph/cluster/watcher_test.go b/pkg/operator/ceph/cluster/watcher_test.go
index 19d659437ac1..d37cb84c15d9 100644
--- a/pkg/operator/ceph/cluster/watcher_test.go
+++ b/pkg/operator/ceph/cluster/watcher_test.go
@@ -175,9 +175,11 @@ func TestHandleNodeFailure(t *testing.T) {
switch {
case command == "rbd" && args[0] == "status":
return `{"watchers":[{"address":"192.168.39.137:0/3762982934","client":4307,"cookie":18446462598732840961}]}`, nil
+ case command == "ceph" && args[0] == "tell":
+ return `{"watchers":[{"id":5201,"entity":[{"addr": [{"addr": "10.244.0.12:0", "nonce":3247243972}]}]]}`, nil
}
- return "", errors.Errorf("unexpected rbd command %q", args)
+ return "", errors.Errorf("unexpected rbd/ceph command %q", args)
}
node := &corev1.Node{
@@ -201,6 +203,7 @@ func TestHandleNodeFailure(t *testing.T) {
},
VolumesInUse: []corev1.UniqueVolumeName{
"kubernetes.io/csi/rook-ceph.rbd.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4002",
+ "kubernetes.io/csi/rook-ceph.cephfs.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001",
},
},
}
@@ -228,7 +231,7 @@ func TestHandleNodeFailure(t *testing.T) {
},
}
- staticPV := &corev1.PersistentVolume{
+ staticRbdPV := &corev1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-58469d41-f6c0-4720-b23a-0a0826b841cb",
Annotations: map[string]string{
@@ -248,6 +251,26 @@ func TestHandleNodeFailure(t *testing.T) {
},
}
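+ // cephfs PV with empty VolumeAttributes; listRWOCephFSPV treats it as static and filters it out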
+ staticCephfsPV := &corev1.PersistentVolume{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "pvc-58469d41-f6c0-4720-b23a-0a0826b842cb",
+ Annotations: map[string]string{
+ "pv.kubernetes.io/provisioned-by": fmt.Sprintf("%s.cephfs.csi.ceph.com", ns),
+ "volume.kubernetes.io/provisioner-deletion-secret-name": "rook-csi-cephfs-provisioner",
+ "volume.kubernetes.io/provisioner-deletion-secret-namespace": ns,
+ },
+ },
+ Spec: corev1.PersistentVolumeSpec{
+ PersistentVolumeSource: corev1.PersistentVolumeSource{
+ CSI: &corev1.CSIPersistentVolumeSource{
+ Driver: fmt.Sprintf("%s.cephfs.csi.ceph.com", ns),
+ VolumeHandle: "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001",
+ VolumeAttributes: map[string]string{},
+ },
+ },
+ },
+ }
+
pvNotProvisionByCSI := &corev1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: "pvc-58469d41-f6c0-4720-b23a-0a0826b841cc",
@@ -302,30 +325,48 @@ func TestHandleNodeFailure(t *testing.T) {
err = c.client.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: cephCluster.Namespace}, networkFence)
assert.NoError(t, err)
- // For static pv
- _, err = c.context.Clientset.CoreV1().PersistentVolumes().Create(ctx, staticPV, metav1.CreateOptions{})
+ // For static rbd pv
+ _, err = c.context.Clientset.CoreV1().PersistentVolumes().Create(ctx, staticRbdPV, metav1.CreateOptions{})
assert.NoError(t, err)
pvList, err := c.context.Clientset.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
assert.NoError(t, err)
- volumeInUse := getCephVolumesInUse(cephCluster, node.Status.VolumesInUse)
- rbdPVList := listRBDPV(pvList, cephCluster, volumeInUse)
- assert.Equal(t, len(rbdPVList), 1) // it will be equal to once since we have one pv provisioned by csi named `PV`
+ rbdVolumesInUse, _ := getCephVolumesInUse(cephCluster, node.Status.VolumesInUse)
+ rbdPVList := listRBDPV(pvList, cephCluster, rbdVolumesInUse)
+ assert.Equal(t, len(rbdPVList), 1) // it will be equal to one since we have one pv provisioned by csi named `PV`
err = c.handleNodeFailure(ctx, cephCluster, node)
assert.NoError(t, err)
- // For static pv
+ // For static cephfs pv
+ _, err = c.context.Clientset.CoreV1().PersistentVolumes().Create(ctx, staticCephfsPV, metav1.CreateOptions{})
+ assert.NoError(t, err)
+
+ pvList, err = c.context.Clientset.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
+ assert.NoError(t, err)
+
+ _, cephFSVolumesInUse := getCephVolumesInUse(cephCluster, node.Status.VolumesInUse)
+ cephFSVolumesInUseMap := make(map[string]struct{})
+ for _, vol := range cephFSVolumesInUse {
+ cephFSVolumesInUseMap[vol] = struct{}{}
+ }
+ cephFSPVList := listRWOCephFSPV(pvList, cephCluster, cephFSVolumesInUseMap)
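+ // the static cephfs PV above has no pool attribute, so no cephfs PVs are eligible for fencing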
+ assert.Equal(t, len(cephFSPVList), 0)
+
+ err = c.handleNodeFailure(ctx, cephCluster, node)
+ assert.NoError(t, err)
+
+ // For pv not provisioned by CSI
_, err = c.context.Clientset.CoreV1().PersistentVolumes().Create(ctx, pvNotProvisionByCSI, metav1.CreateOptions{})
assert.NoError(t, err)
pvList, err = c.context.Clientset.CoreV1().PersistentVolumes().List(ctx, metav1.ListOptions{})
assert.NoError(t, err)
- volumeInUse = getCephVolumesInUse(cephCluster, node.Status.VolumesInUse)
- rbdPVList = listRBDPV(pvList, cephCluster, volumeInUse)
- assert.Equal(t, len(rbdPVList), 1) // it will be equal to once since we have one pv provisioned by csi named `PV`
+ rbdVolumesInUse, _ = getCephVolumesInUse(cephCluster, node.Status.VolumesInUse)
+ rbdPVList = listRBDPV(pvList, cephCluster, rbdVolumesInUse)
+ assert.Equal(t, len(rbdPVList), 1) // it will be equal to one since we have one pv provisioned by csi named `PV`
err = c.handleNodeFailure(ctx, cephCluster, node)
assert.NoError(t, err)
@@ -345,6 +386,8 @@ func TestGetCephVolumesInUse(t *testing.T) {
volInUse := []corev1.UniqueVolumeName{
"kubernetes.io/csi/rook-ceph.rbd.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4002",
"kubernetes.io/csi/rook-ceph.rbd.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4003",
+ "kubernetes.io/csi/rook-ceph.cephfs.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001",
+ "kubernetes.io/csi/rook-ceph.cephfs.csi.ceph.com^0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4004",
}
splitVolInUse := trimeVolumeInUse(volInUse[0])
@@ -355,9 +398,21 @@ func TestGetCephVolumesInUse(t *testing.T) {
assert.Equal(t, splitVolInUse[0], "rook-ceph.rbd.csi.ceph.com")
assert.Equal(t, splitVolInUse[1], "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4003")
- trimVolInUse := getCephVolumesInUse(cephCluster, volInUse)
- expected := []string{"0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4002", "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4003"}
- assert.Equal(t, expected, trimVolInUse)
+ splitVolInUse = trimeVolumeInUse(volInUse[2])
+ assert.Equal(t, splitVolInUse[0], "rook-ceph.cephfs.csi.ceph.com")
+ assert.Equal(t, splitVolInUse[1], "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001")
+
+ splitVolInUse = trimeVolumeInUse(volInUse[3])
+ assert.Equal(t, splitVolInUse[0], "rook-ceph.cephfs.csi.ceph.com")
+ assert.Equal(t, splitVolInUse[1], "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4004")
+
+ trimRbdVolInUse, trimCephFSVolInUse := getCephVolumesInUse(cephCluster, volInUse)
+
+ expectedRbd := []string{"0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4002", "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4003"}
+ expectedCephfs := []string{"0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001", "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4004"}
+
+ assert.Equal(t, expectedRbd, trimRbdVolInUse)
+ assert.Equal(t, expectedCephfs, trimCephFSVolInUse)
}
func TestRBDStatusUnMarshal(t *testing.T) {
diff --git a/pkg/operator/ceph/csi/secrets.go b/pkg/operator/ceph/csi/secrets.go
index 324c26a706f6..5f11dd2e1d9f 100644
--- a/pkg/operator/ceph/csi/secrets.go
+++ b/pkg/operator/ceph/csi/secrets.go
@@ -105,9 +105,10 @@ func cephCSIKeyringCephFSNodeCaps() []string {
func cephCSIKeyringCephFSProvisionerCaps() []string {
return []string{
- "mon", "allow r",
+ "mon", "allow r, allow command 'osd blocklist'",
"mgr", "allow rw",
"osd", "allow rw tag cephfs metadata=*",
+ "mds", "allow *", // TODO: replace '*' with required permissions
}
}
diff --git a/pkg/operator/ceph/csi/secrets_test.go b/pkg/operator/ceph/csi/secrets_test.go
index c7c908a94b50..d76b9db2ae9f 100644
--- a/pkg/operator/ceph/csi/secrets_test.go
+++ b/pkg/operator/ceph/csi/secrets_test.go
@@ -39,5 +39,5 @@ func TestCephCSIKeyringCephFSNodeCaps(t *testing.T) {
func TestCephCSIKeyringCephFSProvisionerCaps(t *testing.T) {
caps := cephCSIKeyringCephFSProvisionerCaps()
- assert.Equal(t, caps, []string{"mon", "allow r", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*"})
+ assert.Equal(t, caps, []string{"mon", "allow r, allow command 'osd blocklist'", "mgr", "allow rw", "osd", "allow rw tag cephfs metadata=*", "mds", "allow *"})
}
diff --git a/pkg/operator/ceph/csi/spec.go b/pkg/operator/ceph/csi/spec.go
index 6e8f0318b056..01c0f248b6eb 100644
--- a/pkg/operator/ceph/csi/spec.go
+++ b/pkg/operator/ceph/csi/spec.go
@@ -131,13 +131,13 @@ var (
// manually challenging.
var (
// image names
- DefaultCSIPluginImage = "quay.io/cephcsi/cephcsi:v3.10.0"
+ DefaultCSIPluginImage = "quay.io/cephcsi/cephcsi:v3.10.1"
DefaultRegistrarImage = "registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.1"
DefaultProvisionerImage = "registry.k8s.io/sig-storage/csi-provisioner:v3.6.2"
DefaultAttacherImage = "registry.k8s.io/sig-storage/csi-attacher:v4.4.2"
DefaultSnapshotterImage = "registry.k8s.io/sig-storage/csi-snapshotter:v6.3.2"
DefaultResizerImage = "registry.k8s.io/sig-storage/csi-resizer:v1.9.2"
- DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.7.0"
+ DefaultCSIAddonsImage = "quay.io/csiaddons/k8s-sidecar:v0.8.0"
// image pull policy
DefaultCSIImagePullPolicy = string(corev1.PullIfNotPresent)
diff --git a/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-provisioner-dep.yaml b/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-provisioner-dep.yaml
index 7b05a2c3ff34..cdf5c9c5115f 100644
--- a/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-provisioner-dep.yaml
+++ b/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-provisioner-dep.yaml
@@ -173,6 +173,10 @@ spec:
- "--namespace=$(POD_NAMESPACE)"
- "--pod-uid=$(POD_UID)"
- "--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/"
+ - "--leader-election-namespace={{ .Namespace }}"
+ - "--leader-election-lease-duration=137s"
+ - "--leader-election-renew-deadline=107s"
+ - "--leader-election-retry-period=26s"
ports:
- containerPort: {{ .CSIAddonsPort }}
env:
diff --git a/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-svc.yaml b/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-svc.yaml
index 2eb4b036c9c7..890b6861466c 100644
--- a/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-svc.yaml
+++ b/pkg/operator/ceph/csi/template/cephfs/csi-cephfsplugin-svc.yaml
@@ -12,9 +12,5 @@ spec:
port: 8080
protocol: TCP
targetPort: {{ .CephFSLivenessMetricsPort }}
- - name: csi-grpc-metrics
- port: 8081
- protocol: TCP
- targetPort: {{ .CephFSGRPCMetricsPort }}
selector:
contains: csi-cephfsplugin-metrics
diff --git a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml
index 99a710d05be0..3abc84de1b97 100644
--- a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml
+++ b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-provisioner-dep.yaml
@@ -145,6 +145,10 @@ spec:
- "--namespace=$(POD_NAMESPACE)"
- "--pod-uid=$(POD_UID)"
- "--stagingpath={{ .KubeletDirPath }}/plugins/kubernetes.io/csi/"
+ - "--leader-election-namespace={{ .Namespace }}"
+ - "--leader-election-lease-duration=137s"
+ - "--leader-election-renew-deadline=107s"
+ - "--leader-election-retry-period=26s"
ports:
- containerPort: {{ .CSIAddonsPort }}
env:
diff --git a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-svc.yaml b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-svc.yaml
index 6c432669c038..0493f06c7779 100644
--- a/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-svc.yaml
+++ b/pkg/operator/ceph/csi/template/rbd/csi-rbdplugin-svc.yaml
@@ -12,9 +12,5 @@ spec:
port: 8080
protocol: TCP
targetPort: {{ .RBDLivenessMetricsPort }}
- - name: csi-grpc-metrics
- port: 8081
- protocol: TCP
- targetPort: {{ .RBDGRPCMetricsPort }}
selector:
contains: csi-rbdplugin-metrics
diff --git a/pkg/operator/ceph/csi/util_test.go b/pkg/operator/ceph/csi/util_test.go
index 9beaa1b3ce97..66ba19e74791 100644
--- a/pkg/operator/ceph/csi/util_test.go
+++ b/pkg/operator/ceph/csi/util_test.go
@@ -267,7 +267,7 @@ func Test_getImage(t *testing.T) {
args: args{
data: map[string]string{},
settingName: "ROOK_CSI_CEPH_IMAGE",
- defaultImage: "quay.io/cephcsi/cephcsi:v3.10.0",
+ defaultImage: "quay.io/cephcsi/cephcsi:v3.10.1",
},
want: DefaultCSIPluginImage,
},
@@ -278,7 +278,7 @@ func Test_getImage(t *testing.T) {
"ROOK_CSI_CEPH_IMAGE": "registry.io/private/cephcsi:v8",
},
settingName: "ROOK_CSI_CEPH_IMAGE",
- defaultImage: "quay.io/cephcsi/cephcsi:v3.10.0",
+ defaultImage: "quay.io/cephcsi/cephcsi:v3.10.1",
},
want: "registry.io/private/cephcsi:v8",
},
@@ -289,9 +289,9 @@ func Test_getImage(t *testing.T) {
"ROOK_CSI_CEPH_IMAGE": "registry.io/private/cephcsi",
},
settingName: "ROOK_CSI_CEPH_IMAGE",
- defaultImage: "quay.io/cephcsi/cephcsi:v3.10.0",
+ defaultImage: "quay.io/cephcsi/cephcsi:v3.10.1",
},
- want: "registry.io/private/cephcsi:v3.10.0",
+ want: "registry.io/private/cephcsi:v3.10.1",
},
}
for _, tt := range tests {
diff --git a/tests/framework/installer/ceph_helm_installer.go b/tests/framework/installer/ceph_helm_installer.go
index 332a8f84d9f4..95a5f43592bc 100644
--- a/tests/framework/installer/ceph_helm_installer.go
+++ b/tests/framework/installer/ceph_helm_installer.go
@@ -173,6 +173,9 @@ func (h *CephInstaller) removeCephClusterHelmResources() {
if err := h.k8shelper.RookClientset.CephV1().CephBlockPools(h.settings.Namespace).Delete(context.TODO(), BlockPoolName, v1.DeleteOptions{}); err != nil {
assert.True(h.T(), kerrors.IsNotFound(err))
}
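+	// delete the "<filesystem>-csi" CephFilesystemSubVolumeGroup before the filesystem itself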
+ if err := h.k8shelper.RookClientset.CephV1().CephFilesystemSubVolumeGroups(h.settings.Namespace).Delete(context.TODO(), FilesystemName+"-csi", v1.DeleteOptions{}); err != nil {
+ assert.True(h.T(), kerrors.IsNotFound(err))
+ }
if err := h.k8shelper.RookClientset.CephV1().CephFilesystems(h.settings.Namespace).Delete(context.TODO(), FilesystemName, v1.DeleteOptions{}); err != nil {
assert.True(h.T(), kerrors.IsNotFound(err))
}
diff --git a/tests/scripts/collect-logs.sh b/tests/scripts/collect-logs.sh
index c78259366921..aea3bf015cc8 100755
--- a/tests/scripts/collect-logs.sh
+++ b/tests/scripts/collect-logs.sh
@@ -18,11 +18,15 @@ $CEPH_CMD osd dump >"${LOG_DIR}"/ceph-osd-dump.txt
$CEPH_CMD report >"${LOG_DIR}"/ceph-report.txt
NAMESPACES=("$CLUSTER_NAMESPACE")
-NAMESPACES+=("$KUBE_SYSTEM_NAMESPACE")
if [[ "$OPERATOR_NAMESPACE" != "$CLUSTER_NAMESPACE" ]]; then
NAMESPACES+=("$OPERATOR_NAMESPACE")
fi
+# Collect the kube-system namespace only for the canary-multus test, where the network setup needs debugging
+if [ "$1" == "canary-multus" ]; then
+ NAMESPACES+=("$KUBE_SYSTEM_NAMESPACE")
+fi
+
for NAMESPACE in "${NAMESPACES[@]}"; do
# each namespace is a sub-directory for easier debugging
NS_DIR="${LOG_DIR}"/namespace-"${NAMESPACE}"