From 21f68f6402861c9a8d90840512db1a431bd5f6f7 Mon Sep 17 00:00:00 2001
From: travisn
Date: Mon, 12 Feb 2024 17:05:51 -0700
Subject: [PATCH 1/5] docs: update roadmap for the 1.14 release

Looking forward to the 1.14 release in April, update the roadmap of
planned feature work. The features are not limited to this list; it is
just context for what is currently planned.

Signed-off-by: travisn
---
 ROADMAP.md | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/ROADMAP.md b/ROADMAP.md
index f696e9555930..20bc846aedb0 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -8,21 +8,23 @@ We hope that the items listed below will inspire further engagement from the com
 Any dates listed below and the specific issues that will ship in a given milestone are subject to change but should give a general idea of what we are planning. See the [GitHub project boards](https://github.com/rook/rook/projects) for the most up-to-date issues and their status.
 
-## Rook Ceph 1.13
+## Rook Ceph 1.14
 
-The following high level features are targeted for Rook v1.13 (November 2023). For more detailed project tracking see the [v1.13 board](https://github.com/rook/rook/projects/30).
+The following high level features are targeted for Rook v1.14 (April 2024). For more detailed project tracking see the [v1.14 board](https://github.com/rook/rook/projects/31).
 
-* OSD encryption on partitions [#10984](https://github.com/rook/rook/issues/10984)
-* Object Store
-  * Pool sharing for clusters where many object stores are required [#11411](https://github.com/rook/rook/issues/11411)
-* CephFS
-  * Automatic subvolume group pinning [#12607](https://github.com/rook/rook/issues/12607)
-* Ceph-CSI [v3.10](https://github.com/ceph/ceph-csi/issues?q=is%3Aopen+is%3Aissue+milestone%3Arelease-v3.10.0)
+* Allow setting the application name on a CephBlockPool [#13744](https://github.com/rook/rook/pull/13744)
+* Pool sharing for multiple object stores [#11411](https://github.com/rook/rook/issues/11411)
+* Replace a single OSD when a metadataDevice is configured with multiple OSDs [#13240](https://github.com/rook/rook/issues/13240)
+* Create a default service account for all Ceph daemons [#13362](https://github.com/rook/rook/pull/13362)
+* Enable the rook orchestrator mgr module by default for improved dashboard integration [#13760](https://github.com/rook/rook/issues/13760)
+* Option to run all components on the host network [#13571](https://github.com/rook/rook/issues/13571)
+* Multus-enabled clusters to begin "holder" pod deprecation [#13055](https://github.com/rook/rook/issues/13055)
+* Separate CSI image repository and tag for all images in the helm chart [#13585](https://github.com/rook/rook/issues/13585)
+* Ceph-CSI [v3.11](https://github.com/ceph/ceph-csi/issues?q=is%3Aopen+is%3Aissue+milestone%3Arelease-v3.11.0)
+* Add build support for Go 1.22 [#13738](https://github.com/rook/rook/pull/13738)
 
 ## Kubectl Plugin
 
-Features are planned in the 1.13 time frame for the [Kubectl Plugin](https://github.com/rook/kubectl-rook-ceph).
- * Recover the CephCluster CR after accidental deletion [#68](https://github.com/rook/kubectl-rook-ceph/issues/68)
- * Force cleanup the cluster if graceful uninstall is not desired [#131](https://github.com/rook/kubectl-rook-ceph/issues/131)
- * Provide a restricted set of commands based on a build flag [#174](https://github.com/rook/kubectl-rook-ceph/issues/174)
- * Collect details to help troubleshoot the csi driver [#69](https://github.com/rook/kubectl-rook-ceph/issues/69)
+Features are planned in the 1.14 time frame for the [Kubectl Plugin](https://github.com/rook/kubectl-rook-ceph).
+* Collect details to help troubleshoot the csi driver [#69](https://github.com/rook/kubectl-rook-ceph/issues/69)
+* Command to flatten an RBD image [#222](https://github.com/rook/kubectl-rook-ceph/issues/222)

From 0e115c44bf5e5c0ecff0cc650ddc33fc05703df4 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Wed, 14 Feb 2024 10:46:00 +0100
Subject: [PATCH 2/5] core: fix cephfs pvc network fencing

The cephfs PVC might exist on the Kubernetes node object, but due to
timing issues the IP might not be visible on the Ceph cluster, or the
client might already have been evicted or disconnected from the Ceph
cluster. In that case we will not be able to get the IP details for
the subvolume. The code has no check for empty IPs, so Rook tries to
create a NetworkFence CR with empty IPs and the NetworkFence moves to
the Failed state. This PR adds the necessary check and logging to
prevent this.

Signed-off-by: Madhu Rajanna
---
 pkg/operator/ceph/cluster/watcher.go      | 7 ++++++-
 pkg/operator/ceph/cluster/watcher_test.go | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go
index 6b8f08d40567..f9162b11c1d8 100644
--- a/pkg/operator/ceph/cluster/watcher.go
+++ b/pkg/operator/ceph/cluster/watcher.go
@@ -438,10 +438,15 @@ func (c *clientCluster) fenceCephFSVolume(
 		return fmt.Errorf("failed to list watchers for cephfs pool/subvoumeName %s/%s. %v", cephFSPV.Spec.CSI.VolumeAttributes["pool"], cephFSPV.Spec.CSI.VolumeAttributes["subvolumeName"], err)
 	}
 	ips, err := cephFSMDSClientMarshal(buf, cephFSPV)
-	if err != nil || ips == nil {
+	if err != nil {
 		return fmt.Errorf("failed to unmarshal cephfs mds output. %v", err)
 	}
 
+	if len(ips) == 0 {
+		logger.Infof("no active mds clients found for cephfs volume %q", cephFSPV.Name)
+		return nil
+	}
+
 	err = c.createNetworkFence(ctx, cephFSPV, node, cluster, ips, cephfsDriver)
 	if err != nil {
 		return fmt.Errorf("failed to create network fence for node %q. %v", node.Name, err)
diff --git a/pkg/operator/ceph/cluster/watcher_test.go b/pkg/operator/ceph/cluster/watcher_test.go
index 6e527b9108aa..29b6d60f8e7c 100644
--- a/pkg/operator/ceph/cluster/watcher_test.go
+++ b/pkg/operator/ceph/cluster/watcher_test.go
@@ -179,7 +179,7 @@ func TestHandleNodeFailure(t *testing.T) {
 		case command == "ceph" && args[0] == "status":
 			return `{"entity":[{"addr": [{"addr": "10.244.0.12:0", "nonce":3247243972}]}], "client_metadata":{"root":"/"}}`, nil
 		case command == "ceph" && args[0] == "tell":
-			return `[{"entity":{"addr":{"addr":"10.244.0.12:0","nonce":3247243972}}, "client_metadata":{"root":"/"}}]`, nil
+			return `[{"entity":{"addr":{"addr":"10.244.0.12:0","nonce":3247243972}}, "client_metadata":{"root":"/volumes/csi/csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca"}}]`, nil
 		}
 		return "", errors.Errorf("unexpected rbd/ceph command %q", args)
@@ -250,6 +250,7 @@ func TestHandleNodeFailure(t *testing.T) {
 					VolumeHandle: "0001-0009-rook-ceph-0000000000000002-24862838-240d-4215-9183-abfc0e9e4001",
 					VolumeAttributes: map[string]string{
 						"fsName":        "myfs",
+						"subvolumePath": "/volumes/csi/csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca",
 						"subvolumeName": "csi-vol-58469d41-f6c0-4720-b23a-0a0826b842ca",
 					},
 				},

From eb97390df2bf3673a5f367e0f8f0fc9b6976de16 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Wed, 14 Feb 2024 10:51:09 +0100
Subject: [PATCH 3/5] core: use error wrapping in fenceCephFSVolume

To keep the code consistent and to propagate more details about an
error, Rook uses error wrapping. Update the current code to wrap the
error details wherever required.

Signed-off-by: Madhu Rajanna
---
 pkg/operator/ceph/cluster/watcher.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go
index f9162b11c1d8..145d1f862961 100644
--- a/pkg/operator/ceph/cluster/watcher.go
+++ b/pkg/operator/ceph/cluster/watcher.go
@@ -419,7 +419,7 @@ func (c *clientCluster) fenceCephFSVolume(
 
 	status, err := cephclient.StatusWithUser(c.context, clusterInfo)
 	if err != nil {
-		return fmt.Errorf("failed to get ceph status for check active mds. %v", err)
+		return pkgerror.Wrapf(err, "failed to get ceph status for check active mds")
 	}
 
 	var activeMDS string
@@ -439,7 +439,7 @@ func (c *clientCluster) fenceCephFSVolume(
 	}
 	ips, err := cephFSMDSClientMarshal(buf, cephFSPV)
 	if err != nil {
-		return fmt.Errorf("failed to unmarshal cephfs mds output. %v", err)
+		return pkgerror.Wrapf(err, "failed to unmarshal cephfs mds output")
 	}
 
 	if len(ips) == 0 {
@@ -449,7 +449,7 @@ func (c *clientCluster) fenceCephFSVolume(
 
 	err = c.createNetworkFence(ctx, cephFSPV, node, cluster, ips, cephfsDriver)
 	if err != nil {
-		return fmt.Errorf("failed to create network fence for node %q. %v", node.Name, err)
+		return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name)
 	}
 
 	return nil

From ca21bd97155279dcc171652206ac544e16c98276 Mon Sep 17 00:00:00 2001
From: Madhu Rajanna
Date: Wed, 14 Feb 2024 18:45:22 +0100
Subject: [PATCH 4/5] core: retry other pvc if active client not found

Retry the next cephfs/rbd PVC if there are no active clients found
on the Ceph cluster for the current PVC; the sketch below illustrates
the intended retry behavior.
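A minimal, self-contained Go sketch of the pattern this patch describes: a sentinel error plus errors.Is lets the fencing loop skip volumes whose clients are already gone and only fail on a real error from the last volume. Only errActiveClientNotFound mirrors the sentinel added in this patch; fenceVolume and fenceFirstAvailable are hypothetical stand-ins, not the real watcher.go helpers.

package main

import (
	"errors"
	"fmt"
)

// Sentinel error: the volume has no active Ceph clients, so there is nothing to fence.
var errActiveClientNotFound = errors.New("active client not found")

// fenceVolume is a hypothetical stand-in for fenceRbdImage/fenceCephFSVolume.
// It reports the sentinel when no client IPs are found for the volume.
func fenceVolume(name string, activeIPs []string) error {
	if len(activeIPs) == 0 {
		return errActiveClientNotFound
	}
	fmt.Printf("created NetworkFence for volume %s (ips: %v)\n", name, activeIPs)
	return nil
}

// fenceFirstAvailable walks the PVCs in order: it stops after the first
// successful fence, skips volumes whose clients are already gone, and only
// fails when the last volume in the list returns a real error.
func fenceFirstAvailable(order []string, volumes map[string][]string) error {
	for i, name := range order {
		err := fenceVolume(name, volumes[name])
		if err == nil {
			break
		}
		// continue to fence the next volume if no active client was found
		if errors.Is(err, errActiveClientNotFound) {
			continue
		}
		if i == len(order)-1 {
			return fmt.Errorf("failed to fence volumes: %w", err)
		}
	}
	return nil
}

func main() {
	volumes := map[string][]string{
		"pvc-a": {},              // client already evicted, no IPs: skipped
		"pvc-b": {"10.244.0.12"}, // active client: gets fenced
	}
	if err := fenceFirstAvailable([]string{"pvc-a", "pvc-b"}, volumes); err != nil {
		fmt.Println("fencing failed:", err)
	}
}

With this shape, a PVC whose client was already evicted or disconnected no longer short-circuits fencing of the remaining PVCs on the node.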
Signed-off-by: Madhu Rajanna --- pkg/operator/ceph/cluster/watcher.go | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go index 145d1f862961..92756e7060df 100644 --- a/pkg/operator/ceph/cluster/watcher.go +++ b/pkg/operator/ceph/cluster/watcher.go @@ -20,6 +20,7 @@ package cluster import ( "context" "encoding/json" + stderrors "errors" "fmt" "strings" "time" @@ -54,6 +55,7 @@ type clientCluster struct { var ( nodesCheckedForReconcile = sets.New[string]() networkFenceLabel = "cephClusterUID" + errActiveClientNotFound = stderrors.New("active client not found") ) // drivers that supports fencing, used in naming networkFence object @@ -243,6 +245,10 @@ func (c *clientCluster) fenceNode(ctx context.Context, node *corev1.Node, cluste if err == nil { break } + // continue to fence next rbd volume if active client not found + if stderrors.Is(err, errActiveClientNotFound) { + continue + } if i == len(rbdPVList)-1 { return pkgerror.Wrapf(err, "failed to fence rbd volumes") @@ -275,6 +281,10 @@ func (c *clientCluster) fenceNode(ctx context.Context, node *corev1.Node, cluste break } + // continue to fence next rbd volume if active client not found + if stderrors.Is(err, errActiveClientNotFound) { + continue + } if i == len(cephFSPVList)-1 { return pkgerror.Wrapf(err, "failed to fence cephFS volumes") } @@ -401,11 +411,13 @@ func (c *clientCluster) fenceRbdImage( if err != nil { return pkgerror.Wrapf(err, "failed to unmarshal rbd status output") } - if len(ips) != 0 { - err = c.createNetworkFence(ctx, rbdPV, node, cluster, ips, rbdDriver) - if err != nil { - return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name) - } + if len(ips) == 0 { + logger.Infof("no active rbd clients found for rbd volume %q", rbdPV.Name) + return errActiveClientNotFound + } + err = c.createNetworkFence(ctx, rbdPV, node, cluster, ips, rbdDriver) + if err != nil { + return pkgerror.Wrapf(err, "failed to create network fence for node %q", node.Name) } return nil @@ -444,7 +456,7 @@ func (c *clientCluster) fenceCephFSVolume( if len(ips) == 0 { logger.Infof("no active mds clients found for cephfs volume %q", cephFSPV.Name) - return nil + return errActiveClientNotFound } err = c.createNetworkFence(ctx, cephFSPV, node, cluster, ips, cephfsDriver) From 3e76de7868f7c4061dde7f05576c5d74679b8555 Mon Sep 17 00:00:00 2001 From: Blaine Gardner Date: Mon, 12 Feb 2024 10:01:59 -0700 Subject: [PATCH 5/5] ci: allow canary jobs to have ids for nightly suite For nightly jobs, canary tests are all running with the same job IDs, making the last-run jobs cancel previous runs. Add a workflow-id parameter to canary jobs that can be used to give each job in a canary suite a unique job ID to prevent this. 
Signed-off-by: Blaine Gardner --- .../workflows/canary-integration-suite.yml | 32 ++++ .github/workflows/canary-integration-test.yml | 149 +++++++++++------- .github/workflows/collect-logs/action.yaml | 26 ++- .github/workflows/daily-nightly-jobs.yml | 12 +- .../encryption-pvc-kms-ibm-kp/action.yml | 5 +- .../workflows/rgw-multisite-test/action.yml | 6 - .mergify.yml | 58 +++++++ tests/scripts/collect-logs.sh | 11 +- 8 files changed, 216 insertions(+), 83 deletions(-) create mode 100644 .github/workflows/canary-integration-suite.yml diff --git a/.github/workflows/canary-integration-suite.yml b/.github/workflows/canary-integration-suite.yml new file mode 100644 index 000000000000..b13ad4f49037 --- /dev/null +++ b/.github/workflows/canary-integration-suite.yml @@ -0,0 +1,32 @@ +name: Canary integration tests +on: + push: + tags: + - v* + branches: + - master + - release-* + pull_request: + branches: + - master + - release-* + paths-ignore: + - "Documentation/**" + - "design/**" + +defaults: + run: + # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell + shell: bash --noprofile --norc -eo pipefail -x {0} + +# cancel the in-progress workflow when PR is refreshed. +concurrency: + group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} + cancel-in-progress: true + +jobs: + canary-tests: + uses: ./.github/workflows/canary-integration-test.yml + with: + ceph_images: '["quay.io/ceph/ceph:v18"]' + secrets: inherit diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml index e200aa74e298..54e6aeef3d44 100644 --- a/.github/workflows/canary-integration-test.yml +++ b/.github/workflows/canary-integration-test.yml @@ -1,23 +1,11 @@ -name: Canary integration tests +name: Reusable canary integration tests on: - push: - tags: - - v* - branches: - - master - - release-* - pull_request: - branches: - - master - - release-* - paths-ignore: - - "Documentation/**" - - "design/**" + # ONLY on.workflow_call ; call this from other files if needed workflow_call: inputs: - ceph-image: - description: 'Ceph image for creating Ceph cluster' - default: 'quay.io/ceph/ceph:v18' + ceph_images: + description: 'JSON list of Ceph images for creating Ceph cluster' + default: '["quay.io/ceph/ceph:v18"]' type: string defaults: @@ -25,15 +13,13 @@ defaults: # reference: https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#using-a-specific-shell shell: bash --noprofile --norc -eo pipefail -x {0} -# cancel the in-progress workflow when PR is refreshed. 
-concurrency: - group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }} - cancel-in-progress: true - jobs: canary: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -49,7 +35,7 @@ jobs: uses: ./.github/workflows/canary-test-config - name: set Ceph version in CephCluster manifest - run: tests/scripts/github-action-helper.sh replace_ceph_image "deploy/examples/cluster-test.yaml" "${{ github.event.inputs.ceph-image }}" + run: tests/scripts/github-action-helper.sh replace_ceph_image "deploy/examples/cluster-test.yaml" "${{ matrix.ceph-image }}" - name: validate-yaml run: tests/scripts/github-action-helper.sh validate_yaml @@ -276,11 +262,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} raw-disk: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -344,11 +333,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} two-osds-in-device: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -390,11 +382,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} osd-with-metadata-partition-device: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -435,11 +430,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} osd-with-metadata-device: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -488,11 +486,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -535,11 +536,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} lvm: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -587,11 +591,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary + name: ${{ github.job }}-${{ matrix.ceph-image }} pvc: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -660,11 +667,14 @@ jobs: if: always() uses: 
./.github/workflows/collect-logs with: - name: pvc + name: ${{ github.job }}-${{ matrix.ceph-image }} pvc-db: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -709,11 +719,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: pvc-db + name: ${{ github.job }}-${{ matrix.ceph-image }} pvc-db-wal: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -761,11 +774,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: pvc-db-wal + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption-pvc: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -826,11 +842,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption-pvc-db: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -877,11 +896,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc-db + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption-pvc-db-wal: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -938,11 +960,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc-db-wal + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption-pvc-kms-vault-token-auth: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1021,11 +1046,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc-kms-vault-token-auth + name: ${{ github.job }}-${{ matrix.ceph-image }} - encryption-pvc-kms-vault-k8s-auth: + encryption-pvc-kms-vault-k: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1084,11 +1112,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc-kms-vault-k8s-auth + name: ${{ github.job }}-${{ matrix.ceph-image }} lvm-pvc: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1138,11 +1169,14 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: lvm-pvc + name: ${{ github.job }}-${{ matrix.ceph-image }} multi-cluster-mirroring: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1393,11 
+1427,15 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: multi-cluster-mirroring + name: ${{ github.job }}-${{ matrix.ceph-image }} + additional-namespace: rook-ceph-secondary rgw-multisite-testing: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1415,16 +1453,18 @@ jobs: - name: run RGW multisite test uses: ./.github/workflows/rgw-multisite-test - - name: upload test result - uses: actions/upload-artifact@v4 + - name: collect common logs if: always() + uses: ./.github/workflows/collect-logs with: - name: rgw-multisite-testing - path: test + name: ${{ github.job }}-${{ matrix.ceph-image }} encryption-pvc-kms-ibm-kp: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - uses: actions/checkout@v4 with: @@ -1445,17 +1485,14 @@ jobs: ibm-instance-id: ${{ secrets.IBM_INSTANCE_ID }} ibm-service-api-key: ${{ secrets.IBM_SERVICE_API_KEY }} github-token: ${{ secrets.GITHUB_TOKEN }} - - - name: upload test result - uses: actions/upload-artifact@v4 - if: always() - with: - name: encryption-pvc-kms-ibm-kp - path: test + artifact-name: ${{ github.job }}-${{ matrix.ceph-image }} multus-cluster-network: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1532,11 +1569,15 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: canary-multus + name: ${{ github.job }}-${{ matrix.ceph-image }} + additional-namespace: kube-system csi-hostnetwork-disabled: runs-on: ubuntu-20.04 if: "!contains(github.event.pull_request.labels.*.name, 'skip-ci')" + strategy: + matrix: + ceph-image: ${{ fromJson(inputs.ceph_images) }} steps: - name: checkout uses: actions/checkout@v4 @@ -1584,4 +1625,4 @@ jobs: if: always() uses: ./.github/workflows/collect-logs with: - name: csi-hostnetwork-disabled + name: ${{ github.job }}-${{ matrix.ceph-image }} diff --git a/.github/workflows/collect-logs/action.yaml b/.github/workflows/collect-logs/action.yaml index 4bb3c2d729d0..dfbf6feb5a58 100644 --- a/.github/workflows/collect-logs/action.yaml +++ b/.github/workflows/collect-logs/action.yaml @@ -5,17 +5,37 @@ inputs: name: description: Name to use for the workflow required: true + additional-namespace: + description: Additional namespace to collect + required: false runs: using: "composite" steps: + - name: sanitize input name + id: sanitize + shell: bash --noprofile --norc -eo pipefail -x {0} + run: | + raw="${{ inputs.name }}" + no_colon="${raw//:/-}" + no_slash="${no_colon////-}" + # echo to $GITHUB_OUTPUT doesn't work in composite steps: + # https://github.com/actions/runner/issues/2009#issuecomment-1793565031 + echo "ARTIFACT_NAME=${no_slash}" >> $GITHUB_ENV + - name: collect common logs shell: bash --noprofile --norc -eo pipefail -x {0} run: | - tests/scripts/collect-logs.sh ${{ inputs.name }} + export ADDITIONAL_NAMESPACE="${{ inputs.additional-namespace }}" + tests/scripts/collect-logs.sh + + - name: log artifact name + shell: bash --noprofile --norc -eo pipefail {0} + run: | + echo ${{ env.ARTIFACT_NAME }} - name: Upload canary test result - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: ${{ inputs.name }} + 
name: ${{ env.ARTIFACT_NAME }} path: test diff --git a/.github/workflows/daily-nightly-jobs.yml b/.github/workflows/daily-nightly-jobs.yml index b5e28acb5078..5742876aeaf3 100644 --- a/.github/workflows/daily-nightly-jobs.yml +++ b/.github/workflows/daily-nightly-jobs.yml @@ -395,18 +395,8 @@ jobs: name: ceph-upgrade-suite-quincy-artifact path: /home/runner/work/rook/rook/tests/integration/_output/tests/ - # run default canary suite canary-tests: uses: ./.github/workflows/canary-integration-test.yml - secrets: inherit - - # run canary suite with relevant ceph devel versions - canary-tests-devel: - uses: ./.github/workflows/canary-integration-test.yml - strategy: - matrix: - ceph-image-tag: - ["latest-main-devel", "latest-quincy-devel", "latest-reef-devel"] with: - ceph-image: quay.io/ceph/daemon-base:${{ matrix.ceph-image-tag }} + ceph_images: '["quay.io/ceph/ceph:v18", "quay.io/ceph/daemon-base:latest-main-devel", "quay.io/ceph/daemon-base:latest-quincy-devel", "quay.io/ceph/daemon-base:latest-reef-devel"]' secrets: inherit diff --git a/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml b/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml index 0746b37dce0e..4e9975eaba03 100644 --- a/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml +++ b/.github/workflows/encryption-pvc-kms-ibm-kp/action.yml @@ -7,6 +7,9 @@ inputs: ibm-service-api-key: description: IBM_KP_SERVICE_API_KEY from the calling workflow required: true + artifact-name: + description: the name of the artifact where logs will be stored + required: true runs: using: "composite" @@ -75,7 +78,7 @@ runs: if: always() uses: ./.github/workflows/collect-logs with: - name: encryption-pvc-kms-ibm-kp + name: ${{ inputs.artifact-name }} - name: teardown cluster so that keys are removed from the KMS shell: bash --noprofile --norc -eo pipefail -x {0} diff --git a/.github/workflows/rgw-multisite-test/action.yml b/.github/workflows/rgw-multisite-test/action.yml index aa10c8627463..5fa7c81767f0 100644 --- a/.github/workflows/rgw-multisite-test/action.yml +++ b/.github/workflows/rgw-multisite-test/action.yml @@ -73,9 +73,3 @@ runs: not_committed_msg="there are no changes to commit for RGW configuration period for CephObjectStore" tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_primary_object_zone}" 600 tests/scripts/github-action-helper.sh wait_for_operator_log_message "${not_committed_msg} ${ns_name_secondary_object_zone}" 600 - - - name: collect common logs - if: always() - shell: bash --noprofile --norc -eo pipefail -x {0} - run: | - tests/scripts/collect-logs.sh diff --git a/.mergify.yml b/.mergify.yml index 50b223943dc4..8cbbf4fde51d 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -296,6 +296,64 @@ pull_request_rules: dismiss_reviews: {} delete_head_branch: {} + # release-1.14 branch + - name: automerge backport release-1.14 + conditions: + - author=mergify[bot] + - base=release-1.14 + - label!=do-not-merge + - "status-success=DCO" + - "check-success=linux-build-all (1.21)" + - "check-success=unittests" + - "check-success=golangci-lint" + - "check-success=codegen" + - "check-success=codespell" + - "check-success=lint" + - "check-success=modcheck" + - "check-success=Shellcheck" + - "check-success=yaml-linter" + - "check-success=lint-test" + - "check-success=gen-rbac" + - "check-success=crds-gen" + - "check-success=docs-check" + - "check-success=pylint" + - "check-success=canary (quay.io/ceph/ceph:v18)" + - "check-success=raw-disk (quay.io/ceph/ceph:v18)" + - 
"check-success=two-osds-in-device (quay.io/ceph/ceph:v18)" + - "check-success=osd-with-metadata-partition-device (quay.io/ceph/ceph:v18)" + - "check-success=osd-with-metadata-device (quay.io/ceph/ceph:v18)" + - "check-success=encryption (quay.io/ceph/ceph:v18)" + - "check-success=lvm (quay.io/ceph/ceph:v18)" + - "check-success=pvc (quay.io/ceph/ceph:v18)" + - "check-success=pvc-db (quay.io/ceph/ceph:v18)" + - "check-success=pvc-db-wal (quay.io/ceph/ceph:v18)" + - "check-success=encryption-pvc (quay.io/ceph/ceph:v18)" + - "check-success=encryption-pvc-db (quay.io/ceph/ceph:v18)" + - "check-success=encryption-pvc-db-wal (quay.io/ceph/ceph:v18)" + - "check-success=encryption-pvc-kms-vault-token-auth (quay.io/ceph/ceph:v18)" + - "check-success=encryption-pvc-kms-vault-k8s-auth (quay.io/ceph/ceph:v18)" + - "check-success=lvm-pvc (quay.io/ceph/ceph:v18)" + # - "check-success=multi-cluster-mirroring (quay.io/ceph/ceph:v18)" + - "check-success=rgw-multisite-testing (quay.io/ceph/ceph:v18)" + # - "check-success=encryption-pvc-kms-ibm-kp (quay.io/ceph/ceph:v18)" + - "check-success=multus-cluster-network (quay.io/ceph/ceph:v18)" + - "check-success=csi-hostnetwork-disabled (quay.io/ceph/ceph:v18)" + - "check-success=TestCephSmokeSuite (v1.23.17)" + - "check-success=TestCephSmokeSuite (v1.29.0)" + - "check-success=TestCephHelmSuite (v1.23.17)" + - "check-success=TestCephHelmSuite (v1.29.0)" + - "check-success=TestCephMultiClusterDeploySuite (v1.29.0)" + - "check-success=TestCephObjectSuite (v1.29.0)" + - "check-success=TestCephUpgradeSuite (v1.23.17)" + - "check-success=TestCephUpgradeSuite (v1.29.0)" + - "check-success=TestHelmUpgradeSuite (v1.23.17)" + - "check-success=TestHelmUpgradeSuite (v1.29.0)" + actions: + merge: + method: merge + dismiss_reviews: {} + delete_head_branch: {} + # release-1.8 branch - actions: backport: diff --git a/tests/scripts/collect-logs.sh b/tests/scripts/collect-logs.sh index 637d446a53f2..c71267e11ce0 100755 --- a/tests/scripts/collect-logs.sh +++ b/tests/scripts/collect-logs.sh @@ -5,7 +5,7 @@ set -x # User parameters : "${CLUSTER_NAMESPACE:="rook-ceph"}" : "${OPERATOR_NAMESPACE:="$CLUSTER_NAMESPACE"}" -: "${KUBE_SYSTEM_NAMESPACE:="kube-system"}" +: "${ADDITIONAL_NAMESPACE:=""}" : "${LOG_DIR:="test"}" LOG_DIR="${LOG_DIR%/}" # remove trailing slash if necessary @@ -22,13 +22,8 @@ if [[ "$OPERATOR_NAMESPACE" != "$CLUSTER_NAMESPACE" ]]; then NAMESPACES+=("$OPERATOR_NAMESPACE") fi -if [ "$1" == "multi-cluster-mirroring" ]; then - NAMESPACES+=("rook-ceph-secondary") -fi - -# Add kube-system namespace for multus test only as we need to debug network in multus test -if [ "$1" == "canary-multus" ]; then - NAMESPACES+=("$KUBE_SYSTEM_NAMESPACE") +if [[ -n "${ADDITIONAL_NAMESPACE}" ]]; then + NAMESPACES+=("${ADDITIONAL_NAMESPACE}") fi for NAMESPACE in "${NAMESPACES[@]}"; do