Skip to content

Commit

Permalink
osd: replace osd to use new backend store
Browse files Browse the repository at this point in the history
If the OSD backend store is updated in the Ceph cluster spec, then
delete the OSDs one by one, clean up their disks, and provision a new OSD
on the same disk.

Signed-off-by: sp98 <sapillai@redhat.com>
(cherry picked from commit 886bb35)
  • Loading branch information
sp98 committed Aug 21, 2023
1 parent d91e8e4 commit ab27474
Show file tree
Hide file tree
Showing 25 changed files with 1,230 additions and 63 deletions.
54 changes: 54 additions & 0 deletions Documentation/CRDs/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -3612,6 +3612,18 @@ string
<td>
</td>
</tr>
<tr>
<td>
<code>osd</code><br/>
<em>
<a href="#ceph.rook.io/v1.OSDStatus">
OSDStatus
</a>
</em>
</td>
<td>
</td>
</tr>
</tbody>
</table>
<h3 id="ceph.rook.io/v1.CephVersionSpec">CephVersionSpec
Expand Down Expand Up @@ -8181,6 +8193,35 @@ string
</tr>
</tbody>
</table>
<h3 id="ceph.rook.io/v1.OSDStatus">OSDStatus
</h3>
<p>
(<em>Appears on:</em><a href="#ceph.rook.io/v1.CephStorage">CephStorage</a>)
</p>
<div>
<p>OSDStatus represents OSD status of the ceph Cluster</p>
</div>
<table>
<thead>
<tr>
<th>Field</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<code>storeType</code><br/>
<em>
map[string]int
</em>
</td>
<td>
<p>StoreType is a mapping between the OSD backend stores and number of OSDs using these stores</p>
</td>
</tr>
</tbody>
</table>
<h3 id="ceph.rook.io/v1.OSDStore">OSDStore
</h3>
<p>
Expand Down Expand Up @@ -8209,6 +8250,19 @@ string
<p>Type of backend storage to be used while creating OSDs. If empty, then bluestore will be used</p>
</td>
</tr>
<tr>
<td>
<code>updateStore</code><br/>
<em>
string
</em>
</td>
<td>
<em>(Optional)</em>
<p>UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk
and prepares same OSD on that disk</p>
</td>
</tr>
</tbody>
</table>
<h3 id="ceph.rook.io/v1.ObjectEndpoints">ObjectEndpoints
Expand Down
54 changes: 53 additions & 1 deletion cmd/rook/ceph/osd.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package ceph
import (
"context"
"encoding/json"
"fmt"
"os"
"path"
"strconv"
Expand All @@ -29,8 +30,12 @@ import (
"github.com/pkg/errors"
"github.com/rook/rook/cmd/rook/rook"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"github.com/rook/rook/pkg/clusterd"
cleanup "github.com/rook/rook/pkg/daemon/ceph/cleanup"
"github.com/rook/rook/pkg/daemon/ceph/client"
osddaemon "github.com/rook/rook/pkg/daemon/ceph/osd"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/cluster/osd"
oposd "github.com/rook/rook/pkg/operator/ceph/cluster/osd"
osdcfg "github.com/rook/rook/pkg/operator/ceph/cluster/osd/config"
opcontroller "github.com/rook/rook/pkg/operator/ceph/controller"
Expand Down Expand Up @@ -66,6 +71,7 @@ var (
ownerRefID string
clusterName string
osdID int
replaceOSDID int
osdStoreType string
osdStringID string
osdUUID string
Expand All @@ -88,6 +94,7 @@ func addOSDFlags(command *cobra.Command) {
addOSDConfigFlags(provisionCmd)

// flags specific to provisioning
provisionCmd.Flags().IntVar(&replaceOSDID, "replace-osd", -1, "osd to be destroyed")
provisionCmd.Flags().StringVar(&cfg.devices, "data-devices", "", "comma separated list of devices to use for storage")
provisionCmd.Flags().StringVar(&osdDataDeviceFilter, "data-device-filter", "", "a regex filter for the device names to use, or \"all\"")
provisionCmd.Flags().StringVar(&osdDataDevicePathFilter, "data-device-path-filter", "", "a regex filter for the device path names to use")
Expand Down Expand Up @@ -196,6 +203,7 @@ func writeOSDConfig(cmd *cobra.Command, args []string) error {

// Provision a device or directory for an OSD
func prepareOSD(cmd *cobra.Command, args []string) error {

if err := verifyConfigFlags(provisionCmd); err != nil {
return err
}
Expand Down Expand Up @@ -251,8 +259,19 @@ func prepareOSD(cmd *cobra.Command, args []string) error {
clusterInfo.OwnerInfo = ownerInfo
clusterInfo.Context = cmd.Context()
kv := k8sutil.NewConfigMapKVStore(clusterInfo.Namespace, context.Clientset, ownerInfo)

// Destroy the OSD selected with --replace-osd (if any) and remember its ID and
// block path so the agent can re-create the same OSD on that disk.
var replaceOSD *osd.OSDReplaceInfo
if replaceOSDID != -1 {
	osdInfo, err := destroyOSD(context, &clusterInfo, replaceOSDID)
	if err != nil {
		// destroyOSD returns a nil osdInfo on failure, so report the requested ID
		// rather than dereferencing osdInfo (which would panic and hide the real error).
		rook.TerminateFatal(errors.Wrapf(err, "failed to destroy OSD %d.", replaceOSDID))
	}
	replaceOSD = &oposd.OSDReplaceInfo{ID: osdInfo.ID, Path: osdInfo.BlockPath}
}

agent := osddaemon.NewAgent(context, dataDevices, cfg.metadataDevice, forceFormat,
cfg.storeConfig, &clusterInfo, cfg.nodeName, kv, cfg.pvcBacked)
cfg.storeConfig, &clusterInfo, cfg.nodeName, kv, replaceOSD, cfg.pvcBacked)

if cfg.metadataDevice != "" {
metaDevice = cfg.metadataDevice
Expand Down Expand Up @@ -398,3 +417,36 @@ func readCephSecret(path string) error {
}
return nil
}

// destroyOSD fetches the info for the OSD with the given ID, runs
// `ceph osd destroy` for it, and then sanitizes its backing disk.
//
// It returns the info of the destroyed OSD, or a nil info and a wrapped error
// when either the lookup or the destroy command fails. Disk sanitization does
// not report errors back to the caller.
func destroyOSD(context *clusterd.Context, clusterInfo *client.ClusterInfo, osdID int) (*oposd.OSDInfo, error) {
	osdInfo, err := osddaemon.GetOSDInfoById(context, clusterInfo, osdID)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to get OSD info for OSD.%d", osdID)
	}

	// Mark the OSD as destroyed in the cluster.
	logger.Infof("destroying OSD %d on path %q in %q mode", osdInfo.ID, osdInfo.BlockPath, osdInfo.CVMode)
	args := []string{"osd", "destroy", fmt.Sprintf("osd.%d", osdInfo.ID), "--yes-i-really-mean-it"}
	if _, runErr := client.NewCephCommand(context, clusterInfo, args).Run(); runErr != nil {
		return nil, errors.Wrapf(runErr, "failed to destroy osd.%d.", osdInfo.ID)
	}

	// Wipe the disk that backed the OSD: "complete" method, "zero" data source, one iteration.
	spec := &cephv1.SanitizeDisksSpec{
		Method:     cephv1.SanitizeMethodProperty(cephv1.SanitizeMethodComplete),
		DataSource: cephv1.SanitizeDataSourceProperty(cephv1.SanitizeDataSourceZero),
		Iteration:  1,
	}
	sanitizer := cleanup.NewDiskSanitizer(context, clusterInfo, spec)

	// TODO: handle disk sanitization errors
	// Any CVMode other than "raw" or "lvm" is left unsanitized.
	switch osdInfo.CVMode {
	case "raw":
		sanitizer.SanitizeRawDisk([]oposd.OSDInfo{osdInfo})
	case "lvm":
		sanitizer.SanitizeLVMDisk([]oposd.OSDInfo{osdInfo})
	}

	return &osdInfo, nil
}
13 changes: 13 additions & 0 deletions deploy/charts/rook-ceph/templates/resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4385,6 +4385,10 @@ spec:
- bluestore
- bluestore-rdr
type: string
updateStore:
description: UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk and prepares same OSD on that disk
pattern: ^$|^yes-really-update-store$
type: string
type: object
useAllDevices:
description: Whether to consume all the storage devices found on a machine
Expand Down Expand Up @@ -4753,6 +4757,15 @@ spec:
type: string
type: object
type: array
osd:
description: OSDStatus represents OSD status of the ceph Cluster
properties:
storeType:
additionalProperties:
type: integer
description: StoreType is a mapping between the OSD backend stores and number of OSDs using these stores
type: object
type: object
type: object
version:
description: ClusterVersion represents the version of a Ceph Cluster
Expand Down
13 changes: 13 additions & 0 deletions deploy/examples/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4383,6 +4383,10 @@ spec:
- bluestore
- bluestore-rdr
type: string
updateStore:
description: UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk and prepares same OSD on that disk
pattern: ^$|^yes-really-update-store$
type: string
type: object
useAllDevices:
description: Whether to consume all the storage devices found on a machine
Expand Down Expand Up @@ -4751,6 +4755,15 @@ spec:
type: string
type: object
type: array
osd:
description: OSDStatus represents OSD status of the ceph Cluster
properties:
storeType:
additionalProperties:
type: integer
description: StoreType is a mapping between the OSD backend stores and number of OSDs using these stores
type: object
type: object
type: object
version:
description: ClusterVersion represents the version of a Ceph Cluster
Expand Down
12 changes: 12 additions & 0 deletions pkg/apis/ceph.rook.io/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,13 +420,20 @@ type Capacity struct {
// CephStorage represents flavors of Ceph Cluster Storage
type CephStorage struct {
DeviceClasses []DeviceClasses `json:"deviceClasses,omitempty"`
OSD OSDStatus `json:"osd,omitempty"`
}

// DeviceClasses represents device classes of a Ceph Cluster
type DeviceClasses struct {
Name string `json:"name,omitempty"`
}

// OSDStatus represents OSD status of the ceph Cluster
type OSDStatus struct {
// StoreType is a mapping between the OSD backend stores and number of OSDs using these stores
StoreType map[string]int `json:"storeType,omitempty"`
}

// ClusterVersion represents the version of a Ceph Cluster
type ClusterVersion struct {
Image string `json:"image,omitempty"`
Expand Down Expand Up @@ -2597,6 +2604,11 @@ type OSDStore struct {
// +optional
// +kubebuilder:validation:Enum=bluestore;bluestore-rdr;
Type string `json:"type,omitempty"`
// UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk
// and prepares same OSD on that disk
// +optional
// +kubebuilder:validation:Pattern=`^$|^yes-really-update-store$`
UpdateStore string `json:"updateStore,omitempty"`
}

// Node is a storage nodes
Expand Down
24 changes: 24 additions & 0 deletions pkg/apis/ceph.rook.io/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions pkg/daemon/ceph/cleanup/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (s *DiskSanitizer) StartSanitizeDisks() {
logger.Errorf("failed to list lvm osd(s). %v", err)
} else {
// Start the sanitizing sequence
s.sanitizeLVMDisk(osdLVMList)
s.SanitizeLVMDisk(osdLVMList)
}

// Raw based OSDs
Expand All @@ -72,11 +72,11 @@ func (s *DiskSanitizer) StartSanitizeDisks() {
logger.Errorf("failed to list raw osd(s). %v", err)
} else {
// Start the sanitizing sequence
s.sanitizeRawDisk(osdRawList)
s.SanitizeRawDisk(osdRawList)
}
}

func (s *DiskSanitizer) sanitizeRawDisk(osdRawList []oposd.OSDInfo) {
func (s *DiskSanitizer) SanitizeRawDisk(osdRawList []oposd.OSDInfo) {
// Initialize work group to wait for completion of all the go routine
var wg sync.WaitGroup

Expand All @@ -93,7 +93,7 @@ func (s *DiskSanitizer) sanitizeRawDisk(osdRawList []oposd.OSDInfo) {
wg.Wait()
}

func (s *DiskSanitizer) sanitizeLVMDisk(osdLVMList []oposd.OSDInfo) {
func (s *DiskSanitizer) SanitizeLVMDisk(osdLVMList []oposd.OSDInfo) {
// Initialize work group to wait for completion of all the go routine
var wg sync.WaitGroup
pvs := []string{}
Expand All @@ -112,7 +112,7 @@ func (s *DiskSanitizer) sanitizeLVMDisk(osdLVMList []oposd.OSDInfo) {
wg.Wait()

var wg2 sync.WaitGroup
// // purge remaining LVM2 metadata from PV
// purge remaining LVM2 metadata from PV
for _, pv := range pvs {
wg2.Add(1)
go s.executeSanitizeCommand(oposd.OSDInfo{BlockPath: pv}, &wg2)
Expand Down
15 changes: 14 additions & 1 deletion pkg/daemon/ceph/osd/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package osd
import (
"github.com/rook/rook/pkg/clusterd"
cephclient "github.com/rook/rook/pkg/daemon/ceph/client"
oposd "github.com/rook/rook/pkg/operator/ceph/cluster/osd"
"github.com/rook/rook/pkg/operator/ceph/cluster/osd/config"
"github.com/rook/rook/pkg/operator/k8sutil"
)
Expand All @@ -37,11 +38,13 @@ type OsdAgent struct {
storeConfig config.StoreConfig
kv *k8sutil.ConfigMapKVStore
pvcBacked bool
replaceOSD *oposd.OSDReplaceInfo
}

// NewAgent is the instantiation of the OSD agent
func NewAgent(context *clusterd.Context, devices []DesiredDevice, metadataDevice string, forceFormat bool,
storeConfig config.StoreConfig, clusterInfo *cephclient.ClusterInfo, nodeName string, kv *k8sutil.ConfigMapKVStore, pvcBacked bool) *OsdAgent {
storeConfig config.StoreConfig, clusterInfo *cephclient.ClusterInfo, nodeName string, kv *k8sutil.ConfigMapKVStore,
replaceOSD *oposd.OSDReplaceInfo, pvcBacked bool) *OsdAgent {

return &OsdAgent{
devices: devices,
Expand All @@ -52,6 +55,7 @@ func NewAgent(context *clusterd.Context, devices []DesiredDevice, metadataDevice
nodeName: nodeName,
kv: kv,
pvcBacked: pvcBacked,
replaceOSD: replaceOSD,
}
}

Expand All @@ -64,3 +68,12 @@ func getDeviceLVPath(context *clusterd.Context, deviceName string) string {
logger.Debugf("logical volume path for device %q is %q", deviceName, output)
return output
}

// GetReplaceOSDId returns the ID of the OSD being replaced when device matches
// the path recorded for that OSD. It returns -1 when the device does not match
// or when the agent was created without any replacement info.
func (a *OsdAgent) GetReplaceOSDId(device string) int {
	// replaceOSD is nil when no OSD replacement was requested; guard against
	// dereferencing it so ordinary provisioning does not panic.
	if a.replaceOSD == nil {
		return -1
	}

	if device == a.replaceOSD.Path {
		return a.replaceOSD.ID
	}

	return -1
}
Loading

0 comments on commit ab27474

Please sign in to comment.