Skip to content

Commit

Permalink
Trash RBD image on namespace deletion.
Browse files Browse the repository at this point in the history
Fixes #953

Signed-off-by: Gil Bregman <gbregman@il.ibm.com>
  • Loading branch information
gbregman committed Jan 12, 2025
1 parent 5bc26c0 commit 44b25bf
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 16 deletions.
22 changes: 22 additions & 0 deletions control/cephutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,28 @@ def create_image(self, pool_name, image_name, size) -> bool:

return True

def delete_image(self, pool_name, image_name) -> bool:
if not pool_name and not image_name:
return True

if not self.pool_exists(pool_name):
self.logger.warning(f"Pool {pool_name} doesn't exist, can't delete RBD image")
return True

with rados.Rados(conffile=self.ceph_conf, rados_id=self.rados_id) as cluster:
with cluster.open_ioctx(pool_name) as ioctx:
rbd_inst = rbd.RBD()
try:
rbd_inst.remove(ioctx, image_name)
except rbd.ImageNotFound:
self.logger.warning(f"Image {pool_name}/{image_name} is not found")
return True
except (rbd.ImageBusy, rbd.ImageHasSnapshots):
self.logger.exception(f"Can't delete image {pool_name}/{image_name}")
return False

return True

def get_image_size(self, pool_name, image_name) -> int:
image_size = 0
if not self.pool_exists(pool_name):
Expand Down
17 changes: 15 additions & 2 deletions control/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,6 +1726,10 @@ def ns_add(self, args):
self.cli.parser.error("--size argument is not allowed for add command when "
"RBD image creation is disabled")

if args.rbd_trash_image_on_delete and not args.rbd_create_image:
self.cli.parser.error("Can't trash associated RBD image on delete if it wasn't "
"created automatically by the gateway")

req = pb2.namespace_add_req(rbd_pool_name=args.rbd_pool,
rbd_image_name=args.rbd_image,
subsystem_nqn=args.subsystem,
Expand All @@ -1736,7 +1740,8 @@ def ns_add(self, args):
create_image=args.rbd_create_image,
size=img_size,
force=args.force,
no_auto_visible=args.no_auto_visible)
no_auto_visible=args.no_auto_visible,
trash_image=args.rbd_trash_image_on_delete)
try:
ret = self.stub.namespace_add(req)
except Exception as ex:
Expand Down Expand Up @@ -1776,7 +1781,7 @@ def ns_del(self, args):

try:
ret = self.stub.namespace_delete(pb2.namespace_delete_req(
subsystem_nqn=args.subsystem, nsid=args.nsid))
subsystem_nqn=args.subsystem, nsid=args.nsid, are_you_sure=args.are_you_sure))
except Exception as ex:
ret = pb2.req_status(status=errno.EINVAL,
error_message=f"Failure deleting namespace:\n{ex}")
Expand Down Expand Up @@ -2454,12 +2459,20 @@ def ns_change_visibility(self, args):
help="Make the namespace visible only to specific hosts",
action='store_true',
required=False),
argument("--rbd-trash-image-on-delete",
help="When deleting the namespace, trash associated RBD image. "
"Only applies to images created automatically by the gateway",
action='store_true',
required=False),
]
ns_del_args_list = ns_common_args + [
argument("--nsid",
help="Namespace ID",
type=int,
required=True),
argument("--are-you-sure",
help="If you choose to delete the associated RBD image, set this to \"yes\"",
required=False),
]
ns_resize_args_list = ns_common_args + [
argument("--nsid",
Expand Down
117 changes: 103 additions & 14 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@


class BdevStatus:
def __init__(self, status, error_message, bdev_name=""):
def __init__(self, status, error_message, bdev_name="", rbd_pool=None, rbd_image_name=None):
self.status = status
self.error_message = error_message
self.bdev_name = bdev_name
self.rbd_pool = rbd_pool
self.rbd_image_name = rbd_image_name


class MonitorGroupService(monitor_pb2_grpc.MonitorGroupServicer):
Expand Down Expand Up @@ -325,17 +327,20 @@ def get_subsystem_dhchap_key(self, subsys) -> str:


class NamespaceInfo:
def __init__(self, nsid, bdev, uuid, anagrpid, auto_visible):
def __init__(self, nsid, bdev, uuid, anagrpid, auto_visible, pool, image):
self.nsid = nsid
self.bdev = bdev
self.uuid = uuid
self.auto_visible = auto_visible
self.anagrpid = anagrpid
self.host_list = []
self.pool = pool
self.image = image

def __str__(self):
return f"nsid: {self.nsid}, bdev: {self.bdev}, uuid: {self.uuid}, " \
f"auto_visible: {self.auto_visible}, anagrpid: {self.anagrpid}, " \
f"pool: {self.pool}, image: {self.image}, " \
f"hosts: {self.host_list}"

def empty(self) -> bool:
Expand Down Expand Up @@ -370,7 +375,7 @@ def set_ana_group_id(self, anagrpid):


class NamespacesLocalList:
EMPTY_NAMESPACE = NamespaceInfo(None, None, None, 0, False)
EMPTY_NAMESPACE = NamespaceInfo(None, None, None, 0, False, None, None)

def __init__(self):
self.namespace_list = defaultdict(dict)
Expand All @@ -385,10 +390,11 @@ def remove_namespace(self, nqn, nsid=None):
else:
self.namespace_list.pop(nqn, None)

def add_namespace(self, nqn, nsid, bdev, uuid, anagrpid, auto_visible):
def add_namespace(self, nqn, nsid, bdev, uuid, anagrpid, auto_visible, pool, image):
if not bdev:
bdev = GatewayService.find_unique_bdev_name(uuid)
self.namespace_list[nqn][nsid] = NamespaceInfo(nsid, bdev, uuid, anagrpid, auto_visible)
self.namespace_list[nqn][nsid] = NamespaceInfo(nsid, bdev, uuid, anagrpid,
auto_visible, pool, image)

def find_namespace(self, nqn, nsid, uuid=None) -> NamespaceInfo:
if nqn not in self.namespace_list:
Expand Down Expand Up @@ -895,25 +901,41 @@ def execute_grpc_function(self, func, request, context):
self._grpc_function_with_lock, func, request, context)

def create_bdev(self, anagrp: int, name, uuid, rbd_pool_name, rbd_image_name,
block_size, create_image, rbd_image_size, context, peer_msg=""):
block_size, create_image, trash_image, rbd_image_size, context, peer_msg=""):
"""Creates a bdev from an RBD image."""

if create_image:
cr_img_msg = "will create image if doesn't exist"
else:
cr_img_msg = "will not create image if doesn't exist"

trsh_msg = ""
if trash_image:
trsh_msg = "will trash the image on namespace delete, "

self.logger.info(f"Received request to create bdev {name} from"
f" {rbd_pool_name}/{rbd_image_name} (size {rbd_image_size} bytes)"
f" with block size {block_size}, {cr_img_msg}, "
f" with block size {block_size}, {cr_img_msg}, {trsh_msg}"
f"context={context}{peer_msg}")

if block_size == 0:
return BdevStatus(status=errno.EINVAL,
error_message=f"Failure creating bdev {name}: block size "
f"can't be zero")

created_rbd_pool = None
created_rbd_image_name = None
rbd_pool_to_save = None
rbd_image_name_to_save = None
if create_image:
if not rbd_pool_name:
return BdevStatus(status=errno.ENODEV,
error_message=f"Failure creating bdev {name}: empty RBD"
f"pool name")
if not rbd_image_name:
return BdevStatus(status=errno.ENODEV,
error_message=f"Failure creating bdev {name}: empty RBD"
f"image name")
if rbd_image_size <= 0:
return BdevStatus(status=errno.EINVAL,
error_message=f"Failure creating bdev {name}: image size "
Expand All @@ -933,6 +955,11 @@ def create_bdev(self, anagrp: int, name, uuid, rbd_pool_name, rbd_image_name,
if rc:
self.logger.info(f"Image {rbd_pool_name}/{rbd_image_name} created, size "
f"is {rbd_image_size} bytes")
created_rbd_pool = rbd_pool_name
created_rbd_image_name = rbd_image_name
if trash_image:
rbd_pool_to_save = rbd_pool_name
rbd_image_name_to_save = rbd_image_name
else:
self.logger.info(f"Image {rbd_pool_name}/{rbd_image_name} already exists "
f"with size {rbd_image_size} bytes")
Expand Down Expand Up @@ -980,18 +1007,26 @@ def create_bdev(self, anagrp: int, name, uuid, rbd_pool_name, rbd_image_name,
if resp:
status = resp["code"]
errmsg = f"Failure creating bdev {name}: {resp['message']}"
self.delete_rbd_image(created_rbd_pool, created_rbd_image_name)
return BdevStatus(status=status, error_message=errmsg)

# Just in case SPDK failed with no exception
if not bdev_name:
errmsg = f"Can't create bdev {name}"
self.logger.error(errmsg)
self.delete_rbd_image(created_rbd_pool, created_rbd_image_name)
return BdevStatus(status=errno.ENODEV, error_message=errmsg)

assert name == bdev_name, f"Created bdev name {bdev_name} differs " \
f"from requested name {name}"

return BdevStatus(status=0, error_message=os.strerror(0), bdev_name=name)
# on update we don't create the image, but we still want to save its name
if not context and trash_image:
rbd_pool_to_save = rbd_pool_name
rbd_image_name_to_save = rbd_image_name

return BdevStatus(status=0, error_message=os.strerror(0), bdev_name=name,
rbd_pool=rbd_pool_to_save, rbd_image_name=rbd_image_name_to_save)

def resize_bdev(self, bdev_name, new_size, peer_msg=""):
"""Resizes a bdev."""
Expand Down Expand Up @@ -1495,12 +1530,15 @@ def check_if_image_used(self, pool_name, image_name):
return errmsg, nqn

def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid,
auto_visible, context):
auto_visible, rbd_pool, rbd_image_name, context):
"""Adds a namespace to a subsystem."""

if context:
assert self.omap_lock.locked(), "OMAP is unlocked when calling create_namespace()"

assert (rbd_pool and rbd_image_name) or ((not rbd_pool) and (not rbd_image_name)), \
"RBD pool and image name should either be both set or both empty"

nsid_msg = ""
if nsid:
nsid_msg = f" using ID {nsid}"
Expand All @@ -1513,9 +1551,12 @@ def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid,
add_namespace_error_prefix = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}"

peer_msg = self.get_peer_message(context)
rbd_msg = ""
if rbd_pool and rbd_image_name:
rbd_msg = f"RBD image {rbd_pool}/{rbd_image_name}, "
self.logger.info(f"Received request to add {bdev_name} to {subsystem_nqn} with load "
f"balancing group id {anagrpid}{nsid_msg}, auto_visible: {auto_visible}, "
f"context: {context}{peer_msg}")
f"{rbd_msg}context: {context}{peer_msg}")

if subsystem_nqn not in self.subsys_max_ns:
errmsg = f"{add_namespace_error_prefix}: No such subsystem"
Expand Down Expand Up @@ -1585,7 +1626,8 @@ def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid,
)
self.subsystem_nsid_bdev_and_uuid.add_namespace(subsystem_nqn, nsid,
bdev_name, uuid,
anagrpid, auto_visible)
anagrpid, auto_visible,
rbd_pool, rbd_image_name)
self.logger.debug(f"subsystem_add_ns: {nsid}")
self.ana_grp_ns_load[anagrpid] += 1
if anagrpid in self.ana_grp_subs_load:
Expand Down Expand Up @@ -1753,6 +1795,11 @@ def namespace_add_safe(self, request, context):
if not request.uuid:
request.uuid = str(uuid.uuid4())

if request.trash_image and not request.create_image:
self.logger.warning = "Can't trash the RBD image on delete if it " \
"wasn't created by the gateway, will reset the flag"
request.trash_image = False

if context:
if request.anagrpid != 0:
grps_list = self.ceph_utils.get_number_created_gateways(self.gateway_pool,
Expand Down Expand Up @@ -1813,7 +1860,7 @@ def namespace_add_safe(self, request, context):
anagrp = request.anagrpid
ret_bdev = self.create_bdev(anagrp, bdev_name, request.uuid, request.rbd_pool_name,
request.rbd_image_name, request.block_size, create_image,
request.size, context, peer_msg)
request.trash_image, request.size, context, peer_msg)
if ret_bdev.status != 0:
errmsg = f"Failure adding namespace {nsid_msg}to {request.subsystem_nqn}: " \
f"{ret_bdev.error_message}"
Expand All @@ -1838,7 +1885,9 @@ def namespace_add_safe(self, request, context):

ret_ns = self.create_namespace(request.subsystem_nqn, bdev_name,
request.nsid, anagrp, request.uuid,
not request.no_auto_visible, context)
not request.no_auto_visible,
ret_bdev.rbd_pool, ret_bdev.rbd_image_name,
context)
if ret_ns.status == 0 and request.nsid and ret_ns.nsid != request.nsid:
errmsg = f"Returned ID {ret_ns.nsid} differs from requested one {request.nsid}"
self.logger.error(errmsg)
Expand All @@ -1859,6 +1908,7 @@ def namespace_add_safe(self, request, context):
errmsg = f"Failure adding namespace {nsid_msg}to {request.subsystem_nqn}: " \
f"{ret_ns.error_message}"
self.logger.error(errmsg)
self.delete_rbd_image(ret_bdev.rbd_pool, ret_bdev.rbd_image_name)
return pb2.nsid_status(status=ret_ns.status, error_message=errmsg)

if context:
Expand All @@ -1873,6 +1923,11 @@ def namespace_add_safe(self, request, context):
errmsg = f"Error persisting namespace {nsid_msg}on {request.subsystem_nqn}"
self.logger.exception(errmsg)
errmsg = f"{errmsg}:\n{ex}"
try:
ret_del = self.delete_bdev(bdev_name, peer_msg=peer_msg)
except Exception:
pass
self.delete_rbd_image(ret_bdev.rbd_pool, ret_bdev.rbd_image_name)
return pb2.req_status(status=errno.EINVAL, error_message=errmsg)

return pb2.nsid_status(status=0, error_message=os.strerror(0), nsid=ret_ns.nsid)
Expand Down Expand Up @@ -2691,6 +2746,20 @@ def namespace_resize(self, request, context=None):
"""Resize a namespace."""
return self.execute_grpc_function(self.namespace_resize_safe, request, context)

def delete_rbd_image(self, pool, image):
if (not pool) and (not image):
return

if (not pool) or (not image):
self.logger.warning("RBD pool and image name should be both set or unset, "
"will not delete RBD image")
return

if self.ceph_utils.delete_image(pool, image):
self.logger.info(f"Deleted RBD image {pool}/{image}")
else:
self.logger.warning(f"Failed to delete RBD image {pool}/{image}")

def namespace_delete_safe(self, request, context):
"""Delete a namespace."""

Expand All @@ -2706,18 +2775,36 @@ def namespace_delete_safe(self, request, context):

peer_msg = self.get_peer_message(context)
self.logger.info(f"Received request to delete namespace {request.nsid} from "
f"{request.subsystem_nqn}, context: {context}{peer_msg}")
f"{request.subsystem_nqn}, "
f"context: {context}{peer_msg}")

find_ret = self.subsystem_nsid_bdev_and_uuid.find_namespace(request.subsystem_nqn,
request.nsid)
if find_ret.empty():
errmsg = f"Failure deleting namespace {request.nsid}: Can't find namespace"
self.logger.error(errmsg)
return pb2.req_status(status=errno.ENODEV, error_message=errmsg)

if find_ret.pool and find_ret.image:
if not request.are_you_sure or request.are_you_sure.lower() != "yes":
errmsg = f"Failure deleting namespace {request.nsid} from " \
f"{request.subsystem_nqn}: Confirmation for trashing " \
f"RBD image is needed"
self.logger.error(errmsg)
return pb2.req_status(status=errno.EINVAL, error_message=errmsg)

bdev_name = find_ret.bdev
if not bdev_name:
self.logger.warning("Can't find namespace's bdev name, will try to "
"delete namespace anyway")
rbd_pool = find_ret.pool
rbd_image_name = find_ret.image

if (rbd_pool and (not rbd_image_name)) or ((not rbd_pool) and rbd_image_name):
self.logger.warning("RBD pool and image name should be both set or unset, "
"will not delete RBD image")
rbd_pool = None
rbd_image_name = None

omap_lock = self.omap_lock.get_omap_lock_to_use(context)
with omap_lock:
Expand All @@ -2733,7 +2820,9 @@ def namespace_delete_safe(self, request, context):
errmsg = f"Failure deleting namespace {request.nsid} from " \
f"{request.subsystem_nqn}: {ret_del.error_message}"
self.logger.error(errmsg)
self.delete_rbd_image(rbd_pool, rbd_image_name)
return pb2.nsid_status(status=ret_del.status, error_message=errmsg)
self.delete_rbd_image(rbd_pool, rbd_image_name)

return pb2.req_status(status=0, error_message=os.strerror(0))

Expand Down
Loading

0 comments on commit 44b25bf

Please sign in to comment.