Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 104 additions & 38 deletions module/os/linux/zfs/zfs_ctldir.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,17 @@ static int zfs_snapshot_no_setuid = 0;
typedef struct {
char *se_name; /* full snapshot name */
char *se_path; /* full mount path */
spa_t *se_spa; /* pool spa */
spa_t *se_spa; /* pool spa (NULL if pending) */
uint64_t se_objsetid; /* snapshot objset id */
struct dentry *se_root_dentry; /* snapshot root dentry */
taskqid_t se_taskqid; /* scheduled unmount taskqid */
avl_node_t se_node_name; /* zfs_snapshots_by_name link */
avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */
zfs_refcount_t se_refcount; /* reference count */
kmutex_t se_mtx; /* protects se_mounting and se_cv */
kcondvar_t se_cv; /* signal mount completion */
boolean_t se_mounting; /* mount operation in progress */
int se_mount_error; /* error from failed mount */
} zfs_snapentry_t;

static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
Expand All @@ -146,6 +150,10 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
se->se_objsetid = objsetid;
se->se_root_dentry = root_dentry;
se->se_taskqid = TASKQID_INVALID;
mutex_init(&se->se_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&se->se_cv, NULL, CV_DEFAULT, NULL);
se->se_mounting = B_FALSE;
se->se_mount_error = 0;

zfs_refcount_create(&se->se_refcount);

Expand All @@ -162,6 +170,8 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)
zfs_refcount_destroy(&se->se_refcount);
kmem_strfree(se->se_name);
kmem_strfree(se->se_path);
mutex_destroy(&se->se_mtx);
cv_destroy(&se->se_cv);

kmem_free(se, sizeof (zfs_snapentry_t));
}
Expand All @@ -187,34 +197,52 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se)
}

/*
* Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
* zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part
* of the trees a reference is held.
* Add a zfs_snapentry_t to the zfs_snapshots_by_name tree. If the entry
* is not pending (se_spa != NULL), also add to zfs_snapshots_by_objsetid.
* While the zfs_snapentry_t is part of the trees a reference is held.
*/
static void
zfsctl_snapshot_add(zfs_snapentry_t *se)
{
ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
zfsctl_snapshot_hold(se);
avl_add(&zfs_snapshots_by_name, se);
avl_add(&zfs_snapshots_by_objsetid, se);
if (se->se_spa != NULL)
avl_add(&zfs_snapshots_by_objsetid, se);
}

/*
* Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
* zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped,
* this can result in the structure being freed if that was the last
* remaining reference.
* Remove a zfs_snapentry_t from the zfs_snapshots_by_name tree and
* zfs_snapshots_by_objsetid tree (if not pending). Upon removal a
* reference is dropped, this can result in the structure being freed
* if that was the last remaining reference.
*/
static void
zfsctl_snapshot_remove(zfs_snapentry_t *se)
{
ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
avl_remove(&zfs_snapshots_by_name, se);
avl_remove(&zfs_snapshots_by_objsetid, se);
if (se->se_spa != NULL)
avl_remove(&zfs_snapshots_by_objsetid, se);
zfsctl_snapshot_rele(se);
}

/*
 * Complete a pending zfs_snapentry_t after its mount has succeeded.
 *
 * Records the pool, objset id and root dentry on the entry and links it
 * into the zfs_snapshots_by_objsetid tree.  The caller must hold
 * zfs_snapshot_lock as writer, and the entry must still be pending
 * (se_spa == NULL).
 *
 * A non-NULL se_spa is what zfsctl_snapshot_add() and
 * zfsctl_snapshot_remove() use to decide whether an entry is linked in
 * zfs_snapshots_by_objsetid, so spa must be non-NULL here; otherwise the
 * entry would be linked in the tree but still look pending, and a later
 * remove would leave a dangling tree node.
 */
static void
zfsctl_snapshot_fill(zfs_snapentry_t *se, spa_t *spa, uint64_t objsetid,
    struct dentry *root_dentry)
{
	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
	ASSERT3P(se->se_spa, ==, NULL);
	ASSERT3P(spa, !=, NULL);

	se->se_spa = spa;
	se->se_objsetid = objsetid;
	se->se_root_dentry = root_dentry;

	avl_add(&zfs_snapshots_by_objsetid, se);
}

/*
* Snapshot name comparison function for the zfs_snapshots_by_name.
*/
Expand Down Expand Up @@ -312,6 +340,11 @@ zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
se = zfsctl_snapshot_find_by_name(old_snapname);
if (se == NULL)
return (SET_ERROR(ENOENT));
if (se->se_spa == NULL) {
/* Snapshot mount is in progress */
zfsctl_snapshot_rele(se);
return (SET_ERROR(EBUSY));
}

zfsctl_snapshot_remove(se);
kmem_strfree(se->se_name);
Expand Down Expand Up @@ -430,26 +463,6 @@ zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
return (error);
}

/*
 * Report whether snapname is currently mounted.  Returns B_TRUE when an
 * entry for snapname is found by name, and B_FALSE otherwise.  The lookup
 * is done with zfs_snapshot_lock held as reader, and the entry returned
 * by the lookup is released again before the lock is dropped.
 */
static boolean_t
zfsctl_snapshot_ismounted(const char *snapname)
{
	zfs_snapentry_t *entry;
	boolean_t found;

	rw_enter(&zfs_snapshot_lock, RW_READER);

	entry = zfsctl_snapshot_find_by_name(snapname);
	found = (entry != NULL) ? B_TRUE : B_FALSE;
	if (found)
		zfsctl_snapshot_rele(entry);

	rw_exit(&zfs_snapshot_lock);

	return (found);
}

/*
* Check if the given inode is a part of the virtual .zfs directory.
*/
Expand Down Expand Up @@ -1131,6 +1144,14 @@ zfsctl_snapshot_unmount(const char *snapname, int flags)
}
rw_exit(&zfs_snapshot_lock);

/*
* Wait for any pending auto-mount to complete before unmounting.
*/
mutex_enter(&se->se_mtx);
while (se->se_mounting)
cv_wait(&se->se_cv, &se->se_mtx);
mutex_exit(&se->se_mtx);

exportfs_flush();

if (flags & MNT_FORCE)
Expand Down Expand Up @@ -1232,14 +1253,35 @@ zfsctl_snapshot_mount(struct path *path, int flags)
zfs_snapshot_no_setuid ? "nosuid" : "suid");

/*
* Multiple concurrent automounts of a snapshot are never allowed.
* The snapshot may be manually mounted as many times as desired.
* Check if snapshot is already being mounted. If found, wait for
* pending mount to complete before returning success.
*/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this handle the case of the pending mount failing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Auto-mounting of snapshots happens transparently on first access (e.g., ls), so I believe it makes sense for parallel processes racing for first access to get the same result as the first mount attempt. Updated the PR to store the mount error in se_mount_error and return it to all waiting processes for consistency.

if (zfsctl_snapshot_ismounted(full_name)) {
error = 0;
rw_enter(&zfs_snapshot_lock, RW_WRITER);
if ((se = zfsctl_snapshot_find_by_name(full_name)) != NULL) {
rw_exit(&zfs_snapshot_lock);
mutex_enter(&se->se_mtx);
while (se->se_mounting)
cv_wait(&se->se_cv, &se->se_mtx);

/*
* Return the same error as the first mount attempt (0 if
* succeeded, error code if failed).
*/
error = se->se_mount_error;
mutex_exit(&se->se_mtx);
zfsctl_snapshot_rele(se);
goto error;
}

/*
* Create pending entry and mark mount in progress.
*/
se = zfsctl_snapshot_alloc(full_name, full_path, NULL, 0, NULL);
se->se_mounting = B_TRUE;
zfsctl_snapshot_add(se);
zfsctl_snapshot_hold(se);
rw_exit(&zfs_snapshot_lock);

/*
* Attempt to mount the snapshot from user space. Normally this
* would be done using the vfs_kern_mount() function, however that
Expand All @@ -1258,6 +1300,9 @@ zfsctl_snapshot_mount(struct path *path, int flags)
argv[9] = full_path;
error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
if (error) {
/*
* Mount failed - cleanup pending entry and signal waiters.
*/
if (!(error & MOUNT_BUSY << 8)) {
zfs_dbgmsg("Unable to automount %s error=%d",
full_path, error);
Expand All @@ -1273,6 +1318,16 @@ zfsctl_snapshot_mount(struct path *path, int flags)
*/
error = 0;
}

rw_enter(&zfs_snapshot_lock, RW_WRITER);
zfsctl_snapshot_remove(se);
rw_exit(&zfs_snapshot_lock);
mutex_enter(&se->se_mtx);
se->se_mount_error = error;
se->se_mounting = B_FALSE;
cv_broadcast(&se->se_cv);
mutex_exit(&se->se_mtx);
zfsctl_snapshot_rele(se);
goto error;
}

Expand All @@ -1289,14 +1344,25 @@ zfsctl_snapshot_mount(struct path *path, int flags)
spath.mnt->mnt_flags |= MNT_SHRINKABLE;

rw_enter(&zfs_snapshot_lock, RW_WRITER);
se = zfsctl_snapshot_alloc(full_name, full_path,
snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
dentry);
zfsctl_snapshot_add(se);
zfsctl_snapshot_fill(se, snap_zfsvfs->z_os->os_spa,
dmu_objset_id(snap_zfsvfs->z_os), dentry);
zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
rw_exit(&zfs_snapshot_lock);
} else {
rw_enter(&zfs_snapshot_lock, RW_WRITER);
zfsctl_snapshot_remove(se);
rw_exit(&zfs_snapshot_lock);
}
path_put(&spath);

/*
* Signal mount completion and cleanup.
*/
mutex_enter(&se->se_mtx);
se->se_mounting = B_FALSE;
cv_broadcast(&se->se_cv);
mutex_exit(&se->se_mtx);
zfsctl_snapshot_rele(se);
error:
kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(full_path, MAXPATHLEN);
Expand Down
Loading