diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c index 1ac60119fdcf..79fd8911102d 100644 --- a/module/os/linux/zfs/zfs_ctldir.c +++ b/module/os/linux/zfs/zfs_ctldir.c @@ -117,13 +117,17 @@ static int zfs_snapshot_no_setuid = 0; typedef struct { char *se_name; /* full snapshot name */ char *se_path; /* full mount path */ - spa_t *se_spa; /* pool spa */ + spa_t *se_spa; /* pool spa (NULL if pending) */ uint64_t se_objsetid; /* snapshot objset id */ struct dentry *se_root_dentry; /* snapshot root dentry */ taskqid_t se_taskqid; /* scheduled unmount taskqid */ avl_node_t se_node_name; /* zfs_snapshots_by_name link */ avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ zfs_refcount_t se_refcount; /* reference count */ + kmutex_t se_mtx; /* protects se_mounting and se_cv */ + kcondvar_t se_cv; /* signal mount completion */ + boolean_t se_mounting; /* mount operation in progress */ + int se_mount_error; /* error from failed mount */ } zfs_snapentry_t; static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); @@ -146,6 +150,10 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa, se->se_objsetid = objsetid; se->se_root_dentry = root_dentry; se->se_taskqid = TASKQID_INVALID; + mutex_init(&se->se_mtx, NULL, MUTEX_DEFAULT, NULL); + cv_init(&se->se_cv, NULL, CV_DEFAULT, NULL); + se->se_mounting = B_FALSE; + se->se_mount_error = 0; zfs_refcount_create(&se->se_refcount); @@ -162,6 +170,8 @@ zfsctl_snapshot_free(zfs_snapentry_t *se) zfs_refcount_destroy(&se->se_refcount); kmem_strfree(se->se_name); kmem_strfree(se->se_path); + mutex_destroy(&se->se_mtx); + cv_destroy(&se->se_cv); kmem_free(se, sizeof (zfs_snapentry_t)); } @@ -187,9 +197,9 @@ zfsctl_snapshot_rele(zfs_snapentry_t *se) } /* - * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and - * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part - * of the trees a reference is held. 
+ * Add a zfs_snapentry_t to the zfs_snapshots_by_name tree. If the entry + * is not pending (se_spa != NULL), also add to zfs_snapshots_by_objsetid. + * While the zfs_snapentry_t is part of the trees a reference is held. */ static void zfsctl_snapshot_add(zfs_snapentry_t *se) @@ -197,24 +207,42 @@ zfsctl_snapshot_add(zfs_snapentry_t *se) ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); zfsctl_snapshot_hold(se); avl_add(&zfs_snapshots_by_name, se); - avl_add(&zfs_snapshots_by_objsetid, se); + if (se->se_spa != NULL) + avl_add(&zfs_snapshots_by_objsetid, se); } /* - * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and - * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, - * this can result in the structure being freed if that was the last - * remaining reference. + * Remove a zfs_snapentry_t from the zfs_snapshots_by_name tree and + * zfs_snapshots_by_objsetid tree (if not pending). Upon removal a + * reference is dropped, this can result in the structure being freed + * if that was the last remaining reference. */ static void zfsctl_snapshot_remove(zfs_snapentry_t *se) { ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); avl_remove(&zfs_snapshots_by_name, se); - avl_remove(&zfs_snapshots_by_objsetid, se); + if (se->se_spa != NULL) + avl_remove(&zfs_snapshots_by_objsetid, se); zfsctl_snapshot_rele(se); } +/* + * Fill a pending zfs_snapentry_t after mount succeeds. Fills in the + * remaining fields and adds the entry to the zfs_snapshots_by_objsetid tree. + */ +static void +zfsctl_snapshot_fill(zfs_snapentry_t *se, spa_t *spa, uint64_t objsetid, + struct dentry *root_dentry) +{ + ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); + ASSERT3P(se->se_spa, ==, NULL); + se->se_spa = spa; + se->se_objsetid = objsetid; + se->se_root_dentry = root_dentry; + avl_add(&zfs_snapshots_by_objsetid, se); +} + /* * Snapshot name comparison function for the zfs_snapshots_by_name. 
*/ @@ -312,6 +340,11 @@ zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname) se = zfsctl_snapshot_find_by_name(old_snapname); if (se == NULL) return (SET_ERROR(ENOENT)); + if (se->se_spa == NULL) { + /* Snapshot mount is in progress */ + zfsctl_snapshot_rele(se); + return (SET_ERROR(EBUSY)); + } zfsctl_snapshot_remove(se); kmem_strfree(se->se_name); @@ -430,26 +463,6 @@ zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) return (error); } -/* - * Check if snapname is currently mounted. Returned non-zero when mounted - * and zero when unmounted. - */ -static boolean_t -zfsctl_snapshot_ismounted(const char *snapname) -{ - zfs_snapentry_t *se; - boolean_t ismounted = B_FALSE; - - rw_enter(&zfs_snapshot_lock, RW_READER); - if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { - zfsctl_snapshot_rele(se); - ismounted = B_TRUE; - } - rw_exit(&zfs_snapshot_lock); - - return (ismounted); -} - /* * Check if the given inode is a part of the virtual .zfs directory. */ @@ -1131,6 +1144,14 @@ zfsctl_snapshot_unmount(const char *snapname, int flags) } rw_exit(&zfs_snapshot_lock); + /* + * Wait for any pending auto-mount to complete before unmounting. + */ + mutex_enter(&se->se_mtx); + while (se->se_mounting) + cv_wait(&se->se_cv, &se->se_mtx); + mutex_exit(&se->se_mtx); + exportfs_flush(); if (flags & MNT_FORCE) @@ -1232,14 +1253,35 @@ zfsctl_snapshot_mount(struct path *path, int flags) zfs_snapshot_no_setuid ? "nosuid" : "suid"); /* - * Multiple concurrent automounts of a snapshot are never allowed. - * The snapshot may be manually mounted as many times as desired. + * Check if snapshot is already being mounted. If found, wait for + * pending mount to complete and then return its result. 
*/ - if (zfsctl_snapshot_ismounted(full_name)) { - error = 0; + rw_enter(&zfs_snapshot_lock, RW_WRITER); + if ((se = zfsctl_snapshot_find_by_name(full_name)) != NULL) { + rw_exit(&zfs_snapshot_lock); + mutex_enter(&se->se_mtx); + while (se->se_mounting) + cv_wait(&se->se_cv, &se->se_mtx); + + /* + * Return the same error as the first mount attempt (0 if + * succeeded, error code if failed). + */ + error = se->se_mount_error; + mutex_exit(&se->se_mtx); + zfsctl_snapshot_rele(se); goto error; } + /* + * Create pending entry and mark mount in progress. + */ + se = zfsctl_snapshot_alloc(full_name, full_path, NULL, 0, NULL); + se->se_mounting = B_TRUE; + zfsctl_snapshot_add(se); + zfsctl_snapshot_hold(se); + rw_exit(&zfs_snapshot_lock); + /* * Attempt to mount the snapshot from user space. Normally this * would be done using the vfs_kern_mount() function, however that @@ -1258,6 +1300,9 @@ zfsctl_snapshot_mount(struct path *path, int flags) argv[9] = full_path; error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (error) { + /* + * Mount failed - cleanup pending entry and signal waiters. 
+ */ if (!(error & MOUNT_BUSY << 8)) { zfs_dbgmsg("Unable to automount %s error=%d", full_path, error); @@ -1273,6 +1318,16 @@ zfsctl_snapshot_mount(struct path *path, int flags) */ error = 0; } + + rw_enter(&zfs_snapshot_lock, RW_WRITER); + zfsctl_snapshot_remove(se); + rw_exit(&zfs_snapshot_lock); + mutex_enter(&se->se_mtx); + se->se_mount_error = error; + se->se_mounting = B_FALSE; + cv_broadcast(&se->se_cv); + mutex_exit(&se->se_mtx); + zfsctl_snapshot_rele(se); goto error; } @@ -1289,14 +1344,25 @@ zfsctl_snapshot_mount(struct path *path, int flags) spath.mnt->mnt_flags |= MNT_SHRINKABLE; rw_enter(&zfs_snapshot_lock, RW_WRITER); - se = zfsctl_snapshot_alloc(full_name, full_path, - snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), - dentry); - zfsctl_snapshot_add(se); + zfsctl_snapshot_fill(se, snap_zfsvfs->z_os->os_spa, + dmu_objset_id(snap_zfsvfs->z_os), dentry); zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); rw_exit(&zfs_snapshot_lock); + } else { + rw_enter(&zfs_snapshot_lock, RW_WRITER); + zfsctl_snapshot_remove(se); + rw_exit(&zfs_snapshot_lock); } path_put(&spath); + + /* + * Signal mount completion and cleanup. + */ + mutex_enter(&se->se_mtx); + se->se_mounting = B_FALSE; + cv_broadcast(&se->se_cv); + mutex_exit(&se->se_mtx); + zfsctl_snapshot_rele(se); error: kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); kmem_free(full_path, MAXPATHLEN);