diff --git a/kmod/file.c b/kmod/file.c index 6e9496c0..2f0f3dc6 100644 --- a/kmod/file.c +++ b/kmod/file.c @@ -899,23 +899,14 @@ static ssize_t file_write_iter(struct kiocb* iocb, struct iov_iter* from) { return res; } -int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry) { - inode_lock(&enode->inode); +// shared functionality of ternfs_file_flush and ternfs_link +// takes the file we're trying to flush/link, the directory we're going to put it in, and a name/len pair to assign +// note: caller must take care of dget_parent/dput +static int flush_and_link(struct ternfs_inode *enode, struct dentry *parent, const char *name, size_t name_len) { + BUG_ON(!inode_is_locked(&enode->inode)); int err = 0; - // Not writing, there's nothing to do, there's nothing to do, files are immutable - if (enode->file.status != TERNFS_FILE_STATUS_WRITING) { - ternfs_debug("status=%d, won't flush", enode->file.status); - goto out_early; - } - - // We are in another process, skip - if (enode->file.owner != current->group_leader) { - ternfs_debug("owner=%p != group_leader=%p, won't flush", enode->file.owner, current->group_leader); - goto out_early; - } - bool file_is_alive_and_flushing = false; // if we've errored out already, just exit @@ -948,7 +939,7 @@ int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry) { ternfs_debug("linking file"); err = ternfs_error_to_linux(ternfs_shard_link_file( (struct ternfs_fs_info*)enode->inode.i_sb->s_fs_info, enode->inode.i_ino, - enode->file.cookie, dentry->d_parent->d_inode->i_ino, dentry->d_name.name, dentry->d_name.len, + enode->file.cookie, parent->d_inode->i_ino, name, name_len, &enode->edge_creation_time )); if (err < 0) { goto out; } @@ -963,11 +954,7 @@ int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry) { // expire the directory listing -- we know for a fact that it // is wrong, it now contains this file. 
- { - struct dentry* parent = dget_parent(dentry); - WRITE_ONCE(TERNFS_I(d_inode(parent))->dir.mtime_expiry, 0); - dput(parent); - } + WRITE_ONCE(TERNFS_I(d_inode(parent))->dir.mtime_expiry, 0); out: if (err) { @@ -999,17 +986,89 @@ int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry) { mmdrop(enode->file.mm); } enode->file.mm = NULL; - inode_unlock(&enode->inode); + return err; +} + +int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry) { + int err = 0; + inode_lock(&enode->inode); + + // Not writing, there's nothing to do, files are immutable + if (enode->file.status != TERNFS_FILE_STATUS_WRITING) { + ternfs_debug("status=%d, won't flush", enode->file.status); + goto out; + } + + // We are in another process, skip + if (enode->file.owner != current->group_leader) { + ternfs_debug("owner=%p != group_leader=%p, won't flush", enode->file.owner, current->group_leader); + goto out; + } -out_early: + + struct dentry *parent = dget_parent(dentry); + + err = flush_and_link(enode, parent, dentry->d_name.name, dentry->d_name.len); + + if (parent) + dput(parent); + +out: inode_unlock(&enode->inode); return err; } +int ternfs_link(struct dentry* old_dentry, struct inode* dir, struct dentry* new_dentry) { + struct inode* inode = d_inode(old_dentry); + struct ternfs_inode* enode = TERNFS_I(inode); + + // should be done by vfs_link + BUG_ON(!inode_is_locked(inode)); + + int err = 0; + + struct dentry* parent = dget_parent(old_dentry); + + // TODO: there are probably cases in which this could be allowed (e.g. 
cross directory things that happen to be in the same shard) + if (!parent || parent->d_inode != dir) { + ternfs_debug("tried to link a file in a different directory than the one it was opened in"); + err = -EXDEV; + goto out; + } + + // linking existing files is not allowed + // TODO: check i_nlink once we actually start reporting link counts + if (enode->file.status != TERNFS_FILE_STATUS_WRITING) { + ternfs_debug("status=%d, won't link", enode->file.status); + err = -EINVAL; + goto out; + } + + // this is not an error in normal flush (because other processes could close the fd) but linking would be weird + if (enode->file.owner != current->group_leader) { + ternfs_debug("owner=%p != group_leader=%p, won't link", enode->file.owner, current->group_leader); + err = -EPERM; + goto out; + } + + err = flush_and_link(enode, parent, new_dentry->d_name.name, new_dentry->d_name.len); + +out: + if (parent) + dput(parent); + + return err; +} + static int file_flush_internal(struct file* filp, fl_owner_t id) { // can we get write while this is in progress? struct ternfs_inode* enode = TERNFS_I(filp->f_inode); struct dentry* dentry = filp->f_path.dentry; + + // ternfs_file_flush also links, but tmpfiles are only linked when linkat is called + if (unlikely(filp->f_flags & __O_TMPFILE)) + return 0; + return ternfs_file_flush(enode, dentry); } diff --git a/kmod/file.h b/kmod/file.h index 49fbae82..fcc49570 100644 --- a/kmod/file.h +++ b/kmod/file.h @@ -16,6 +16,7 @@ extern int ternfs_file_getattr_refresh_time_jiffies; // this is only relevant fo ssize_t ternfs_file_write(struct ternfs_inode* enode, int flags, loff_t* ppos, struct iov_iter* from); int ternfs_file_flush(struct ternfs_inode* enode, struct dentry* dentry); +int ternfs_link(struct dentry* old_dentry, struct inode* dir, struct dentry* new_dentry); // Also used in ternfs_do_ftruncate to fill the end of the file. 
ssize_t ternfs_file_write_internal(struct ternfs_inode* enode, int flags, loff_t* ppos, struct iov_iter* from, size_t count); diff --git a/kmod/inode.c b/kmod/inode.c index 7fd90ec3..6c6c47e4 100644 --- a/kmod/inode.c +++ b/kmod/inode.c @@ -607,6 +607,30 @@ static int COMPAT_FUNC_UNS_IMP(ternfs_symlink, struct inode* dir, struct dentry* return 0; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,6,0) +static int COMPAT_FUNC_UNS_IMP(ternfs_tmpfile, struct inode* dir, struct file* file, umode_t mode) { + struct dentry* dentry = file->f_path.dentry; // dentry with a "fake" name +#else +static int COMPAT_FUNC_UNS_IMP(ternfs_tmpfile, struct inode* dir, struct dentry* dentry, umode_t mode) { +#endif + ternfs_debug("ternfs_tempfile: name: %s", dentry->d_name.name); + struct ternfs_inode* enode = ternfs_create_internal(dir, TERNFS_INODE_FILE, dentry); + if (IS_ERR(enode)) { return PTR_ERR(enode); } + + // the file is created in writing status by ternfs_create_internal + // and it remains unlinked until linkat() is called + // once link is called we can then assign a name + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,6,0) + // in 6.6+, finish_open_simple handles d_instantiate + return finish_open_simple(file, 0); +#else + // for older kernels, we need to instantiate as unhashed + d_tmpfile(dentry, &enode->inode); + return 0; +#endif +} + static const char* ternfs_get_link(struct dentry* dentry, struct inode* inode, struct delayed_call* destructor) { // Can't be bothered to think about RCU if (dentry == NULL) { return ERR_PTR(-ECHILD); } @@ -625,12 +649,14 @@ static const char* ternfs_get_link(struct dentry* dentry, struct inode* inode, s static const struct inode_operations ternfs_dir_inode_ops = { .create = ternfs_create, .lookup = ternfs_lookup, + .link = ternfs_link, .unlink = ternfs_unlink, .mkdir = ternfs_mkdir, .rmdir = ternfs_rmdir, .rename = ternfs_rename, .getattr = ternfs_getattr, .symlink = ternfs_symlink, + .tmpfile = ternfs_tmpfile, }; static const struct 
inode_operations ternfs_file_inode_ops = {