Index: linux-2.6.27.21-0.1/fs/ext4/iopen.c =================================================================== --- /dev/null +++ linux-2.6.27.21-0.1/fs/ext4/iopen.c @@ -0,0 +1,295 @@ +/* + * linux/fs/ext4/iopen.c + * + * Special support for open by inode number + * + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). + * + * This file may be redistributed under the terms of the GNU General + * Public License. + * + * + * Invariants: + * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias + * for an inode at one time. + * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry + * aliases on an inode at the same time. + * + * If we have any connected dentry aliases for an inode, use one of those + * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED + * dentry for this inode, which thereafter will be found by the dcache + * when looking up this inode number in __iopen__, so we don't return here + * until it is gone. + * + * If we get an inode via a regular name lookup, then we "rename" the + * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures + * existing users of the disconnected dentry will continue to use the same + * dentry as the connected users, and there will never be both kinds of + * dentry aliases at one time. + */ + +#include +#include +#include +#include +#include +#include "iopen.h" +#include "ext4.h" +#include "ext4_jbd2.h" + +#ifndef assert +#define assert(test) J_ASSERT(test) +#endif + +#define IOPEN_NAME_LEN 32 + +/* + * This implements looking up an inode by number. + */ +static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + unsigned long ino; + struct list_head *lp; + struct dentry *alternate; + char buf[IOPEN_NAME_LEN]; + + if (dentry->d_name.len >= IOPEN_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + memcpy(buf, dentry->d_name.name, dentry->d_name.len); + buf[dentry->d_name.len] = 0; + + if (strcmp(buf, ".") == 0) + ino = dir->i_ino; + else if (strcmp(buf, "..") == 0) + ino = EXT4_ROOT_INO; + else + ino = simple_strtoul(buf, 0, 0); + + if ((ino != EXT4_ROOT_INO && + ino < EXT4_FIRST_INO(dir->i_sb)) || + ino > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) + return ERR_PTR(-ENOENT); + + inode = ext4_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { + /* Newer kernels return -ESTALE for inodes that are not in use, + * but older kernels return a negative dentry. This can only + * happen when doing a lookup in the __iopen__ dir, because the + * "entry" will always be found even if inode is unallocated. + * Handle this here instead of fixing the callers. b=19114 */ + if (PTR_ERR(inode) == -ESTALE) + return (ERR_PTR(-ENOENT)); + return ERR_CAST(inode); + } + + assert(list_empty(&dentry->d_alias)); /* d_instantiate */ + assert(d_unhashed(dentry)); /* d_rehash */ + + /* preferrably return a connected dentry */ + spin_lock(&dcache_lock); + list_for_each(lp, &inode->i_dentry) { + alternate = list_entry(lp, struct dentry, d_alias); + assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); + } + + if (!list_empty(&inode->i_dentry)) { + alternate = list_entry(inode->i_dentry.next, + struct dentry, d_alias); + dget_locked(alternate); + spin_lock(&alternate->d_lock); + alternate->d_flags |= DCACHE_REFERENCED; + spin_unlock(&alternate->d_lock); + iput(inode); + spin_unlock(&dcache_lock); + return alternate; + } + dentry->d_flags |= DCACHE_DISCONNECTED; + + /* d_add(), but don't drop dcache_lock before adding dentry to inode */ + list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ + dentry->d_inode = inode; + + d_rehash_cond(dentry, 0); + spin_unlock(&dcache_lock); + + return NULL; +} + +/* This function is spliced into ext4_lookup and does the move of a + * disconnected dentry (if it exists) to a connected dentry. + */ +struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, + int rehash) +{ + struct dentry *tmp, *goal = NULL; + struct list_head *lp; + + /* verify this dentry is really new */ + assert(dentry->d_inode == NULL); + assert(list_empty(&dentry->d_alias)); /* d_instantiate */ + if (rehash) + assert(d_unhashed(dentry)); /* d_rehash */ + assert(list_empty(&dentry->d_subdirs)); + + spin_lock(&dcache_lock); + if (!inode) + goto do_rehash; + + if (!test_opt(inode->i_sb, IOPEN)) + goto do_instantiate; + + /* preferrably return a connected dentry */ + list_for_each(lp, &inode->i_dentry) { + tmp = list_entry(lp, struct dentry, d_alias); + if (tmp->d_flags & DCACHE_DISCONNECTED) { + assert(tmp->d_alias.next == &inode->i_dentry); + assert(tmp->d_alias.prev == &inode->i_dentry); + goal = tmp; + dget_locked(goal); + break; + } + } + + if (!goal) + goto do_instantiate; + + /* Move the goal to the de hash queue */ + goal->d_flags &= ~DCACHE_DISCONNECTED; + security_d_instantiate(goal, inode); + __d_drop(dentry); + d_rehash_cond(dentry, 0); + d_move_locked(goal, dentry); + spin_unlock(&dcache_lock); + iput(inode); + + return goal; + + /* d_add(), but don't drop dcache_lock before adding dentry to inode */ +do_instantiate: + list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ + dentry->d_inode = inode; +do_rehash: + if (rehash) + d_rehash_cond(dentry, 0); + spin_unlock(&dcache_lock); + + return NULL; +} + +/* + * Similar as d_instantiate() except that it drops the disconnected + * dentry if any. + */ +void iopen_d_instantiate(struct dentry *dentry, struct inode * inode) +{ + struct dentry *dis_dentry; + + /* verify this dentry is really new */ + assert(dentry->d_inode == NULL); + assert(list_empty(&dentry->d_alias)); + + spin_lock(&dcache_lock); + if (!inode || !test_opt(inode->i_sb, IOPEN) || + list_empty(&inode->i_dentry)) + goto do_instantiate; + + /* a disconnected dentry has been added in our back, + * we have to drop this dentry, see bug 16362/15713*/ + dis_dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + spin_lock(&dis_dentry->d_lock); + assert(dis_dentry->d_alias.next == &inode->i_dentry); + assert(dis_dentry->d_alias.prev == &inode->i_dentry); + assert(dis_dentry->d_flags & DCACHE_DISCONNECTED); + __d_drop(dis_dentry); + list_del_init(&dis_dentry->d_alias); + spin_unlock(&dis_dentry->d_lock); + +do_instantiate: + if (inode) + list_add(&dentry->d_alias, &inode->i_dentry); + dentry->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(dentry, inode); +} + +/* + * These are the special structures for the iopen pseudo directory. + */ + +static struct inode_operations iopen_inode_operations = { + lookup: iopen_lookup, /* BKL held */ +}; + +static struct file_operations iopen_file_operations = { + read: generic_read_dir, +}; + +static int match_dentry(struct dentry *dentry, const char *name) +{ + int len; + + len = strlen(name); + if (dentry->d_name.len != len) + return 0; + if (strncmp(dentry->d_name.name, name, len)) + return 0; + return 1; +} + +/* + * This function is spliced into ext4_lookup and returns 1 the file + * name is __iopen__ and dentry has been filled in appropriately. + */ +int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode; + + if (dir->i_ino != EXT4_ROOT_INO || + !test_opt(dir->i_sb, IOPEN) || + !match_dentry(dentry, "__iopen__")) + return 0; + + inode = ext4_iget(dir->i_sb, EXT4_BAD_INO); + if (IS_ERR(inode)) + return 0; + + d_add(dentry, inode); + return 1; +} + +/* + * This function is spliced into read_inode; it returns 1 if inode + * number is the one for /__iopen__, in which case the inode is filled + * in appropriately. Otherwise, this fuction returns 0. + */ +int ext4_iopen_get_inode(struct inode *inode) +{ + if (inode->i_ino != EXT4_BAD_INO) + return 0; + + inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; + if (test_opt(inode->i_sb, IOPEN_NOPRIV)) + inode->i_mode |= 0777; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_nlink = 1; + inode->i_size = 4096; + inode->i_atime = inode->i_ctime = inode->i_mtime = ext4_current_time(inode); + EXT4_I(inode)->i_dtime = 0; + EXT4_I(inode)->i_file_acl = 0; + inode->i_blocks = 0; + inode->i_version = 1; + inode->i_generation = 0; + + inode->i_op = &iopen_inode_operations; + inode->i_fop = &iopen_file_operations; + inode->i_mapping->a_ops = 0; + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return 1; +} Index: linux-2.6.27.21-0.1/fs/ext4/iopen.h =================================================================== --- /dev/null +++ linux-2.6.27.21-0.1/fs/ext4/iopen.h @@ -0,0 +1,16 @@ +/* + * iopen.h + * + * Special support for opening files by inode number. + * + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). + * + * This file may be redistributed under the terms of the GNU General + * Public License. + */ + +extern int ext4_check_for_iopen(struct inode *dir, struct dentry *dentry); +extern int ext4_iopen_get_inode(struct inode *inode); +extern struct dentry *iopen_connect_dentry(struct dentry *dentry, + struct inode *inode, int rehash); +extern void iopen_d_instantiate(struct dentry *dentry, struct inode * inode); Index: linux-2.6.27.21-0.1/fs/ext4/inode.c =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c +++ linux-2.6.27.21-0.1/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include #include "ext4_jbd2.h" #include "xattr.h" +#include "iopen.h" #include "acl.h" #include "ext4_extents.h" @@ -4115,6 +4116,9 @@ struct inode *ext4_iget(struct super_blo ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif + if (ext4_iopen_get_inode(inode)) + return inode; + ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) goto bad_inode; Index: linux-2.6.27.21-0.1/fs/ext4/super.c =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/super.c +++ linux-2.6.27.21-0.1/fs/ext4/super.c @@ -955,7 +955,8 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, - Opt_inode_readahead_blks + Opt_inode_readahead_blks, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, }; static const match_table_t tokens = { @@ -1004,6 +1005,9 @@ static const match_table_t tokens = { {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, + {Opt_iopen, "iopen"}, + {Opt_noiopen, "noiopen"}, + {Opt_iopen_nopriv, "iopen_nopriv"}, {Opt_barrier, "barrier=%u"}, {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, @@ -1347,6 +1351,18 @@ set_qf_format: else clear_opt(sbi->s_mount_opt, BARRIER); break; + case Opt_iopen: + set_opt (sbi->s_mount_opt, IOPEN); + clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; + case Opt_noiopen: + clear_opt (sbi->s_mount_opt, IOPEN); + clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; + case Opt_iopen_nopriv: + set_opt (sbi->s_mount_opt, IOPEN); + set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; case Opt_ignore: break; case Opt_resize: Index: linux-2.6.27.21-0.1/fs/ext4/namei.c =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/namei.c +++ linux-2.6.27.21-0.1/fs/ext4/namei.c @@ -39,6 +39,7 @@ #include "namei.h" #include "xattr.h" +#include "iopen.h" #include "acl.h" /* @@ -1054,6 +1055,9 @@ static struct dentry *ext4_lookup(struct if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + if (ext4_check_for_iopen(dir, dentry)) + return NULL; + bh = ext4_find_entry(dir, &dentry->d_name, &de); inode = NULL; if (bh) { @@ -1068,7 +1072,8 @@ static struct dentry *ext4_lookup(struct if (IS_ERR(inode)) return ERR_CAST(inode); } - return d_splice_alias(inode, dentry); + + return iopen_connect_dentry(dentry, inode, 1); } @@ -1717,7 +1722,7 @@ static int ext4_add_nondir(handle_t *han int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); + iopen_d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -1876,7 +1881,7 @@ out_clear_inode: ext4_inc_count(handle, dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); + iopen_d_instantiate(dentry, inode); out_stop: ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) @@ -2142,10 +2147,6 @@ static int ext4_rmdir(struct inode *dir, inode->i_nlink); inode->i_version++; clear_nlink(inode); - /* There's no need to set i_disksize: the fact that i_nlink is - * zero will ensure that the right thing happens during any - * recovery. */ - inode->i_size = 0; ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); @@ -2271,6 +2272,23 @@ out_stop: return err; } +/* Like ext4_add_nondir() except for call to iopen_connect_dentry */ +static int ext4_add_link(handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + int err = ext4_add_entry(handle, dentry, inode); + if (!err) { + err = ext4_mark_inode_dirty(handle, inode); + if (err == 0) { + dput(iopen_connect_dentry(dentry, inode, 0)); + return 0; + } + } + ext4_dec_count(handle, inode); + iput(inode); + return err; +} + static int ext4_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -2301,7 +2319,8 @@ retry: ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext4_add_nondir(handle, dentry, inode); + err = ext4_add_link(handle, dentry, inode); + ext4_orphan_del(handle, inode); ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; Index: linux-2.6.27.21-0.1/fs/ext4/Makefile =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/Makefile +++ linux-2.6.27.21-0.1/fs/ext4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o -ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o Index: linux-2.6.27.21-0.1/fs/ext4/ext4.h =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/ext4.h +++ linux-2.6.27.21-0.1/fs/ext4/ext4.h @@ -540,6 +540,8 @@ do { \ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ +#define EXT4_MOUNT_IOPEN 0x10000000 /* Allow access via iopen */ +#define EXT4_MOUNT_IOPEN_NOPRIV 0x20000000 /* Make iopen world-readable */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt