Index: linux-2.6.9-67/fs/ext3/Makefile =================================================================== --- linux-2.6.9-67.orig/fs/ext3/Makefile +++ linux-2.6.9-67/fs/ext3/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o -ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o Index: linux-2.6.9-67/fs/ext3/inode.c =================================================================== --- linux-2.6.9-67.orig/fs/ext3/inode.c +++ linux-2.6.9-67/fs/ext3/inode.c @@ -37,6 +37,7 @@ #include #include #include "xattr.h" +#include "iopen.h" #include "acl.h" /* @@ -2409,6 +2410,8 @@ void ext3_read_inode(struct inode * inod ei->i_default_acl = EXT3_ACL_NOT_CACHED; #endif ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + if (ext3_iopen_get_inode(inode)) + return; if (ext3_get_inode_loc(inode, &iloc, 0)) goto bad_inode; Index: linux-2.6.9-67/fs/ext3/iopen.c =================================================================== --- /dev/null +++ linux-2.6.9-67/fs/ext3/iopen.c @@ -0,0 +1,315 @@ +/* + * linux/fs/ext3/iopen.c + * + * Special support for open by inode number + * + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). + * + * This file may be redistributed under the terms of the GNU General + * Public License. + * + * + * Invariants: + * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias + * for an inode at one time. + * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry + * aliases on an inode at the same time. + * + * If we have any connected dentry aliases for an inode, use one of those + * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED + * dentry for this inode, which thereafter will be found by the dcache + * when looking up this inode number in __iopen__, so we don't return here + * until it is gone. + * + * If we get an inode via a regular name lookup, then we "rename" the + * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures + * existing users of the disconnected dentry will continue to use the same + * dentry as the connected users, and there will never be both kinds of + * dentry aliases at one time. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "iopen.h" + +#ifndef assert +#define assert(test) J_ASSERT(test) +#endif + +#define IOPEN_NAME_LEN 32 + +/* + * This implements looking up an inode by number. + */ +static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode *inode; + unsigned long ino; + struct list_head *lp; + struct dentry *alternate; + char buf[IOPEN_NAME_LEN]; + + if (dentry->d_name.len >= IOPEN_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + memcpy(buf, dentry->d_name.name, dentry->d_name.len); + buf[dentry->d_name.len] = 0; + + if (strcmp(buf, ".") == 0) + ino = dir->i_ino; + else if (strcmp(buf, "..") == 0) + ino = EXT3_ROOT_INO; + else + ino = simple_strtoul(buf, 0, 0); + + if ((ino != EXT3_ROOT_INO && + //ino != EXT3_ACL_IDX_INO && + //ino != EXT3_ACL_DATA_INO && + ino < EXT3_FIRST_INO(dir->i_sb)) || + ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) + return ERR_PTR(-ENOENT); + + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + if (is_bad_inode(inode)) { + iput(inode); + return ERR_PTR(-ENOENT); + } + + assert(list_empty(&dentry->d_alias)); /* d_instantiate */ + assert(d_unhashed(dentry)); /* d_rehash */ + + /* preferrably return a connected dentry */ + spin_lock(&dcache_lock); + list_for_each(lp, &inode->i_dentry) { + alternate = list_entry(lp, struct dentry, d_alias); + assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); + } + + if (!list_empty(&inode->i_dentry)) { + alternate = list_entry(inode->i_dentry.next, + struct dentry, d_alias); + dget_locked(alternate); + spin_lock(&alternate->d_lock); + alternate->d_flags |= DCACHE_REFERENCED; + spin_unlock(&alternate->d_lock); + iput(inode); + spin_unlock(&dcache_lock); + return alternate; + } + dentry->d_flags |= DCACHE_DISCONNECTED; + + /* d_add(), but don't drop dcache_lock before adding dentry to inode */ + list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ + dentry->d_inode = inode; + + d_rehash_cond(dentry, 0); /* d_rehash */ + spin_unlock(&dcache_lock); + + return NULL; +} + +#define do_switch(x,y) do { \ + __typeof__ (x) __tmp = x; \ + x = y; y = __tmp; } while (0) + +static inline void switch_names(struct dentry *dentry, struct dentry *target) +{ + const unsigned char *old_name, *new_name; + + memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); + old_name = target->d_name.name; + new_name = dentry->d_name.name; + if (old_name == target->d_iname) + old_name = dentry->d_iname; + if (new_name == dentry->d_iname) + new_name = target->d_iname; + target->d_name.name = new_name; + dentry->d_name.name = old_name; +} + +/* This function is spliced into ext3_lookup and does the move of a + * disconnected dentry (if it exists) to a connected dentry. + */ +struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, + int rehash) +{ + struct dentry *tmp, *goal = NULL; + struct list_head *lp; + + /* verify this dentry is really new */ + assert(dentry->d_inode == NULL); + assert(list_empty(&dentry->d_alias)); /* d_instantiate */ + if (rehash) + assert(d_unhashed(dentry)); /* d_rehash */ + assert(list_empty(&dentry->d_subdirs)); + + spin_lock(&dcache_lock); + if (!inode) + goto do_rehash; + + if (!test_opt(inode->i_sb, IOPEN)) + goto do_instantiate; + + /* preferrably return a connected dentry */ + list_for_each(lp, &inode->i_dentry) { + tmp = list_entry(lp, struct dentry, d_alias); + if (tmp->d_flags & DCACHE_DISCONNECTED) { + assert(tmp->d_alias.next == &inode->i_dentry); + assert(tmp->d_alias.prev == &inode->i_dentry); + goal = tmp; + dget_locked(goal); + break; + } + } + + if (!goal) + goto do_instantiate; + + /* Move the goal to the de hash queue */ + goal->d_flags &= ~DCACHE_DISCONNECTED; + security_d_instantiate(goal, inode); + __d_drop(dentry); + d_rehash_cond(dentry, 0); + d_move_locked(goal, dentry); + spin_unlock(&dcache_lock); + iput(inode); + + return goal; + + /* d_add(), but don't drop dcache_lock before adding dentry to inode */ +do_instantiate: + list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ + dentry->d_inode = inode; +do_rehash: + if (rehash) + d_rehash_cond(dentry, 0); /* d_rehash */ + spin_unlock(&dcache_lock); + + return NULL; +} + +/* + * Similar as d_instantiate() except that it drops the disconnected + * dentry if any. + */ +void iopen_d_instantiate(struct dentry *dentry, struct inode * inode) +{ + struct dentry *dis_dentry; + + /* verify this dentry is really new */ + assert(dentry->d_inode == NULL); + assert(list_empty(&dentry->d_alias)); + + spin_lock(&dcache_lock); + if (!inode || !test_opt(inode->i_sb, IOPEN) || + list_empty(&inode->i_dentry)) + goto do_instantiate; + + /* a disconnected dentry has been added in our back, + * we have to drop this dentry, see bug 16362/15713*/ + dis_dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); + spin_lock(&dis_dentry->d_lock); + assert(dis_dentry->d_alias.next == &inode->i_dentry); + assert(dis_dentry->d_alias.prev == &inode->i_dentry); + assert(dis_dentry->d_flags & DCACHE_DISCONNECTED); + __d_drop(dis_dentry); + list_del_init(&dis_dentry->d_alias); + spin_unlock(&dis_dentry->d_lock); + +do_instantiate: + if (inode) + list_add(&dentry->d_alias, &inode->i_dentry); + dentry->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(dentry, inode); +} + +/* + * These are the special structures for the iopen pseudo directory. + */ + +static struct inode_operations iopen_inode_operations = { + lookup: iopen_lookup, /* BKL held */ +}; + +static struct file_operations iopen_file_operations = { + read: generic_read_dir, +}; + +static int match_dentry(struct dentry *dentry, const char *name) +{ + int len; + + len = strlen(name); + if (dentry->d_name.len != len) + return 0; + if (strncmp(dentry->d_name.name, name, len)) + return 0; + return 1; +} + +/* + * This function is spliced into ext3_lookup and returns 1 the file + * name is __iopen__ and dentry has been filled in appropriately. + */ +int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode; + + if (dir->i_ino != EXT3_ROOT_INO || + !test_opt(dir->i_sb, IOPEN) || + !match_dentry(dentry, "__iopen__")) + return 0; + + inode = iget(dir->i_sb, EXT3_BAD_INO); + + if (!inode) + return 0; + d_add(dentry, inode); + return 1; +} + +/* + * This function is spliced into read_inode; it returns 1 if inode + * number is the one for /__iopen__, in which case the inode is filled + * in appropriately. Otherwise, this fuction returns 0. + */ +int ext3_iopen_get_inode(struct inode *inode) +{ + if (inode->i_ino != EXT3_BAD_INO) + return 0; + + inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; + if (test_opt(inode->i_sb, IOPEN_NOPRIV)) + inode->i_mode |= 0777; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_nlink = 1; + inode->i_size = 4096; + inode->i_atime = CURRENT_TIME; + inode->i_ctime = CURRENT_TIME; + inode->i_mtime = CURRENT_TIME; + EXT3_I(inode)->i_dtime = 0; + EXT3_I(inode)->i_file_acl = 0; + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size + * (for stat), not the fs block + * size */ + inode->i_blocks = 0; + inode->i_version = 1; + inode->i_generation = 0; + + inode->i_op = &iopen_inode_operations; + inode->i_fop = &iopen_file_operations; + inode->i_mapping->a_ops = 0; + + return 1; +} Index: linux-2.6.9-67/fs/ext3/iopen.h =================================================================== --- /dev/null +++ linux-2.6.9-67/fs/ext3/iopen.h @@ -0,0 +1,24 @@ +/* + * iopen.h + * + * Special support for opening files by inode number. + * + * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). + * + * This file may be redistributed under the terms of the GNU General + * Public License. + */ + +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); +extern int ext3_iopen_get_inode(struct inode *inode); +extern struct dentry *iopen_connect_dentry(struct dentry *dentry, + struct inode *inode, int rehash); +extern void iopen_d_instantiate(struct dentry *dentry, struct inode * inode); + +#if !defined(HAVE_D_REHASH_COND) && defined(HAVE___D_REHASH) +#define d_rehash_cond(dentry, lock) __d_rehash(dentry, lock) +#endif + +#if !defined(HAVE_D_MOVE_LOCKED) && defined(HAVE___D_MOVE) +#define d_move_locked(dentry, target) __d_move(dentry, target) +#endif Index: linux-2.6.9-67/fs/ext3/namei.c =================================================================== --- linux-2.6.9-67.orig/fs/ext3/namei.c +++ linux-2.6.9-67/fs/ext3/namei.c @@ -37,6 +37,7 @@ #include #include #include "xattr.h" +#include "iopen.h" #include "acl.h" /* @@ -1017,6 +1018,9 @@ static struct dentry *ext3_lookup(struct if (dentry->d_name.len > EXT3_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + if (ext3_check_for_iopen(dir, dentry)) + return NULL; + bh = ext3_find_entry(dentry, &de); inode = NULL; if (bh) { @@ -1027,10 +1031,8 @@ static struct dentry *ext3_lookup(struct if (!inode) return ERR_PTR(-EACCES); } - if (inode) - return d_splice_alias(inode, dentry); - d_add(dentry, inode); - return NULL; + + return iopen_connect_dentry(dentry, inode, 1); } @@ -1669,7 +1671,7 @@ static int ext3_add_nondir(handle_t *han int err = ext3_add_entry(handle, dentry, inode); if (!err) { ext3_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); + iopen_d_instantiate(dentry, inode); return 0; } ext3_dec_count(handle, inode); @@ -1831,7 +1833,7 @@ retry: dir->i_nlink++; ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); + iopen_d_instantiate(dentry, inode); out_stop: ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) @@ -2099,10 +2101,6 @@ static int ext3_rmdir (struct inode * di inode->i_nlink); inode->i_version++; inode->i_nlink = 0; - /* There's no need to set i_disksize: the fact that i_nlink is - * zero will ensure that the right thing happens during any - * recovery. */ - inode->i_size = 0; ext3_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; ext3_mark_inode_dirty(handle, inode); @@ -2225,6 +2223,23 @@ out_stop: return err; } +/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ +static int ext3_add_link(handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + int err = ext3_add_entry(handle, dentry, inode); + if (!err) { + err = ext3_mark_inode_dirty(handle, inode); + if (err == 0) { + dput(iopen_connect_dentry(dentry, inode, 0)); + return 0; + } + } + ext3_dec_count(handle, inode); + iput(inode); + return err; +} + static int ext3_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { @@ -2254,7 +2269,8 @@ retry: ext3_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext3_add_nondir(handle, dentry, inode); + err = ext3_add_link(handle, dentry, inode); + ext3_orphan_del(handle, inode); ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; Index: linux-2.6.9-67/fs/ext3/super.c =================================================================== --- linux-2.6.9-67.orig/fs/ext3/super.c +++ linux-2.6.9-67/fs/ext3/super.c @@ -632,6 +632,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, }; static match_table_t tokens = { @@ -678,6 +679,9 @@ static match_table_t tokens = { {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, {Opt_ignore, "usrquota"}, + {Opt_iopen, "iopen"}, + {Opt_noiopen, "noiopen"}, + {Opt_iopen_nopriv, "iopen_nopriv"}, {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, @@ -959,6 +963,18 @@ clear_qf_name: else clear_opt(sbi->s_mount_opt, BARRIER); break; + case Opt_iopen: + set_opt (sbi->s_mount_opt, IOPEN); + clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; + case Opt_noiopen: + clear_opt (sbi->s_mount_opt, IOPEN); + clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; + case Opt_iopen_nopriv: + set_opt (sbi->s_mount_opt, IOPEN); + set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); + break; case Opt_ignore: break; case Opt_resize: Index: linux-2.6.9-67/include/linux/ext3_fs.h =================================================================== --- linux-2.6.9-67.orig/include/linux/ext3_fs.h +++ linux-2.6.9-67/include/linux/ext3_fs.h @@ -355,6 +355,8 @@ struct ext3_inode { #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */ #define EXT3_MOUNT_RESERVATION 0x20000 /* Preallocation */ +#define EXT3_MOUNT_IOPEN 0x80000 /* Allow access via iopen */ +#define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H