VFS lookup-intent patch for linux-2.6.4-51.0.

Summary of what the hunks below do:
  * include/linux/namei.h: replace the open_intent union in struct nameidata
    with an embedded struct lookup_intent (magic, release hook, opcode,
    flags, create mode, filesystem-private data); add intent_init(), the
    IT_* opcodes and the LOOKUP_LAST / LOOKUP_LINK_NOTLAST flags.
  * fs/namei.c: add intent_release() (called from path_release()),
    revalidate_special(), and the *_it variants lookup_one_len_it() and
    __user_walk_it(); the old entry points become wrappers.
  * fs/open.c, fs/stat.c, fs/exec.c: add dentry_open_it() / vfs_getattr_it()
    and pass the caller's intent through open and getattr paths.
  * fs/nfs/dir.c: follow the intent.open.flags -> intent.it_flags rename.
  * fs/inode.c, fs/super.c, kernel/exit.c: export __iget, do_kern_mount,
    reparent_to_init and exit_files.

Changes made while restoring this patch to an applicable form:
  * sys_uselib(): a stray user_path_walk_it() whose result was immediately
    overwritten by __user_walk() was removed (it leaked the first walk's
    references, and __user_walk() reset the intent to IT_LOOKUP); the
    lookup now uses __user_walk_it() once.
  * revalidate_special(): check the real_lookup() result with IS_ERR()
    before dropping the old dentry and storing the new one in nd->dentry.
  * __user_walk(): keep the fastcall attribute so the definition matches
    the FASTCALL() prototype in namei.h.
  * filp_open(): re-add EXPORT_SYMBOL(filp_open), which was removed together
    with the old definition.
  * A whitespace-only hunk in fs/open.c (old line 638) was dropped.

NOTE(review): several fs/open.c callers run intent_init(..., IT_GETATTR)
and then call __user_walk(), which re-initialises the intent to IT_LOOKUP.
Confirm whether __user_walk_it() was intended there.
NOTE(review): dentry_open() hands dentry_open_it() the address of an
on-stack intent, which is stored in f->f_it and not cleared before return.
Confirm that nothing reads f->f_it after the ->open() call.

Index: linux-2.6.4-51.0/fs/exec.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/exec.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/exec.c	2004-04-05 17:36:42.000000000 -0400
@@ -122,8 +122,9 @@
 	struct file * file;
 	struct nameidata nd;
 	int error;
+	intent_init(&nd.intent, IT_OPEN);
 
-	nd.intent.open.flags = FMODE_READ;
-	error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	nd.intent.it_flags = O_RDONLY;
+	error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
 	if (error)
 		goto out;
@@ -136,7 +137,7 @@
 	if (error)
 		goto exit;
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
@@ -485,8 +486,9 @@
 	int err;
 	struct file *file;
 
-	nd.intent.open.flags = FMODE_READ;
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	intent_init(&nd.intent, IT_OPEN);
+	nd.intent.it_flags = O_RDONLY;
+	err = path_lookup(name, LOOKUP_FOLLOW, &nd);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -499,7 +501,7 @@
 				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+				file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
 				if (!IS_ERR(file)) {
 					err = deny_write_access(file);
 					if (err) {
Index: linux-2.6.4-51.0/fs/namei.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/namei.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/namei.c	2004-04-05 17:36:42.000000000 -0400
@@ -269,8 +269,19 @@
 	return 0;
 }
 
+void intent_release(struct lookup_intent *it)
+{
+	if (!it)
+		return;
+	if (it->it_magic != INTENT_MAGIC)
+		return;
+	if (it->it_op_release)
+		it->it_op_release(it);
+}
+
 void path_release(struct nameidata *nd)
 {
+	intent_release(&nd->intent);
 	dput(nd->dentry);
 	mntput(nd->mnt);
 }
@@ -347,7 +358,10 @@
 {
 	struct dentry * result;
 	struct inode *dir = parent->d_inode;
+	int counter = 0;
 
+again:
+	counter++;
 	down(&dir->i_sem);
 	/*
 	 * First re-do the cached lookup just in case it was created
@@ -386,7 +400,10 @@
 	if (result->d_op && result->d_op->d_revalidate) {
 		if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
 			dput(result);
-			result = ERR_PTR(-ENOENT);
+			if (counter > 10)
+				result = ERR_PTR(-ESTALE);
+			if (!IS_ERR(result))
+				goto again;
 		}
 	}
 	return result;
@@ -563,6 +580,33 @@
 	return PTR_ERR(dentry);
 }
 
+static int revalidate_special(struct nameidata *nd)
+{
+	struct dentry *dentry = nd->dentry;
+	int err, counter = 0;
+
+	if (!dentry->d_op || !dentry->d_op->d_revalidate)
+		return 0;
+ revalidate_again:
+	if (!dentry->d_op->d_revalidate(dentry, nd)) {
+		struct dentry *new;
+		if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd)))
+			return err;
+		new = real_lookup(dentry->d_parent, &dentry->d_name, nd);
+		if (IS_ERR(new))
+			return PTR_ERR(new);
+		d_invalidate(dentry);
+		dput(dentry);
+		nd->dentry = dentry = new;
+		counter++;
+		if (counter < 10)
+			goto revalidate_again;
+		printk("excessive revalidate_it loops\n");
+		return -ESTALE;
+	}
+	return 0;
+}
+
 /*
  * Name resolution.
  *
@@ -663,7 +707,9 @@
 
 		if (inode->i_op->follow_link) {
 			mntget(next.mnt);
+			nd->flags |= LOOKUP_LINK_NOTLAST;
 			err = do_follow_link(next.dentry, nd);
+			nd->flags &= ~LOOKUP_LINK_NOTLAST;
 			dput(next.dentry);
 			mntput(next.mnt);
 			if (err)
@@ -702,14 +748,29 @@
 				inode = nd->dentry->d_inode;
 				/* fallthrough */
 			case 1:
+				nd->flags |= LOOKUP_LAST;
+				err = revalidate_special(nd);
+				nd->flags &= ~LOOKUP_LAST;
+				if (err)
+					break;
 				goto return_reval;
 		}
+
+		if (err) {
+			if (!nd->dentry->d_inode)
+				err = -ENOENT;
+
+			goto return_err;
+		}
+
 		if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
 			err = nd->dentry->d_op->d_hash(nd->dentry, &this);
 			if (err < 0)
 				break;
 		}
+		nd->flags |= LOOKUP_LAST;
 		err = do_lookup(nd, &this, &next);
+		nd->flags &= ~LOOKUP_LAST;
 		if (err)
 			break;
 		follow_mount(&next.mnt, &next.dentry);
@@ -935,7 +996,7 @@
 }
 
 /* SMP-safe */
-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd)
 {
 	unsigned long hash;
 	struct qstr this;
@@ -955,11 +1016,16 @@
 	}
 	this.hash = end_name_hash(hash);
 
-	return lookup_hash(&this, base);
+	return __lookup_hash(&this, base, nd);
 access:
 	return ERR_PTR(-EACCES);
 }
 
+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+{
+	return lookup_one_len_it(name, base, len, NULL);
+}
+
 /*
  *	namei()
  *
@@ -971,7 +1037,7 @@
  * that namei follows links, while lnamei does not.
  * SMP-safe
  */
-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)
 {
 	char *tmp = getname(name);
 	int err = PTR_ERR(tmp);
@@ -983,6 +1049,12 @@
 	return err;
 }
 
+int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
+{
+	intent_init(&nd->intent, IT_LOOKUP);
+	return __user_walk_it(name, flags, nd);
+}
+
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
@@ -1255,8 +1327,8 @@
 		acc_mode |= MAY_APPEND;
 
 	/* Fill in the open() intent data */
-	nd->intent.open.flags = flag;
-	nd->intent.open.create_mode = mode;
+	nd->intent.it_flags = flag;
+	nd->intent.it_create_mode = mode;
 
 	/*
 	 * The simplest case - just a plain lookup.
@@ -1271,6 +1343,7 @@
 	/*
 	 * Create - we need to know the parent.
 	 */
+	nd->intent.it_op |= IT_CREAT;
 	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
 	if (error)
 		return error;
@@ -1287,7 +1360,9 @@
 	dir = nd->dentry;
 	nd->flags &= ~LOOKUP_PARENT;
 	down(&dir->d_inode->i_sem);
+	nd->flags |= LOOKUP_LAST;
 	dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	nd->flags &= ~LOOKUP_LAST;
 
 do_last:
 	error = PTR_ERR(dentry);
@@ -1392,7 +1467,9 @@
 	}
 	dir = nd->dentry;
 	down(&dir->d_inode->i_sem);
+	nd->flags |= LOOKUP_LAST;
 	dentry = __lookup_hash(&nd->last, nd->dentry, nd);
+	nd->flags &= ~LOOKUP_LAST;
 	putname(nd->last.name);
 	goto do_last;
 }
@@ -2154,7 +2231,9 @@
 __vfs_follow_link(struct nameidata *nd, const char *link)
 {
 	int res = 0;
+	struct lookup_intent it = nd->intent;
 	char *name;
+
 	if (IS_ERR(link))
 		goto fail;
 
@@ -2164,6 +2243,10 @@
 			/* weird __emul_prefix() stuff did it */
 			goto out;
 	}
+
+	intent_init(&nd->intent, it.it_op);
+	nd->intent.it_flags = it.it_flags;
+	nd->intent.it_create_mode = it.it_create_mode;
 	res = link_path_walk(link, nd);
 out:
 	if (current->link_count || res || nd->last_type!=LAST_NORM)
Index: linux-2.6.4-51.0/fs/namespace.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/namespace.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/namespace.c	2004-04-07 13:28:23.000000000 -0400
@@ -107,6 +107,7 @@
 
 static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
 {
+	memset(old_nd, 0, sizeof(*old_nd));
 	old_nd->dentry = mnt->mnt_mountpoint;
 	old_nd->mnt = mnt->mnt_parent;
 	mnt->mnt_parent = mnt;
@@ -748,6 +749,7 @@
 	int retval = 0;
 	int mnt_flags = 0;
 
+	intent_init(&nd.intent, IT_LOOKUP);
 	/* Discard magic */
 	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
 		flags &= ~MS_MGC_MSK;
Index: linux-2.6.4-51.0/fs/open.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/open.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/open.c	2004-04-05 17:36:42.000000000 -0400
@@ -211,7 +211,7 @@
 	struct nameidata nd;
 	struct inode * inode;
 	int error;
-
+	intent_init(&nd.intent, IT_GETATTR);
 	error = -EINVAL;
 	if (length < 0)	/* sorry, but loff_t says... */
 		goto out;
@@ -470,6 +470,7 @@
 	int old_fsuid, old_fsgid;
 	kernel_cap_t old_cap;
 	int res;
+	intent_init(&nd.intent, IT_GETATTR);
 
 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
 		return -EINVAL;
@@ -501,6 +502,7 @@
 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
 		   && !special_file(nd.dentry->d_inode->i_mode))
 			res = -EROFS;
+
 		path_release(&nd);
 	}
 
@@ -515,6 +517,7 @@
 {
 	struct nameidata nd;
 	int error;
+	intent_init(&nd.intent, IT_GETATTR);
 
 	error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
 	if (error)
@@ -566,6 +569,7 @@
 {
 	struct nameidata nd;
 	int error;
+	intent_init(&nd.intent, IT_GETATTR);
 
 	error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
 	if (error)
@@ -746,27 +750,8 @@
  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
  * used by symlinks.
  */
-struct file *filp_open(const char * filename, int flags, int mode)
-{
-	int namei_flags, error;
-	struct nameidata nd;
-
-	namei_flags = flags;
-	if ((namei_flags+1) & O_ACCMODE)
-		namei_flags++;
-	if (namei_flags & O_TRUNC)
-		namei_flags |= 2;
-
-	error = open_namei(filename, namei_flags, mode, &nd);
-	if (!error)
-		return dentry_open(nd.dentry, nd.mnt, flags);
-
-	return ERR_PTR(error);
-}
-
-EXPORT_SYMBOL(filp_open);
-
-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags,
+			    struct lookup_intent *it)
 {
 	struct file * f;
 	struct inode *inode;
@@ -778,6 +763,7 @@
 		goto cleanup_dentry;
 	f->f_flags = flags;
 	f->f_mode = (flags+1) & O_ACCMODE;
+	f->f_it = it;
 	inode = dentry->d_inode;
 	if (f->f_mode & FMODE_WRITE) {
 		error = get_write_access(inode);
@@ -797,6 +783,7 @@
 		error = f->f_op->open(inode,f);
 		if (error)
 			goto cleanup_all;
+		intent_release(it);
 	}
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
@@ -821,6 +808,7 @@
 cleanup_file:
 	put_filp(f);
 cleanup_dentry:
+	intent_release(it);
 	dput(dentry);
 	mntput(mnt);
 	return ERR_PTR(error);
@@ -828,6 +816,37 @@
 
 EXPORT_SYMBOL(dentry_open);
 
+struct file *filp_open(const char * filename, int flags, int mode)
+{
+	int namei_flags, error;
+	struct file * temp_filp;
+	struct nameidata nd;
+	intent_init(&nd.intent, IT_OPEN);
+
+	namei_flags = flags;
+	if ((namei_flags+1) & O_ACCMODE)
+		namei_flags++;
+	if (namei_flags & O_TRUNC)
+		namei_flags |= 2;
+
+	error = open_namei(filename, namei_flags, mode, &nd);
+	if (!error) {
+		temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent);
+		return temp_filp;
+	}
+	return ERR_PTR(error);
+}
+
+EXPORT_SYMBOL(filp_open);
+
+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+{
+	struct lookup_intent it;
+	intent_init(&it, IT_LOOKUP);
+
+	return dentry_open_it(dentry, mnt, flags, &it);
+}
+
 /*
  * Find an empty file descriptor entry, and mark it busy.
  */
Index: linux-2.6.4-51.0/fs/stat.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/stat.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/stat.c	2004-04-05 17:36:42.000000000 -0400
@@ -36,7 +36,7 @@
 
 EXPORT_SYMBOL(generic_fillattr);
 
-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
 	int retval;
@@ -45,6 +45,8 @@
 	if (retval)
 		return retval;
 
+	if (inode->i_op->getattr_it)
+		return inode->i_op->getattr_it(mnt, dentry, it, stat);
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(mnt, dentry, stat);
 
@@ -61,14 +63,20 @@
 
 EXPORT_SYMBOL(vfs_getattr);
 
+int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+	return vfs_getattr_it(mnt, dentry, NULL, stat);
+}
+
 int vfs_stat(char __user *name, struct kstat *stat)
 {
 	struct nameidata nd;
 	int error;
+	intent_init(&nd.intent, IT_GETATTR);
 
-	error = user_path_walk(name, &nd);
+	error = user_path_walk_it(name, &nd);
 	if (!error) {
-		error = vfs_getattr(nd.mnt, nd.dentry, stat);
+		error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
 		path_release(&nd);
 	}
 	return error;
@@ -80,10 +88,11 @@
 {
 	struct nameidata nd;
 	int error;
+	intent_init(&nd.intent, IT_GETATTR);
 
-	error = user_path_walk_link(name, &nd);
+	error = user_path_walk_link_it(name, &nd);
 	if (!error) {
-		error = vfs_getattr(nd.mnt, nd.dentry, stat);
+		error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
 		path_release(&nd);
 	}
 	return error;
@@ -95,9 +104,12 @@
 {
 	struct file *f = fget(fd);
 	int error = -EBADF;
+	struct nameidata nd;
+	intent_init(&nd.intent, IT_GETATTR);
 
 	if (f) {
-		error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
+		error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
+		intent_release(&nd.intent);
 		fput(f);
 	}
 	return error;
Index: linux-2.6.4-51.0/fs/nfs/dir.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/nfs/dir.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/nfs/dir.c	2004-04-07 13:27:47.000000000 -0400
@@ -709,7 +709,7 @@
 		return 0;
 	if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
 		return 0;
-	return (nd->intent.open.flags & O_EXCL) != 0;
+	return (nd->intent.it_flags & O_EXCL) != 0;
 }
 
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1026,7 +1026,7 @@
 	attr.ia_valid = ATTR_MODE;
 
 	if (nd && (nd->flags & LOOKUP_CREATE))
-		open_flags = nd->intent.open.flags;
+		open_flags = nd->intent.it_flags;
 
 	/*
 	 * The 0 argument passed into the create function should one day
Index: linux-2.6.4-51.0/fs/inode.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/inode.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/inode.c	2004-04-05 17:36:43.000000000 -0400
@@ -221,6 +221,7 @@
 	inodes_stat.nr_unused--;
 }
 
+EXPORT_SYMBOL(__iget);
 /**
  * clear_inode - clear an inode
  * @inode: inode to clear
Index: linux-2.6.4-51.0/fs/super.c
===================================================================
--- linux-2.6.4-51.0.orig/fs/super.c	2004-04-05 12:41:59.000000000 -0400
+++ linux-2.6.4-51.0/fs/super.c	2004-04-05 17:36:43.000000000 -0400
@@ -787,6 +787,8 @@
 	return (struct vfsmount *)sb;
 }
 
+EXPORT_SYMBOL(do_kern_mount);
+
 struct vfsmount *kern_mount(struct file_system_type *type)
 {
 	return do_kern_mount(type->name, 0, type->name, NULL);
Index: linux-2.6.4-51.0/include/linux/dcache.h
===================================================================
--- linux-2.6.4-51.0.orig/include/linux/dcache.h	2004-04-05 12:42:07.000000000 -0400
+++ linux-2.6.4-51.0/include/linux/dcache.h	2004-04-05 17:36:43.000000000 -0400
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 
 #include <asm/atomic.h>
+#include <linux/string.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/cache.h>
@@ -35,6 +36,8 @@
 	char name_str[0];
 };
 
+#include <linux/namei.h>
+
 struct dentry_stat_t {
 	int nr_dentry;
 	int nr_unused;
Index: linux-2.6.4-51.0/include/linux/fs.h
===================================================================
--- linux-2.6.4-51.0.orig/include/linux/fs.h	2004-04-05 12:42:07.000000000 -0400
+++ linux-2.6.4-51.0/include/linux/fs.h	2004-04-05 17:36:43.000000000 -0400
@@ -249,6 +249,8 @@
 #define ATTR_ATTR_FLAG	1024
 #define ATTR_KILL_SUID	2048
 #define ATTR_KILL_SGID	4096
+#define ATTR_RAW		8192	/* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN	16384	/* called from open path, ie O_TRUNC */
 
 /*
  * This is the Inode Attributes structure, used for notify_change().  It
@@ -422,6 +424,7 @@
 	struct block_device	*i_bdev;
 	struct cdev		*i_cdev;
 	int			i_cindex;
+	void			*i_filterdata;
 
 	unsigned long		i_dnotify_mask; /* Directory notify events */
 	struct dnotify_struct	*i_dnotify; /* for directory notifications */
@@ -554,6 +557,7 @@
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
+	struct lookup_intent	*f_it;
 };
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
@@ -874,7 +878,9 @@
 	void (*truncate) (struct inode *);
 	int (*permission) (struct inode *, int, struct nameidata *);
 	int (*setattr) (struct dentry *, struct iattr *);
+	int (*setattr_raw) (struct inode *, struct iattr *);
 	int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+	int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *);
 	int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
@@ -1101,6 +1107,7 @@
 extern int unregister_filesystem(struct file_system_type *);
 extern struct vfsmount *kern_mount(struct file_system_type *);
 extern int may_umount(struct vfsmount *);
+struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data);
 extern long do_mount(char *, char *, char *, unsigned long, void *);
 
 extern int vfs_statfs(struct super_block *, struct kstatfs *);
@@ -1165,6 +1172,7 @@
 extern int do_truncate(struct dentry *, loff_t start);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
 extern int filp_close(struct file *, fl_owner_t id);
 extern char * getname(const char __user *);
 
Index: linux-2.6.4-51.0/include/linux/namei.h
===================================================================
--- linux-2.6.4-51.0.orig/include/linux/namei.h	2004-04-05 12:42:07.000000000 -0400
+++ linux-2.6.4-51.0/include/linux/namei.h	2004-04-05 17:36:43.000000000 -0400
@@ -2,25 +2,55 @@
 #define _LINUX_NAMEI_H
 
 #include <linux/linkage.h>
+#include <linux/string.h>
 
 struct vfsmount;
+struct nameidata;
 
-struct open_intent {
-	int	flags;
-	int	create_mode;
+/* intent opcodes */
+#define IT_OPEN     (1)
+#define IT_CREAT    (1<<1)
+#define IT_READDIR  (1<<2)
+#define IT_GETATTR  (1<<3)
+#define IT_LOOKUP   (1<<4)
+#define IT_UNLINK   (1<<5)
+#define IT_TRUNC    (1<<6)
+#define IT_GETXATTR (1<<7)
+
+struct lustre_intent_data {
+	int	it_disposition;
+	int	it_status;
+	__u64	it_lock_handle;
+	void	*it_data;
+	int	it_lock_mode;
 };
 
+#define INTENT_MAGIC 0x19620323
+struct lookup_intent {
+	int	it_magic;
+	void	(*it_op_release)(struct lookup_intent *);
+	int	it_op;
+	int	it_flags;
+	int	it_create_mode;
+	union {
+		struct lustre_intent_data lustre;
+	} d;
+};
+
+static inline void intent_init(struct lookup_intent *it, int op)
+{
+	memset(it, 0, sizeof(*it));
+	it->it_magic = INTENT_MAGIC;
+	it->it_op = op;
+}
+
 struct nameidata {
 	struct dentry	*dentry;
 	struct vfsmount *mnt;
 	struct qstr	last;
 	unsigned int	flags;
 	int		last_type;
-
-	/* Intent data */
-	union {
-		struct open_intent open;
-	} intent;
+	struct lookup_intent intent;
 };
 
 /*
@@ -41,6 +71,9 @@
 #define LOOKUP_CONTINUE		 4
 #define LOOKUP_PARENT		16
 #define LOOKUP_NOALT		32
+#define LOOKUP_LAST		 (1<<6)
+#define LOOKUP_LINK_NOTLAST	 (1<<7)
+
 /*
  * Intent data
  */
@@ -49,6 +82,12 @@
 #define LOOKUP_ACCESS		(0x0400)
 
 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
+extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd));
+#define user_path_walk_it(name,nd) \
+	__user_walk_it(name, LOOKUP_FOLLOW, nd)
+#define user_path_walk_link_it(name,nd) \
+	__user_walk_it(name, 0, nd)
+extern void intent_release(struct lookup_intent *);
 #define user_path_walk(name,nd) \
 	__user_walk(name, LOOKUP_FOLLOW, nd)
 #define user_path_walk_link(name,nd) \
@@ -60,7 +99,6 @@
 
 extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
-
 extern int follow_down(struct vfsmount **, struct dentry **);
 extern int follow_up(struct vfsmount **, struct dentry **);
 
Index: linux-2.6.4-51.0/kernel/exit.c
===================================================================
--- linux-2.6.4-51.0.orig/kernel/exit.c	2004-04-05 12:42:08.000000000 -0400
+++ linux-2.6.4-51.0/kernel/exit.c	2004-04-05 17:36:43.000000000 -0400
@@ -259,6 +259,8 @@
 	write_unlock_irq(&tasklist_lock);
 }
 
+EXPORT_SYMBOL(reparent_to_init);
+
 void __set_special_pids(pid_t session, pid_t pgrp)
 {
 	struct task_struct *curr = current;
@@ -428,6 +430,8 @@
 	__exit_files(tsk);
 }
 
+EXPORT_SYMBOL(exit_files);
+
 static inline void __put_fs_struct(struct fs_struct *fs)
 {
 	/* No need to hold fs->lock if we are killing it */