fs/exec.c | 19 +++++++--- fs/inode.c | 1 + fs/namei.c | 86 +++++++++++++++++++++++++++++++++++++++++++++---- fs/namespace.c | 2 + fs/nfs/dir.c | 4 +- fs/open.c | 62 +++++++++++++++++++++++------------ fs/stat.c | 24 ++++++++++--- include/linux/dcache.h | 3 + include/linux/fs.h | 8 ++++ include/linux/namei.h | 56 ++++++++++++++++++++++++++----- kernel/ksyms.c | 8 ++++ 11 files changed, 223 insertions(+), 50 deletions(-) Index: linux-2.6.0-test6/fs/exec.c =================================================================== --- linux-2.6.0-test6.orig/fs/exec.c 2003-10-07 15:14:14.000000000 +0800 +++ linux-2.6.0-test6/fs/exec.c 2003-10-07 15:33:15.000000000 +0800 @@ -120,8 +120,10 @@ struct file * file; struct nameidata nd; int error; + intent_init(&nd.intent, IT_OPEN); - nd.intent.open.flags = O_RDONLY; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + /* walk once with the _it variant: __user_walk() would reset the intent to IT_LOOKUP */ + nd.intent.it_flags = O_RDONLY; + error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -134,7 +136,7 @@ if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -471,8 +473,13 @@ struct file *open_exec(const char *name) { struct nameidata nd; - int err = path_lookup(name, LOOKUP_FOLLOW, &nd); - struct file *file = ERR_PTR(err); + int err; + struct file *file; + + intent_init(&nd.intent, IT_OPEN); + nd.intent.it_flags = O_RDONLY; + err = path_lookup(name, LOOKUP_FOLLOW, &nd); + file = ERR_PTR(err); if (!err) { struct inode *inode = nd.dentry->d_inode; @@ -484,7 +491,7 @@ err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { Index: linux-2.6.0-test6/fs/namei.c =================================================================== --- linux-2.6.0-test6.orig/fs/namei.c 2003-10-07 15:14:14.000000000 +0800 +++
linux-2.6.0-test6/fs/namei.c 2003-10-07 15:33:15.000000000 +0800 @@ -264,8 +264,19 @@ return 0; } +void intent_release(struct lookup_intent *it) +{ + if (!it) + return; + if (it->it_magic != INTENT_MAGIC) + return; + if (it->it_op_release) + it->it_op_release(it); +} + void path_release(struct nameidata *nd) { + intent_release(&nd->intent); dput(nd->dentry); mntput(nd->mnt); } @@ -342,7 +353,10 @@ { struct dentry * result; struct inode *dir = parent->d_inode; + int counter = 0; +again: + counter++; down(&dir->i_sem); /* * First re-do the cached lookup just in case it was created @@ -381,7 +395,10 @@ if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); - result = ERR_PTR(-ENOENT); + if (counter > 10) + result = ERR_PTR(-ESTALE); + if (!IS_ERR(result)) + goto again; } } return result; @@ -556,6 +573,40 @@ return PTR_ERR(dentry); } +/* + * Revalidate nd->dentry through its ->d_revalidate method. When the method + * rejects the dentry, look the name up again in its parent and make nd->dentry + * point at the result. Returns 0 once the dentry is accepted, -ESTALE after + * 10 re-lookups, or the error from permission() or real_lookup(). + */ +static int revalidate_special(struct nameidata *nd) +{ + struct dentry *dentry = nd->dentry; + int err, counter = 0; + + revalidate_again: + if (!dentry->d_op || !dentry->d_op->d_revalidate) + return 0; + if (!dentry->d_op->d_revalidate(dentry, nd)) { + struct dentry *new; + if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd))) + return err; + new = real_lookup(dentry->d_parent, &dentry->d_name, nd); + /* on failure nd->dentry keeps the old dentry and its reference */ + if (IS_ERR(new)) + return PTR_ERR(new); + d_invalidate(dentry); + dput(dentry); + nd->dentry = dentry = new; + counter++; + if (counter < 10) + goto revalidate_again; + printk("excessive revalidate_it loops\n"); + return -ESTALE; + } + return 0; +} + /* * Name resolution.
* @@ -656,7 +698,9 @@ if (inode->i_op->follow_link) { mntget(next.mnt); + nd->flags |= LOOKUP_LINK_NOTLAST; err = do_follow_link(next.dentry, nd); + nd->flags &= ~LOOKUP_LINK_NOTLAST; dput(next.dentry); mntput(next.mnt); if (err) @@ -695,6 +739,11 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: + nd->flags |= LOOKUP_LAST; + err = revalidate_special(nd); + nd->flags &= ~LOOKUP_LAST; + if (err) + break; goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { @@ -702,7 +751,9 @@ if (err < 0) break; } + nd->flags |= LOOKUP_LAST; err = do_lookup(nd, &this, &next); + nd->flags &= ~LOOKUP_LAST; if (err) break; follow_mount(&next.mnt, &next.dentry); @@ -928,7 +979,7 @@ } /* SMP-safe */ -struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) +struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) { unsigned long hash; struct qstr this; @@ -948,11 +999,16 @@ } this.hash = end_name_hash(hash); - return lookup_hash(&this, base); + return __lookup_hash(&this, base, nd); access: return ERR_PTR(-EACCES); } +struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) +{ + return lookup_one_len_it(name, base, len, NULL); +} + /* * namei() * @@ -964,11 +1020,12 @@ * that namei follows links, while lnamei does not. * SMP-safe */ -int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +int __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) { char *tmp = getname(name); int err = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { err = path_lookup(tmp, flags, nd); putname(tmp); @@ -976,6 +1033,12 @@ return err; } +int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +{ + intent_init(&nd->intent, IT_LOOKUP); + return __user_walk_it(name, flags, nd); +} + /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. 
@@ -1248,8 +1311,8 @@ acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; + nd->intent.it_flags = flag; + nd->intent.it_create_mode = mode; /* * The simplest case - just a plain lookup. @@ -1265,6 +1328,7 @@ /* * Create - we need to know the parent. */ + nd->intent.it_op |= IT_CREAT; error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; @@ -1281,7 +1345,9 @@ dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); + nd->flags |= LOOKUP_LAST; dentry = __lookup_hash(&nd->last, nd->dentry, nd); + nd->flags &= ~LOOKUP_LAST; do_last: error = PTR_ERR(dentry); @@ -1386,7 +1452,9 @@ } dir = nd->dentry; down(&dir->d_inode->i_sem); + nd->flags |= LOOKUP_LAST; dentry = __lookup_hash(&nd->last, nd->dentry, nd); + nd->flags &= ~LOOKUP_LAST; putname(nd->last.name); goto do_last; } @@ -2148,7 +2216,9 @@ __vfs_follow_link(struct nameidata *nd, const char *link) { int res = 0; + struct lookup_intent it = nd->intent; char *name; + if (IS_ERR(link)) goto fail; @@ -2158,6 +2228,10 @@ /* weird __emul_prefix() stuff did it */ goto out; } + + intent_init(&nd->intent, it.it_op); + nd->intent.it_flags = it.it_flags; + nd->intent.it_create_mode = it.it_create_mode; res = link_path_walk(link, nd); out: if (current->link_count || res || nd->last_type!=LAST_NORM) Index: linux-2.6.0-test6/fs/namespace.c =================================================================== --- linux-2.6.0-test6.orig/fs/namespace.c 2003-09-28 08:50:31.000000000 +0800 +++ linux-2.6.0-test6/fs/namespace.c 2003-10-07 15:33:15.000000000 +0800 @@ -738,6 +738,7 @@ int retval = 0; int mnt_flags = 0; + intent_init(&nd.intent, IT_LOOKUP); /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; @@ -947,6 +948,7 @@ mntput(old_pwdmnt); } } +EXPORT_SYMBOL(set_fs_pwd); static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) { Index: 
linux-2.6.0-test6/fs/open.c =================================================================== --- linux-2.6.0-test6.orig/fs/open.c 2003-10-07 15:14:14.000000000 +0800 +++ linux-2.6.0-test6/fs/open.c 2003-10-07 15:33:15.000000000 +0800 @@ -202,7 +202,7 @@ struct nameidata nd; struct inode * inode; int error; - + intent_init(&nd.intent, IT_GETATTR); error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; @@ -461,6 +461,7 @@ int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; + intent_init(&nd.intent, IT_GETATTR); if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; @@ -492,6 +493,7 @@ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; + path_release(&nd); } @@ -506,6 +508,7 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); if (error) @@ -557,6 +560,7 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); if (error) @@ -629,7 +633,7 @@ error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; - + error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; @@ -737,25 +741,8 @@ * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. 
*/ -struct file *filp_open(const char * filename, int flags, int mode) -{ - int namei_flags, error; - struct nameidata nd; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) - namei_flags++; - if (namei_flags & O_TRUNC) - namei_flags |= 2; - - error = open_namei(filename, namei_flags, mode, &nd); - if (!error) - return dentry_open(nd.dentry, nd.mnt, flags); - - return ERR_PTR(error); -} - -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, + struct lookup_intent *it) { struct file * f; struct inode *inode; @@ -767,6 +754,7 @@ goto cleanup_dentry; f->f_flags = flags; f->f_mode = (flags+1) & O_ACCMODE; + f->f_it = it; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); @@ -786,6 +774,7 @@ error = f->f_op->open(inode,f); if (error) goto cleanup_all; + intent_release(it); } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); @@ -810,11 +799,42 @@ cleanup_file: put_filp(f); cleanup_dentry: + intent_release(it); dput(dentry); mntput(mnt); return ERR_PTR(error); } +struct file *filp_open(const char * filename, int flags, int mode) +{ + int namei_flags, error; + struct file * temp_filp; + struct nameidata nd; + intent_init(&nd.intent, IT_OPEN); + + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; + if (namei_flags & O_TRUNC) + namei_flags |= 2; + + error = open_namei(filename, namei_flags, mode, &nd); + if (!error) { + temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); + return temp_filp; + } + return ERR_PTR(error); +} + + +struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +{ + struct lookup_intent it; + intent_init(&it, IT_LOOKUP); + + return dentry_open_it(dentry, mnt, flags, &it); +} + /* * Find an empty file descriptor entry, and mark it busy. 
*/ Index: linux-2.6.0-test6/fs/stat.c =================================================================== --- linux-2.6.0-test6.orig/fs/stat.c 2003-09-28 08:50:10.000000000 +0800 +++ linux-2.6.0-test6/fs/stat.c 2003-10-07 15:33:15.000000000 +0800 @@ -33,7 +33,7 @@ stat->blksize = inode->i_blksize; } -int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) { struct inode *inode = dentry->d_inode; int retval; @@ -44,6 +44,8 @@ if (inode->i_op->getattr) return inode->i_op->getattr(mnt, dentry, stat); + if (inode->i_op->getattr_it) + return inode->i_op->getattr_it(mnt, dentry, it, stat); generic_fillattr(inode, stat); if (!stat->blksize) { @@ -56,14 +58,20 @@ return 0; } +int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + return vfs_getattr_it(mnt, dentry, NULL, stat); +} + int vfs_stat(char __user *name, struct kstat *stat) { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = user_path_walk(name, &nd); + error = user_path_walk_it(name, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); + error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); path_release(&nd); } return error; @@ -73,10 +81,11 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = user_path_walk_link(name, &nd); + error = user_path_walk_link_it(name, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); + error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); path_release(&nd); } return error; @@ -86,9 +95,12 @@ { struct file *f = fget(fd); int error = -EBADF; + struct nameidata nd; + intent_init(&nd.intent, IT_GETATTR); if (f) { - error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); + error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); + intent_release(&nd.intent); fput(f); } return error; Index: 
linux-2.6.0-test6/include/linux/dcache.h =================================================================== --- linux-2.6.0-test6.orig/include/linux/dcache.h 2003-09-28 08:51:16.000000000 +0800 +++ linux-2.6.0-test6/include/linux/dcache.h 2003-10-07 15:33:15.000000000 +0800 @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include <asm/atomic.h> +#include <linux/string.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/cache.h> @@ -35,6 +36,8 @@ char name_str[0]; }; +#include <linux/namei.h> + struct dentry_stat_t { int nr_dentry; int nr_unused; Index: linux-2.6.0-test6/include/linux/fs.h =================================================================== --- linux-2.6.0-test6.orig/include/linux/fs.h 2003-10-07 15:14:15.000000000 +0800 +++ linux-2.6.0-test6/include/linux/fs.h 2003-10-07 15:34:10.000000000 +0800 @@ -243,6 +243,8 @@ #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 +#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ /* * This is the Inode Attributes structure, used for notify_change().
It @@ -402,6 +404,7 @@ struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; + void *i_filterdata; unsigned long i_dnotify_mask; /* Directory notify events */ struct dnotify_struct *i_dnotify; /* for directory notifications */ @@ -530,6 +533,7 @@ struct list_head f_ep_links; spinlock_t f_ep_lock; struct address_space *f_mapping; + struct lookup_intent *f_it; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); @@ -839,7 +843,9 @@ void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); + int (*setattr_raw) (struct inode *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); @@ -1052,6 +1058,7 @@ extern int unregister_filesystem(struct file_system_type *); extern struct vfsmount *kern_mount(struct file_system_type *); extern int may_umount(struct vfsmount *); +struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); extern long do_mount(char *, char *, char *, unsigned long, void *); extern int vfs_statfs(struct super_block *, struct kstatfs *); @@ -1119,6 +1126,7 @@ extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); Index: linux-2.6.0-test6/include/linux/namei.h =================================================================== --- linux-2.6.0-test6.orig/include/linux/namei.h 2003-09-28 08:50:15.000000000 
+0800 +++ linux-2.6.0-test6/include/linux/namei.h 2003-10-07 15:33:15.000000000 +0800 @@ -2,25 +2,55 @@ #define _LINUX_NAMEI_H #include <linux/linkage.h> +#include <linux/string.h> struct vfsmount; +struct nameidata; -struct open_intent { - int flags; - int create_mode; +/* intent opcodes */ +#define IT_OPEN (1) +#define IT_CREAT (1<<1) +#define IT_READDIR (1<<2) +#define IT_GETATTR (1<<3) +#define IT_LOOKUP (1<<4) +#define IT_UNLINK (1<<5) +#define IT_TRUNC (1<<6) +#define IT_GETXATTR (1<<7) + +struct lustre_intent_data { + int it_disposition; + int it_status; + __u64 it_lock_handle; + void *it_data; + int it_lock_mode; }; +#define INTENT_MAGIC 0x19620323 +struct lookup_intent { + int it_magic; + void (*it_op_release)(struct lookup_intent *); + int it_op; + int it_flags; + int it_create_mode; + union { + struct lustre_intent_data lustre; + } d; +}; + +static inline void intent_init(struct lookup_intent *it, int op) +{ + memset(it, 0, sizeof(*it)); + it->it_magic = INTENT_MAGIC; + it->it_op = op; +} + struct nameidata { struct dentry *dentry; struct vfsmount *mnt; struct qstr last; unsigned int flags; int last_type; - - /* Intent data */ - union { - struct open_intent open; - } intent; + struct lookup_intent intent; }; /* @@ -41,6 +71,9 @@ #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 +#define LOOKUP_LAST (1<<6) +#define LOOKUP_LINK_NOTLAST (1<<7) + /* * Intent data */ @@ -49,6 +82,12 @@ #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); +extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); +#define user_path_walk_it(name,nd) \ + __user_walk_it(name, LOOKUP_FOLLOW, nd) +#define user_path_walk_link_it(name,nd) \ + __user_walk_it(name, 0, nd) +extern void intent_release(struct lookup_intent *); #define user_path_walk(name,nd) \ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ @@ -60,7 +99,6 @@ extern struct dentry * lookup_one_len(const
char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - extern int follow_down(struct vfsmount **, struct dentry **); extern int follow_up(struct vfsmount **, struct dentry **); Index: linux-2.6.0-test6/kernel/ksyms.c =================================================================== --- linux-2.6.0-test6.orig/kernel/ksyms.c 2003-10-07 15:21:57.000000000 +0800 +++ linux-2.6.0-test6/kernel/ksyms.c 2003-10-07 15:33:15.000000000 +0800 @@ -220,11 +220,18 @@ EXPORT_SYMBOL(unregister_filesystem); EXPORT_SYMBOL(__mntput); EXPORT_SYMBOL(may_umount); +EXPORT_SYMBOL(reparent_to_init); /* interrupt handling */ EXPORT_SYMBOL(request_irq); EXPORT_SYMBOL(free_irq); +/* lustre */ +EXPORT_SYMBOL(do_kern_mount); +EXPORT_SYMBOL(exit_files); +//EXPORT_SYMBOL(kmem_cache_validate); + + /* waitqueue handling */ EXPORT_SYMBOL(add_wait_queue); EXPORT_SYMBOL(add_wait_queue_exclusive); Index: linux-2.6.0-test6/fs/nfs/dir.c =================================================================== --- linux-2.6.0-test6.orig/fs/nfs/dir.c 2003-09-28 08:50:20.000000000 +0800 +++ linux-2.6.0-test6/fs/nfs/dir.c 2003-10-07 15:33:15.000000000 +0800 @@ -652,7 +652,7 @@ return 0; if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) return 0; - return (nd->intent.open.flags & O_EXCL) != 0; + return (nd->intent.it_flags & O_EXCL) != 0; } static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) @@ -825,7 +825,7 @@ attr.ia_valid = ATTR_MODE; if (nd && (nd->flags & LOOKUP_CREATE)) - open_flags = nd->intent.open.flags; + open_flags = nd->intent.it_flags; /* * The 0 argument passed into the create function should one day Index: linux-2.6.0-test6/fs/inode.c =================================================================== --- linux-2.6.0-test6.orig/fs/inode.c 2003-10-07 15:14:14.000000000 +0800 +++ linux-2.6.0-test6/fs/inode.c 2003-10-07 15:38:08.000000000 +0800 @@ -224,6 +224,7 @@ inodes_stat.nr_unused--; 
} +EXPORT_SYMBOL(__iget); /** * clear_inode - clear an inode * @inode: inode to clear