--- linux-2.6.12.6.orig/fs/exec.c +++ linux-2.6.12.6/fs/exec.c @@ -122,9 +122,10 @@ struct file * file; struct nameidata nd; int error; + intent_init(&nd.intent, IT_OPEN); - nd.intent.open.flags = FMODE_READ; - error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + nd.intent.it_flags = FMODE_READ|FMODE_EXEC; + error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -136,7 +137,7 @@ if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -492,8 +493,9 @@ int err; struct file *file; - nd.intent.open.flags = FMODE_READ; - err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + intent_init(&nd.intent, IT_OPEN); + nd.intent.it_flags = FMODE_READ|FMODE_EXEC; + err = path_lookup(name, LOOKUP_FOLLOW, &nd); file = ERR_PTR(err); if (!err) { @@ -506,7 +508,7 @@ err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { --- linux-2.6.12.6.orig/fs/namei.c +++ linux-2.6.12.6/fs/namei.c @@ -301,8 +301,19 @@ return 0; } +void intent_release(struct lookup_intent *it) +{ + if (!it) + return; + if (it->it_magic != INTENT_MAGIC) + return; + if (it->it_op_release) + it->it_op_release(it); +} + void path_release(struct nameidata *nd) { + intent_release(&nd->intent); dput(nd->dentry); mntput(nd->mnt); } @@ -392,8 +403,11 @@ { struct dentry * result; struct inode *dir = parent->d_inode; + int counter = 0; down(&dir->i_sem); +again: + counter++; /* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. @@ -427,13 +441,16 @@ * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. 
*/ - up(&dir->i_sem); if (result->d_op && result->d_op->d_revalidate) { if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { dput(result); - result = ERR_PTR(-ENOENT); + if (counter > 10) + result = ERR_PTR(-ESTALE); + if (!IS_ERR(result)) + goto again; } } + up(&dir->i_sem); return result; } @@ -462,6 +479,7 @@ { int res = 0; char *name; + if (IS_ERR(link)) goto fail; @@ -471,6 +489,7 @@ /* weird __emul_prefix() stuff did it */ goto out; } + intent_reset_fs_part(&nd->intent); res = link_path_walk(link, nd); out: if (nd->depth || res || nd->last_type!=LAST_NORM) @@ -703,6 +722,33 @@ return PTR_ERR(dentry); } +static int revalidate_special(struct nameidata *nd) +{ + struct dentry *dentry = nd->dentry; + int err, counter = 0; + + revalidate_again: + if (!dentry->d_op || !dentry->d_op->d_revalidate) + return 0; + if (!dentry->d_op->d_revalidate(dentry, nd)) { + struct dentry *new; + if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd))) + return err; + new = real_lookup(dentry->d_parent, &dentry->d_name, nd); + if (IS_ERR(new)) + return PTR_ERR(new); + d_invalidate(dentry); + dput(dentry); + nd->dentry = dentry = new; + counter++; + if (counter < 10) + goto revalidate_again; + printk("excessive revalidate_it loops\n"); + return -ESTALE; + } + return 0; +} + /* * Name resolution. 
* This is the basic name resolution function, turning a pathname into @@ -800,7 +846,11 @@ goto out_dput; if (inode->i_op->follow_link) { + int save_flags = nd->flags; + nd->flags |= LOOKUP_LINK_NOTLAST; err = do_follow_link(&next, nd); + if (!(save_flags & LOOKUP_LINK_NOTLAST)) + nd->flags &= ~LOOKUP_LINK_NOTLAST; if (err) goto return_err; err = -ENOENT; @@ -839,6 +889,23 @@ inode = nd->dentry->d_inode; /* fallthrough */ case 1: + nd->flags |= LOOKUP_LAST; + err = revalidate_special(nd); + nd->flags &= ~LOOKUP_LAST; + if (!nd->dentry->d_inode) + err = -ENOENT; + if (err) { + path_release(nd); + goto return_err; + } + if (lookup_flags & LOOKUP_DIRECTORY) { + err = -ENOTDIR; + if(!nd->dentry->d_inode->i_op || + !nd->dentry->d_inode->i_op->lookup) { + path_release(nd); + goto return_err; + } + } goto return_reval; } if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { @@ -846,7 +913,9 @@ if (err < 0) break; } + nd->flags |= LOOKUP_LAST; err = do_lookup(nd, &this, &next); + nd->flags &= ~LOOKUP_LAST; if (err) break; inode = next.dentry->d_inode; @@ -1097,7 +1166,7 @@ } /* SMP-safe */ -struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) +struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) { unsigned long hash; struct qstr this; @@ -1117,11 +1186,16 @@ } this.hash = end_name_hash(hash); - return lookup_hash(&this, base); + return __lookup_hash(&this, base, nd); access: return ERR_PTR(-EACCES); } +struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) +{ + return lookup_one_len_it(name, base, len, NULL); +} + /* * namei() * @@ -1133,7 +1207,7 @@ * that namei follows links, while lnamei does not. 
* SMP-safe */ -int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) { char *tmp = getname(name); int err = PTR_ERR(tmp); @@ -1145,6 +1219,12 @@ return err; } +int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +{ + intent_init(&nd->intent, IT_LOOKUP); + return __user_walk_it(name, flags, nd); +} + /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. @@ -1426,8 +1506,8 @@ acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; + nd->intent.it_flags = flag; + nd->intent.it_create_mode = mode; /* * The simplest case - just a plain lookup. @@ -1442,6 +1522,7 @@ /* * Create - we need to know the parent. */ + nd->intent.it_op |= IT_CREAT; error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); if (error) return error; @@ -1458,7 +1539,9 @@ dir = nd->dentry; nd->flags &= ~LOOKUP_PARENT; down(&dir->d_inode->i_sem); + nd->flags |= LOOKUP_LAST; path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); + nd->flags &= ~LOOKUP_LAST; path.mnt = nd->mnt; do_last: @@ -1564,7 +1647,9 @@ } dir = nd->dentry; down(&dir->d_inode->i_sem); + nd->flags |= LOOKUP_LAST; path.dentry = __lookup_hash(&nd->last, nd->dentry, nd); + nd->flags &= ~LOOKUP_LAST; path.mnt = nd->mnt; putname(nd->last.name); goto do_last; --- linux-2.6.12.6.orig/fs/namespace.c +++ linux-2.6.12.6/fs/namespace.c @@ -62,6 +62,7 @@ INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_lustre_list); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -113,6 +114,7 @@ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) { + memset(old_nd, 0, sizeof(*old_nd)); old_nd->dentry = mnt->mnt_mountpoint; old_nd->mnt = 
mnt->mnt_parent; mnt->mnt_parent = mnt; @@ -176,6 +178,9 @@ { struct super_block *sb = mnt->mnt_sb; dput(mnt->mnt_root); + spin_lock(&dcache_lock); + list_del(&mnt->mnt_lustre_list); + spin_unlock(&dcache_lock); free_vfsmnt(mnt); deactivate_super(sb); } @@ -402,6 +407,8 @@ */ lock_kernel(); + if (sb->s_op->umount_lustre) + sb->s_op->umount_lustre(sb); if( (flags&MNT_FORCE) && sb->s_op->umount_begin) sb->s_op->umount_begin(sb); unlock_kernel(); @@ -627,6 +634,7 @@ return err; if (!old_name || !*old_name) return -EINVAL; + intent_init(&old_nd.intent, IT_LOOKUP); err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; @@ -701,6 +709,7 @@ return -EPERM; if (!old_name || !*old_name) return -EINVAL; + intent_init(&old_nd.intent, IT_LOOKUP); err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); if (err) return err; @@ -1012,6 +1021,7 @@ int retval = 0; int mnt_flags = 0; + intent_init(&nd.intent, IT_LOOKUP); /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; --- linux-2.6.12.6.orig/fs/open.c +++ linux-2.6.12.6/fs/open.c @@ -215,12 +215,12 @@ struct nameidata nd; struct inode * inode; int error; - + intent_init(&nd.intent, IT_GETATTR); error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; - error = user_path_walk(path, &nd); + error = user_path_walk_it(path, &nd); if (error) goto out; inode = nd.dentry->d_inode; @@ -474,6 +474,7 @@ int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; + intent_init(&nd.intent, IT_GETATTR); if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ return -EINVAL; @@ -498,13 +499,14 @@ else current->cap_effective = current->cap_permitted; - res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + res = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (!res) { res = permission(nd.dentry->d_inode, mode, &nd); /* SuS v2 requires we report a read only fs too */ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; + path_release(&nd); } @@ -519,8 +521,9 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); if (error) goto out; @@ -570,8 +573,9 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); if (error) goto out; @@ -750,27 +754,8 @@ * for the internal routines (ie open_namei()/follow_link() etc). 00 is * used by symlinks. 
*/ -struct file *filp_open(const char * filename, int flags, int mode) -{ - int namei_flags, error; - struct nameidata nd; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) - namei_flags++; - if (namei_flags & O_TRUNC) - namei_flags |= 2; - - error = open_namei(filename, namei_flags, mode, &nd); - if (!error) - return dentry_open(nd.dentry, nd.mnt, flags); - - return ERR_PTR(error); -} - -EXPORT_SYMBOL(filp_open); - -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, + struct lookup_intent *it) { struct file * f; struct inode *inode; @@ -782,6 +767,7 @@ goto cleanup_dentry; f->f_flags = flags; f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; + f->f_it = it; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = get_write_access(inode); @@ -800,6 +786,7 @@ error = f->f_op->open(inode,f); if (error) goto cleanup_all; + intent_release(it); } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); @@ -825,6 +812,7 @@ cleanup_file: put_filp(f); cleanup_dentry: + intent_release(it); dput(dentry); mntput(mnt); return ERR_PTR(error); @@ -832,6 +820,36 @@ EXPORT_SYMBOL(dentry_open); +struct file *filp_open(const char * filename, int flags, int mode) +{ + int namei_flags, error; + struct file * temp_filp; + struct nameidata nd; + intent_init(&nd.intent, IT_OPEN); + + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; + if (namei_flags & O_TRUNC) + namei_flags |= 2; + + error = open_namei(filename, namei_flags, mode, &nd); + if (!error) { + temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); + return temp_filp; + } + return ERR_PTR(error); +} + + +struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +{ + struct lookup_intent it; + intent_init(&it, IT_LOOKUP); + + return dentry_open_it(dentry, mnt, flags, &it); +} + /* * Find an empty file descriptor 
entry, and mark it busy. */ --- linux-2.6.12.6.orig/fs/stat.c +++ linux-2.6.12.6/fs/stat.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(generic_fillattr); -int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) { struct inode *inode = dentry->d_inode; int retval; @@ -47,6 +47,8 @@ if (retval) return retval; + if (inode->i_op->getattr_it) + return inode->i_op->getattr_it(mnt, dentry, it, stat); if (inode->i_op->getattr) return inode->i_op->getattr(mnt, dentry, stat); @@ -63,14 +65,20 @@ EXPORT_SYMBOL(vfs_getattr); +int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + return vfs_getattr_it(mnt, dentry, NULL, stat); +} + int vfs_stat(char __user *name, struct kstat *stat) { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = user_path_walk(name, &nd); + error = user_path_walk_it(name, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); + error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); path_release(&nd); } return error; @@ -82,10 +90,11 @@ { struct nameidata nd; int error; + intent_init(&nd.intent, IT_GETATTR); - error = user_path_walk_link(name, &nd); + error = user_path_walk_link_it(name, &nd); if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); + error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); path_release(&nd); } return error; @@ -97,9 +106,12 @@ { struct file *f = fget(fd); int error = -EBADF; + struct nameidata nd; + intent_init(&nd.intent, IT_GETATTR); if (f) { - error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); + error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); + intent_release(&nd.intent); fput(f); } return error; --- linux-2.6.12.6.orig/fs/inode.c +++ linux-2.6.12.6/fs/inode.c @@ -230,6 +230,7 @@ inodes_stat.nr_unused--; } +EXPORT_SYMBOL(__iget); /** * clear_inode - clear an inode * @inode: inode to 
clear --- linux-2.6.12.6.orig/include/linux/dcache.h +++ linux-2.6.12.6/include/linux/dcache.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include <asm/atomic.h> +#include <linux/string.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/cache.h> @@ -37,6 +38,8 @@ const unsigned char *name; }; +#include <linux/namei.h> + struct dentry_stat_t { int nr_dentry; int nr_unused; --- linux-2.6.12.6.orig/include/linux/fs.h +++ linux-2.6.12.6/include/linux/fs.h @@ -58,6 +58,7 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 +#define FMODE_EXEC 4 /* Internal kernel extensions */ #define FMODE_LSEEK 4 @@ -260,6 +261,8 @@ #define ATTR_ATTR_FLAG 1024 #define ATTR_KILL_SUID 2048 #define ATTR_KILL_SGID 4096 +#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ /* * This is the Inode Attributes structure, used for notify_change(). It @@ -463,6 +466,7 @@ struct block_device *i_bdev; struct cdev *i_cdev; int i_cindex; + void *i_filterdata; __u32 i_generation; @@ -600,6 +604,7 @@ spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; + struct lookup_intent *f_it; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); @@ -968,7 +973,9 @@ void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); + int (*setattr_raw) (struct inode *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); @@ -1008,6 +1015,7 @@ int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); + void (*umount_lustre) (struct super_block *); int (*show_options)(struct 
seq_file *, struct vfsmount *); @@ -1210,6 +1218,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); +struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); extern long do_mount(char *, char *, char *, unsigned long, void *); extern int vfs_statfs(struct super_block *, struct kstatfs *); @@ -1262,6 +1271,7 @@ extern int do_truncate(struct dentry *, loff_t start); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); --- linux-2.6.12.6.orig/include/linux/namei.h +++ linux-2.6.12.6/include/linux/namei.h @@ -2,14 +2,55 @@ #define _LINUX_NAMEI_H #include <linux/linkage.h> +#include <linux/string.h> struct vfsmount; +struct nameidata; -struct open_intent { - int flags; - int create_mode; +/* intent opcodes */ +#define IT_OPEN (1) +#define IT_CREAT (1<<1) +#define IT_READDIR (1<<2) +#define IT_GETATTR (1<<3) +#define IT_LOOKUP (1<<4) +#define IT_UNLINK (1<<5) +#define IT_TRUNC (1<<6) +#define IT_GETXATTR (1<<7) + +struct lustre_intent_data { + int it_disposition; + int it_status; + __u64 it_lock_handle; + void *it_data; + int it_lock_mode; }; +#define INTENT_MAGIC 0x19620323 +struct lookup_intent { + int it_magic; + void (*it_op_release)(struct lookup_intent *); + int it_op; + int it_flags; + int it_create_mode; + union { + struct lustre_intent_data lustre; + } d; +}; + +static inline void intent_reset_fs_part(struct lookup_intent *it) +{ + memset(&it->d, 0, sizeof(it->d)); + it->it_magic = INTENT_MAGIC; + it->it_op_release = NULL; +} + +static inline void intent_init(struct lookup_intent *it, int op) +{ + memset(it, 0, sizeof(*it)); + it->it_magic = INTENT_MAGIC; + it->it_op = op; +} + enum { 
MAX_NESTED_LINKS = 5 }; struct nameidata { @@ -21,10 +62,7 @@ unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; - /* Intent data */ - union { - struct open_intent open; - } intent; + struct lookup_intent intent; }; /* @@ -47,6 +85,8 @@ #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 #define LOOKUP_REVAL 64 +#define LOOKUP_LAST (0x1000) +#define LOOKUP_LINK_NOTLAST (0x2000) /* * Intent data */ @@ -55,6 +95,12 @@ #define LOOKUP_ACCESS (0x0400) extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); +extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); +#define user_path_walk_it(name,nd) \ + __user_walk_it(name, LOOKUP_FOLLOW, nd) +#define user_path_walk_link_it(name,nd) \ + __user_walk_it(name, 0, nd) +extern void intent_release(struct lookup_intent *); #define user_path_walk(name,nd) \ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ @@ -67,7 +113,6 @@ extern struct dentry * lookup_one_len(const char *, struct dentry *, int); extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - extern int follow_down(struct vfsmount **, struct dentry **); extern int follow_up(struct vfsmount **, struct dentry **); --- linux-2.6.12.6.orig/include/linux/mount.h +++ linux-2.6.12.6/include/linux/mount.h @@ -36,6 +36,8 @@ struct list_head mnt_list; struct list_head mnt_fslink; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ + struct list_head mnt_lustre_list; /* GNS mount list */ + unsigned long mnt_last_used; /* for GNS auto-umount (jiffies) */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt)