From 3316bfb4720d683c9f10783469d40a3de4a3f6c1 Mon Sep 17 00:00:00 2001 From: green Date: Fri, 25 Feb 2005 13:18:45 +0000 Subject: [PATCH] Updates kernel patches for rhel4 kernel (2.6-rhel4 series). Kernel compiles ok. Starts updating ldiskfs series for it too; extents and mballoc2 remain to be done for now --- .../patches/export-ext3-2.6-rhel4.patch | 33 + .../patches/ext3-wantedi-2.6-rhel4.patch | 177 +++++ .../kernel_patches/patches/iopen-2.6-rhel4.patch | 467 ++++++++++++ .../patches/export-ext3-2.6-rhel4.patch | 33 + .../patches/ext3-include-fixes-2.6-rhel4.diff | 20 + .../patches/ext3-wantedi-2.6-rhel4.patch | 177 +++++ .../kernel_patches/patches/iopen-2.6-rhel4.patch | 467 ++++++++++++ .../patches/lookup_bdev_init_intent.patch | 12 + .../patches/nfs-cifs-intent-2.6-rhel4.patch | 117 +++ .../patches/vfs_intent-2.6-rhel4.patch | 826 +++++++++++++++++++++ .../patches/vfs_nointent-2.6-vanilla.patch | 509 +++++++++++++ .../patches/vfs_races-2.6-vanilla.patch | 65 ++ lustre/kernel_patches/series/2.6-rhel4.series | 12 + 13 files changed, 2915 insertions(+) create mode 100644 ldiskfs/kernel_patches/patches/export-ext3-2.6-rhel4.patch create mode 100644 ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch create mode 100644 ldiskfs/kernel_patches/patches/iopen-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.diff create mode 100644 lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/iopen-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/lookup_bdev_init_intent.patch create mode 100644 lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch create mode 100644 lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch create mode 100644 
lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch create mode 100644 lustre/kernel_patches/series/2.6-rhel4.series diff --git a/ldiskfs/kernel_patches/patches/export-ext3-2.6-rhel4.patch b/ldiskfs/kernel_patches/patches/export-ext3-2.6-rhel4.patch new file mode 100644 index 0000000..7b4f0c8 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/export-ext3-2.6-rhel4.patch @@ -0,0 +1,33 @@ +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:50:46.077845320 +0200 ++++ linux-stage/fs/ext3/super.c 2005-02-25 14:51:32.241827328 +0200 +@@ -123,6 +123,8 @@ + journal_abort_handle(handle); + } + ++EXPORT_SYMBOL(ext3_journal_abort_handle); ++ + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. + * +@@ -2002,6 +2004,8 @@ + return ret; + } + ++EXPORT_SYMBOL(ext3_force_commit); ++ + /* + * Ext3 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this +@@ -2433,6 +2437,10 @@ + unsigned long *blocks, int *created, int create); + EXPORT_SYMBOL(ext3_map_inode_page); + ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_set_handle); ++EXPORT_SYMBOL(ext3_bread); ++ + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); diff --git a/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch new file mode 100644 index 0000000..1c5c6ab --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch @@ -0,0 +1,177 @@ + fs/ext3/ialloc.c | 35 ++++++++++++++++++++++++++++++++++- + fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ + fs/ext3/namei.c | 21 +++++++++++++++++---- + include/linux/dcache.h | 5 
+++++ + include/linux/ext3_fs.h | 5 ++++- + 5 files changed, 85 insertions(+), 6 deletions(-) + +Index: uml-2.6.3/fs/ext3/ialloc.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/ialloc.c 2004-02-20 15:00:48.000000000 +0800 ++++ uml-2.6.3/fs/ext3/ialloc.c 2004-02-21 00:24:45.202693776 +0800 +@@ -420,7 +420,8 @@ + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + */ +-struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ++struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, ++ unsigned long goal) + { + struct super_block *sb; + struct buffer_head *bitmap_bh = NULL; +@@ -448,6 +449,38 @@ + + sbi = EXT3_SB(sb); + es = sbi->s_es; ++ if (goal) { ++ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); ++ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); ++ gdp = ext3_get_group_desc(sb, group, &bh2); ++ ++ err = -EIO; ++ bitmap_bh = read_inode_bitmap (sb, group); ++ if (!bitmap_bh) ++ goto fail; ++ ++ BUFFER_TRACE(bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) goto fail; ++ ++ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), ++ ino, bitmap_bh->b_data)) { ++ printk(KERN_ERR "goal inode %lu unavailable\n", goal); ++ /* Oh well, we tried. */ ++ goto continue_allocation; ++ } ++ ++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) goto fail; ++ ++ /* We've shortcircuited the allocation system successfully, ++ * now finish filling in the inode. 
++ */ ++ goto got; ++ } ++ ++continue_allocation: + if (S_ISDIR(mode)) { + if (test_opt (sb, OLDALLOC)) + group = find_group_dir(sb, dir); +Index: uml-2.6.3/fs/ext3/ioctl.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/ioctl.c 2004-01-09 14:59:26.000000000 +0800 ++++ uml-2.6.3/fs/ext3/ioctl.c 2004-02-21 00:21:04.541239416 +0800 +@@ -24,6 +24,31 @@ + ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { ++ case EXT3_IOC_CREATE_INUM: { ++ char name[32]; ++ struct dentry *dchild, *dparent; ++ int rc = 0; ++ ++ dparent = list_entry(inode->i_dentry.next, struct dentry, ++ d_alias); ++ snprintf(name, sizeof name, "%lu", arg); ++ dchild = lookup_one_len(name, dparent, strlen(name)); ++ if (dchild->d_inode) { ++ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", ++ dparent->d_name.len, dparent->d_name.name, arg, ++ dchild->d_inode->i_ino); ++ rc = -EEXIST; ++ } else { ++ dchild->d_fsdata = (void *)arg; ++ rc = vfs_create(inode, dchild, 0644, NULL); ++ if (rc) ++ printk(KERN_ERR "vfs_create: %d\n", rc); ++ else if (dchild->d_inode->i_ino != arg) ++ rc = -EEXIST; ++ } ++ dput(dchild); ++ return rc; ++ } + case EXT3_IOC_GETFLAGS: + flags = ei->i_flags & EXT3_FL_USER_VISIBLE; + return put_user(flags, (int *) arg); +Index: uml-2.6.3/fs/ext3/namei.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/namei.c 2004-02-20 15:01:27.000000000 +0800 ++++ uml-2.6.3/fs/ext3/namei.c 2004-02-21 00:21:04.611228776 +0800 +@@ -1617,6 +1617,19 @@ + return err; + } + ++static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, ++ int mode, struct dentry *dentry) ++{ ++ unsigned long inum = 0; ++ ++ if (dentry->d_fsdata != NULL) { ++ struct dentry_params *param = ++ (struct dentry_params *) dentry->d_fsdata; ++ inum = param->p_inum; ++ } ++ return ext3_new_inode(handle, dir, mode, inum); ++} ++ + /* + * By the time this is called, we already have created + * the 
directory cache entry for the new file, but it +@@ -1640,7 +1653,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, mode); ++ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext3_file_inode_operations; +@@ -1670,7 +1683,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, mode); ++ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +@@ -1702,7 +1715,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); ++ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +@@ -2094,7 +2107,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); ++ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +Index: uml-2.6.3/include/linux/ext3_fs.h +=================================================================== +--- uml-2.6.3.orig/include/linux/ext3_fs.h 2004-01-09 14:59:44.000000000 +0800 ++++ uml-2.6.3/include/linux/ext3_fs.h 2004-02-21 00:21:04.613228472 +0800 +@@ -707,7 +708,8 @@ + dx_hash_info *hinfo); + + /* ialloc.c */ +-extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); ++extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, ++ unsigned long); + extern void ext3_free_inode (handle_t *, struct inode *); + extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); + extern unsigned long ext3_count_free_inodes (struct super_block *); +@@ -792,4 +794,6 @@ + + #endif /* __KERNEL__ */ + ++/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). 
*/ ++#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) + #endif /* _LINUX_EXT3_FS_H */ diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6-rhel4.patch b/ldiskfs/kernel_patches/patches/iopen-2.6-rhel4.patch new file mode 100644 index 0000000..1547591 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/iopen-2.6-rhel4.patch @@ -0,0 +1,467 @@ +Index: linux-stage/fs/ext3/Makefile +=================================================================== +--- linux-stage.orig/fs/ext3/Makefile 2005-02-25 14:31:53.151076368 +0200 ++++ linux-stage/fs/ext3/Makefile 2005-02-25 14:41:51.259150120 +0200 +@@ -4,7 +4,7 @@ + + obj-$(CONFIG_EXT3_FS) += ext3.o + +-ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\ + ioctl.o namei.o super.o symlink.o hash.o resize.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +Index: linux-stage/fs/ext3/inode.c +=================================================================== +--- linux-stage.orig/fs/ext3/inode.c 2005-02-25 14:37:30.983718000 +0200 ++++ linux-stage/fs/ext3/inode.c 2005-02-25 14:47:42.069818792 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -2408,6 +2409,8 @@ + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ++ if (ext3_iopen_get_inode(inode)) ++ return; + + if (ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; +Index: linux-stage/fs/ext3/iopen.c +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.c 2005-02-25 14:41:01.017787968 +0200 ++++ linux-stage/fs/ext3/iopen.c 2005-02-25 14:41:01.045783712 +0200 +@@ -0,0 +1,274 @@ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). 
++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ * ++ * ++ * Invariants: ++ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias ++ * for an inode at one time. ++ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry ++ * aliases on an inode at the same time. ++ * ++ * If we have any connected dentry aliases for an inode, use one of those ++ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED ++ * dentry for this inode, which thereafter will be found by the dcache ++ * when looking up this inode number in __iopen__, so we don't return here ++ * until it is gone. ++ * ++ * If we get an inode via a regular name lookup, then we "rename" the ++ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures ++ * existing users of the disconnected dentry will continue to use the same ++ * dentry as the connected users, and there will never be both kinds of ++ * dentry aliases at one time. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. 
++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ spin_lock(&alternate->d_lock); ++ alternate->d_flags |= DCACHE_REFERENCED; ++ spin_unlock(&alternate->d_lock); ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++ ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++ 
__typeof__ (x) __tmp = x; \ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry *dentry, struct dentry *target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ old_name = dentry->d_iname; ++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++/* This function is spliced into ext3_lookup and does the move of a ++ * disconnected dentry (if it exists) to a connected dentry. ++ */ ++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, ++ int rehash) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* verify this dentry is really new */ ++ assert(dentry->d_inode == NULL); ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ if (rehash) ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ assert(list_empty(&dentry->d_subdirs)); ++ ++ spin_lock(&dcache_lock); ++ if (!inode) ++ goto do_rehash; ++ ++ /* preferrably return a connected dentry */ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) ++ goto do_instantiate; ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~ DCACHE_DISCONNECTED; ++ security_d_instantiate(goal, inode); ++ __d_rehash(dentry, 0); ++ __d_move(goal, dentry); ++ spin_unlock(&dcache_lock); ++ iput(inode); ++ ++ return goal; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++do_instantiate: ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++do_rehash: ++ if 
(rehash) ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. ++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this fuction returns 0. 
++ */ ++int ext3_iopen_get_inode(struct inode *inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +Index: linux-stage/fs/ext3/iopen.h +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.h 2005-02-25 14:41:01.017787968 +0200 ++++ linux-stage/fs/ext3/iopen.h 2005-02-25 14:41:01.045783712 +0200 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode *inode); ++extern struct dentry *iopen_connect_dentry(struct dentry *dentry, ++ struct inode *inode, int rehash); +Index: linux-stage/fs/ext3/namei.c +=================================================================== +--- linux-stage.orig/fs/ext3/namei.c 2005-02-25 14:37:28.975023368 +0200 ++++ linux-stage/fs/ext3/namei.c 2005-02-25 14:46:43.090784968 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -980,6 +981,9 @@ + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -990,10 +994,8 @@ + if (!inode) + return ERR_PTR(-EACCES); + } +- if (inode) +- return d_splice_alias(inode, dentry); +- d_add(dentry, inode); +- return NULL; ++ ++ return iopen_connect_dentry(dentry, inode, 1); + } + + +@@ -2037,10 +2039,6 @@ + inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; +- /* There's no need to set i_disksize: the fact that i_nlink is +- * zero will ensure that the right thing happens during any +- * recovery. 
*/ +- inode->i_size = 0; + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +@@ -2163,6 +2161,23 @@ + return err; + } + ++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ ++static int ext3_add_link(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int err = ext3_add_entry(handle, dentry, inode); ++ if (!err) { ++ err = ext3_mark_inode_dirty(handle, inode); ++ if (err == 0) { ++ dput(iopen_connect_dentry(dentry, inode, 0)); ++ return 0; ++ } ++ } ++ ext3_dec_count(handle, inode); ++ iput(inode); ++ return err; ++} ++ + static int ext3_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) + { +@@ -2186,7 +2201,8 @@ + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + +- err = ext3_add_nondir(handle, dentry, inode); ++ err = ext3_add_link(handle, dentry, inode); ++ ext3_orphan_del(handle,inode); + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:37:30.987717392 +0200 ++++ linux-stage/fs/ext3/super.c 2005-02-25 14:44:50.495901992 +0200 +@@ -586,6 +586,7 @@ + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, ++ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + }; + +@@ -633,6 +634,9 @@ + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_ignore, "usrquota"}, ++ {Opt_iopen, "iopen"}, ++ {Opt_noiopen, "noiopen"}, ++ {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_barrier, "barrier=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, +@@ -914,6 +918,18 @@ + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; ++ case Opt_iopen: ++ 
set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_noiopen: ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_iopen_nopriv: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; + case Opt_ignore: + break; + case Opt_resize: +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:37:28.977023064 +0200 ++++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:49:00.569884968 +0200 +@@ -355,6 +355,8 @@ + #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ + #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */ + #define EXT3_MOUNT_RESERVATION 0x20000 /* Preallocation */ ++#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch b/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch new file mode 100644 index 0000000..7b4f0c8 --- /dev/null +++ b/lustre/kernel_patches/patches/export-ext3-2.6-rhel4.patch @@ -0,0 +1,33 @@ +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:50:46.077845320 +0200 ++++ linux-stage/fs/ext3/super.c 2005-02-25 14:51:32.241827328 +0200 +@@ -123,6 +123,8 @@ + journal_abort_handle(handle); + } + ++EXPORT_SYMBOL(ext3_journal_abort_handle); ++ + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. 
+ * +@@ -2002,6 +2004,8 @@ + return ret; + } + ++EXPORT_SYMBOL(ext3_force_commit); ++ + /* + * Ext3 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this +@@ -2433,6 +2437,10 @@ + unsigned long *blocks, int *created, int create); + EXPORT_SYMBOL(ext3_map_inode_page); + ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_set_handle); ++EXPORT_SYMBOL(ext3_bread); ++ + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); diff --git a/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.diff b/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.diff new file mode 100644 index 0000000..49528cf --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.diff @@ -0,0 +1,20 @@ +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:53:56.424908168 +0200 ++++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:53:59.376459464 +0200 +@@ -361,12 +361,13 @@ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ +-#ifndef _LINUX_EXT2_FS_H ++#ifndef clear_opt + #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt + #define set_opt(o, opt) o |= EXT3_MOUNT_##opt + #define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ + EXT3_MOUNT_##opt) +-#else ++#endif ++#ifndef EXT2_MOUNT_NOLOAD + #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD + #define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT + #define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS diff --git a/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch b/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch new file mode 100644 index 0000000..1c5c6ab --- 
/dev/null +++ b/lustre/kernel_patches/patches/ext3-wantedi-2.6-rhel4.patch @@ -0,0 +1,177 @@ + fs/ext3/ialloc.c | 35 ++++++++++++++++++++++++++++++++++- + fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ + fs/ext3/namei.c | 21 +++++++++++++++++---- + include/linux/dcache.h | 5 +++++ + include/linux/ext3_fs.h | 5 ++++- + 5 files changed, 85 insertions(+), 6 deletions(-) + +Index: uml-2.6.3/fs/ext3/ialloc.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/ialloc.c 2004-02-20 15:00:48.000000000 +0800 ++++ uml-2.6.3/fs/ext3/ialloc.c 2004-02-21 00:24:45.202693776 +0800 +@@ -420,7 +420,8 @@ + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + */ +-struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ++struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode, ++ unsigned long goal) + { + struct super_block *sb; + struct buffer_head *bitmap_bh = NULL; +@@ -448,6 +449,38 @@ + + sbi = EXT3_SB(sb); + es = sbi->s_es; ++ if (goal) { ++ group = (goal - 1) / EXT3_INODES_PER_GROUP(sb); ++ ino = (goal - 1) % EXT3_INODES_PER_GROUP(sb); ++ gdp = ext3_get_group_desc(sb, group, &bh2); ++ ++ err = -EIO; ++ bitmap_bh = read_inode_bitmap (sb, group); ++ if (!bitmap_bh) ++ goto fail; ++ ++ BUFFER_TRACE(bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) goto fail; ++ ++ if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), ++ ino, bitmap_bh->b_data)) { ++ printk(KERN_ERR "goal inode %lu unavailable\n", goal); ++ /* Oh well, we tried. */ ++ goto continue_allocation; ++ } ++ ++ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) goto fail; ++ ++ /* We've shortcircuited the allocation system successfully, ++ * now finish filling in the inode. 
++ */ ++ goto got; ++ } ++ ++continue_allocation: + if (S_ISDIR(mode)) { + if (test_opt (sb, OLDALLOC)) + group = find_group_dir(sb, dir); +Index: uml-2.6.3/fs/ext3/ioctl.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/ioctl.c 2004-01-09 14:59:26.000000000 +0800 ++++ uml-2.6.3/fs/ext3/ioctl.c 2004-02-21 00:21:04.541239416 +0800 +@@ -24,6 +24,31 @@ + ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { ++ case EXT3_IOC_CREATE_INUM: { ++ char name[32]; ++ struct dentry *dchild, *dparent; ++ int rc = 0; ++ ++ dparent = list_entry(inode->i_dentry.next, struct dentry, ++ d_alias); ++ snprintf(name, sizeof name, "%lu", arg); ++ dchild = lookup_one_len(name, dparent, strlen(name)); ++ if (dchild->d_inode) { ++ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", ++ dparent->d_name.len, dparent->d_name.name, arg, ++ dchild->d_inode->i_ino); ++ rc = -EEXIST; ++ } else { ++ dchild->d_fsdata = (void *)arg; ++ rc = vfs_create(inode, dchild, 0644, NULL); ++ if (rc) ++ printk(KERN_ERR "vfs_create: %d\n", rc); ++ else if (dchild->d_inode->i_ino != arg) ++ rc = -EEXIST; ++ } ++ dput(dchild); ++ return rc; ++ } + case EXT3_IOC_GETFLAGS: + flags = ei->i_flags & EXT3_FL_USER_VISIBLE; + return put_user(flags, (int *) arg); +Index: uml-2.6.3/fs/ext3/namei.c +=================================================================== +--- uml-2.6.3.orig/fs/ext3/namei.c 2004-02-20 15:01:27.000000000 +0800 ++++ uml-2.6.3/fs/ext3/namei.c 2004-02-21 00:21:04.611228776 +0800 +@@ -1617,6 +1617,19 @@ + return err; + } + ++static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir, ++ int mode, struct dentry *dentry) ++{ ++ unsigned long inum = 0; ++ ++ if (dentry->d_fsdata != NULL) { ++ struct dentry_params *param = ++ (struct dentry_params *) dentry->d_fsdata; ++ inum = param->p_inum; ++ } ++ return ext3_new_inode(handle, dir, mode, inum); ++} ++ + /* + * By the time this is called, we already have created + * the 
directory cache entry for the new file, but it +@@ -1640,7 +1653,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, mode); ++ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext3_file_inode_operations; +@@ -1670,7 +1683,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, mode); ++ inode = ext3_new_inode_wantedi (handle, dir, mode, dentry); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +@@ -1702,7 +1715,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); ++ inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +@@ -2094,7 +2107,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); ++ inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +Index: uml-2.6.3/include/linux/ext3_fs.h +=================================================================== +--- uml-2.6.3.orig/include/linux/ext3_fs.h 2004-01-09 14:59:44.000000000 +0800 ++++ uml-2.6.3/include/linux/ext3_fs.h 2004-02-21 00:21:04.613228472 +0800 +@@ -707,7 +708,8 @@ + dx_hash_info *hinfo); + + /* ialloc.c */ +-extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); ++extern struct inode * ext3_new_inode (handle_t *, struct inode *, int, ++ unsigned long); + extern void ext3_free_inode (handle_t *, struct inode *); + extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); + extern unsigned long ext3_count_free_inodes (struct super_block *); +@@ -792,4 +794,6 @@ + + #endif /* __KERNEL__ */ + ++/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). 
*/ ++#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) + #endif /* _LINUX_EXT3_FS_H */ diff --git a/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch b/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch new file mode 100644 index 0000000..1547591 --- /dev/null +++ b/lustre/kernel_patches/patches/iopen-2.6-rhel4.patch @@ -0,0 +1,467 @@ +Index: linux-stage/fs/ext3/Makefile +=================================================================== +--- linux-stage.orig/fs/ext3/Makefile 2005-02-25 14:31:53.151076368 +0200 ++++ linux-stage/fs/ext3/Makefile 2005-02-25 14:41:51.259150120 +0200 +@@ -4,7 +4,7 @@ + + obj-$(CONFIG_EXT3_FS) += ext3.o + +-ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ++ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\ + ioctl.o namei.o super.o symlink.o hash.o resize.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +Index: linux-stage/fs/ext3/inode.c +=================================================================== +--- linux-stage.orig/fs/ext3/inode.c 2005-02-25 14:37:30.983718000 +0200 ++++ linux-stage/fs/ext3/inode.c 2005-02-25 14:47:42.069818792 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -2408,6 +2409,8 @@ + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ++ if (ext3_iopen_get_inode(inode)) ++ return; + + if (ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; +Index: linux-stage/fs/ext3/iopen.c +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.c 2005-02-25 14:41:01.017787968 +0200 ++++ linux-stage/fs/ext3/iopen.c 2005-02-25 14:41:01.045783712 +0200 +@@ -0,0 +1,274 @@ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). 
++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ * ++ * ++ * Invariants: ++ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias ++ * for an inode at one time. ++ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry ++ * aliases on an inode at the same time. ++ * ++ * If we have any connected dentry aliases for an inode, use one of those ++ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED ++ * dentry for this inode, which thereafter will be found by the dcache ++ * when looking up this inode number in __iopen__, so we don't return here ++ * until it is gone. ++ * ++ * If we get an inode via a regular name lookup, then we "rename" the ++ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures ++ * existing users of the disconnected dentry will continue to use the same ++ * dentry as the connected users, and there will never be both kinds of ++ * dentry aliases at one time. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. 
++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ ++ /* preferably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ spin_lock(&alternate->d_lock); ++ alternate->d_flags |= DCACHE_REFERENCED; ++ spin_unlock(&alternate->d_lock); ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++ ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++
__typeof__ (x) __tmp = x; \ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry *dentry, struct dentry *target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN_MIN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ old_name = dentry->d_iname; ++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++/* This function is spliced into ext3_lookup and does the move of a ++ * disconnected dentry (if it exists) to a connected dentry. ++ */ ++struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode, ++ int rehash) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* verify this dentry is really new */ ++ assert(dentry->d_inode == NULL); ++ assert(list_empty(&dentry->d_alias)); /* d_instantiate */ ++ if (rehash) ++ assert(d_unhashed(dentry)); /* d_rehash */ ++ assert(list_empty(&dentry->d_subdirs)); ++ ++ spin_lock(&dcache_lock); ++ if (!inode) ++ goto do_rehash; ++ ++ /* preferably return a connected dentry */ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ ++ if (!goal) ++ goto do_instantiate; ++ ++ /* Move the goal to the de hash queue */ ++ goal->d_flags &= ~ DCACHE_DISCONNECTED; ++ security_d_instantiate(goal, inode); ++ __d_rehash(dentry, 0); ++ __d_move(goal, dentry); ++ spin_unlock(&dcache_lock); ++ iput(inode); ++ ++ return goal; ++ ++ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ ++do_instantiate: ++ list_add(&dentry->d_alias, &inode->i_dentry); /* d_instantiate */ ++ dentry->d_inode = inode; ++do_rehash: ++ if
(rehash) ++ __d_rehash(dentry, 0); /* d_rehash */ ++ spin_unlock(&dcache_lock); ++ ++ return NULL; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. ++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. ++ */ ++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0.
++ */ ++int ext3_iopen_get_inode(struct inode *inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +Index: linux-stage/fs/ext3/iopen.h +=================================================================== +--- linux-stage.orig/fs/ext3/iopen.h 2005-02-25 14:41:01.017787968 +0200 ++++ linux-stage/fs/ext3/iopen.h 2005-02-25 14:41:01.045783712 +0200 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode *inode); ++extern struct dentry *iopen_connect_dentry(struct dentry *dentry, ++ struct inode *inode, int rehash); +Index: linux-stage/fs/ext3/namei.c +=================================================================== +--- linux-stage.orig/fs/ext3/namei.c 2005-02-25 14:37:28.975023368 +0200 ++++ linux-stage/fs/ext3/namei.c 2005-02-25 14:46:43.090784968 +0200 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -980,6 +981,9 @@ + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -990,10 +994,8 @@ + if (!inode) + return ERR_PTR(-EACCES); + } +- if (inode) +- return d_splice_alias(inode, dentry); +- d_add(dentry, inode); +- return NULL; ++ ++ return iopen_connect_dentry(dentry, inode, 1); + } + + +@@ -2037,10 +2039,6 @@ + inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; +- /* There's no need to set i_disksize: the fact that i_nlink is +- * zero will ensure that the right thing happens during any +- * recovery. 
*/ +- inode->i_size = 0; + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +@@ -2163,6 +2161,23 @@ + return err; + } + ++/* Like ext3_add_nondir() except for call to iopen_connect_dentry */ ++static int ext3_add_link(handle_t *handle, struct dentry *dentry, ++ struct inode *inode) ++{ ++ int err = ext3_add_entry(handle, dentry, inode); ++ if (!err) { ++ err = ext3_mark_inode_dirty(handle, inode); ++ if (err == 0) { ++ dput(iopen_connect_dentry(dentry, inode, 0)); ++ return 0; ++ } ++ } ++ ext3_dec_count(handle, inode); ++ iput(inode); ++ return err; ++} ++ + static int ext3_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) + { +@@ -2186,7 +2201,8 @@ + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + +- err = ext3_add_nondir(handle, dentry, inode); ++ err = ext3_add_link(handle, dentry, inode); ++ ext3_orphan_del(handle,inode); + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; +Index: linux-stage/fs/ext3/super.c +=================================================================== +--- linux-stage.orig/fs/ext3/super.c 2005-02-25 14:37:30.987717392 +0200 ++++ linux-stage/fs/ext3/super.c 2005-02-25 14:44:50.495901992 +0200 +@@ -586,6 +586,7 @@ + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, ++ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + }; + +@@ -633,6 +634,9 @@ + {Opt_ignore, "noquota"}, + {Opt_ignore, "quota"}, + {Opt_ignore, "usrquota"}, ++ {Opt_iopen, "iopen"}, ++ {Opt_noiopen, "noiopen"}, ++ {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_barrier, "barrier=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, +@@ -914,6 +918,18 @@ + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; ++ case Opt_iopen: ++ 
set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_noiopen: ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_iopen_nopriv: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; + case Opt_ignore: + break; + case Opt_resize: +Index: linux-stage/include/linux/ext3_fs.h +=================================================================== +--- linux-stage.orig/include/linux/ext3_fs.h 2005-02-25 14:37:28.977023064 +0200 ++++ linux-stage/include/linux/ext3_fs.h 2005-02-25 14:49:00.569884968 +0200 +@@ -355,6 +355,8 @@ + #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ + #define EXT3_MOUNT_BARRIER 0x10000 /* Use block barriers */ + #define EXT3_MOUNT_RESERVATION 0x20000 /* Preallocation */ ++#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H diff --git a/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch b/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch new file mode 100644 index 0000000..5555427 --- /dev/null +++ b/lustre/kernel_patches/patches/lookup_bdev_init_intent.patch @@ -0,0 +1,12 @@ +Index: linux-2.6.7/fs/block_dev.c +=================================================================== +--- linux-2.6.7.orig/fs/block_dev.c 2004-06-16 13:20:26.000000000 +0800 ++++ linux-2.6.7/fs/block_dev.c 2004-08-30 17:36:57.000000000 +0800 +@@ -832,6 +832,7 @@ + if (!path || !*path) + return ERR_PTR(-EINVAL); + ++ intent_init(&nd.intent, IT_LOOKUP); + error = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (error) + return ERR_PTR(error); diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch new file mode 100644 index 
0000000..9c66a2a --- /dev/null +++ b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch @@ -0,0 +1,117 @@ +Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/nfs/dir.c 2005-02-25 13:43:42.454529040 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/nfs/dir.c 2005-02-25 13:46:04.832884240 +0200 +@@ -791,7 +791,7 @@ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -812,7 +812,7 @@ + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -820,7 +820,7 @@ + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -836,7 +836,7 @@ + break; + /* This turned out not to be a regular file */ + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ +@@ -875,7 +875,7 @@ + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; +Index: linux-2.6.9-5.0.3.EL/fs/nfs/nfs4proc.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/nfs/nfs4proc.c 2004-10-19 
00:54:40.000000000 +0300 ++++ linux-2.6.9-5.0.3.EL/fs/nfs/nfs4proc.c 2005-02-25 13:44:27.537675360 +0200 +@@ -775,17 +775,17 @@ + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; +Index: linux-2.6.9-5.0.3.EL/fs/cifs/dir.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/cifs/dir.c 2004-10-19 00:54:37.000000000 +0300 ++++ linux-2.6.9-5.0.3.EL/fs/cifs/dir.c 2005-02-25 13:44:27.539675056 +0200 +@@ -199,23 +199,23 @@ + } + + if(nd) { +- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) ++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) + desiredAccess = GENERIC_READ; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) { + desiredAccess = GENERIC_WRITE; + write_only = TRUE; +- } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { ++ } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request */ + /* can cause unnecessary access denied on create */ + /* desiredAccess = GENERIC_ALL; */ + desiredAccess = GENERIC_READ | GENERIC_WRITE; + } + +- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; +- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == 
(O_CREAT | O_TRUNC)) ++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; +- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) ++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else { + cFYI(1,("Create flag not set in create function")); +@@ -400,7 +400,7 @@ + parent_dir_inode, direntry->d_name.name, direntry)); + + if(nd) { /* BB removeme */ +- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags)); ++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags)); + } /* BB removeme BB */ + /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */ + diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch new file mode 100644 index 0000000..d0aaa51 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch @@ -0,0 +1,826 @@ +Index: linux-2.6.9-5.0.3.EL/fs/exec.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/exec.c 2005-02-25 13:43:02.688574384 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/exec.c 2005-02-25 13:43:42.442530864 +0200 +@@ -124,9 +124,10 @@ + struct file * file; + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_OPEN); + +- nd.intent.open.flags = FMODE_READ; +- error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; ++ error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + if (error) + goto out; + +@@ -138,7 +139,7 @@ + if (error) + goto exit; + +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; +@@ -487,8 +488,9 @@ + int err; + struct file *file; + +- nd.intent.open.flags = FMODE_READ; +- err = 
path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ intent_init(&nd.intent, IT_OPEN); ++ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; ++ err = path_lookup(name, LOOKUP_FOLLOW, &nd); + file = ERR_PTR(err); + + if (!err) { +@@ -501,7 +503,7 @@ + err = -EACCES; + file = ERR_PTR(err); + if (!err) { +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { +Index: linux-2.6.9-5.0.3.EL/fs/namei.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/namei.c 2005-02-25 13:43:02.692573776 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/namei.c 2005-02-25 13:43:42.446530256 +0200 +@@ -272,8 +272,19 @@ + return 0; + } + ++void intent_release(struct lookup_intent *it) ++{ ++ if (!it) ++ return; ++ if (it->it_magic != INTENT_MAGIC) ++ return; ++ if (it->it_op_release) ++ it->it_op_release(it); ++} ++ + void path_release(struct nameidata *nd) + { ++ intent_release(&nd->intent); + dput(nd->dentry); + mntput(nd->mnt); + } +@@ -363,7 +374,10 @@ + { + struct dentry * result; + struct inode *dir = parent->d_inode; ++ int counter = 0; + ++again: ++ counter++; + down(&dir->i_sem); + /* + * First re-do the cached lookup just in case it was created +@@ -402,7 +416,10 @@ + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { + dput(result); +- result = ERR_PTR(-ENOENT); ++ if (counter > 10) ++ result = ERR_PTR(-ESTALE); ++ if (!IS_ERR(result)) ++ goto again; + } + } + return result; +@@ -432,7 +449,9 @@ + static inline int __vfs_follow_link(struct nameidata *nd, const char *link) + { + int res = 0; ++ struct lookup_intent it = nd->intent; + char *name; ++ + if (IS_ERR(link)) + goto fail; + +@@ -442,6 +461,9 @@ + /* weird __emul_prefix() stuff did it */ + goto out; + } ++ intent_init(&nd->intent, it.it_op); ++ nd->intent.it_flags = it.it_flags; ++ 
nd->intent.it_create_mode = it.it_create_mode; + res = link_path_walk(link, nd); + out: + if (nd->depth || res || nd->last_type!=LAST_NORM) +@@ -650,6 +672,33 @@ + return PTR_ERR(dentry); + } + ++static int revalidate_special(struct nameidata *nd) ++{ ++ struct dentry *dentry = nd->dentry; ++ int err, counter = 0; ++ ++ if (!dentry->d_op || !dentry->d_op->d_revalidate) ++ return 0; ++ revalidate_again: ++ if (!dentry->d_op->d_revalidate(dentry, nd)) { ++ struct dentry *new; ++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd))) ++ return err; ++ new = real_lookup(dentry->d_parent, &dentry->d_name, nd); ++ if (IS_ERR(new)) ++ return PTR_ERR(new); ++ d_invalidate(dentry); ++ dput(dentry); ++ nd->dentry = dentry = new; ++ counter++; ++ if (counter < 10) ++ goto revalidate_again; ++ printk("excessive revalidate_it loops\n"); ++ return -ESTALE; ++ } ++ return 0; ++} ++ + /* + * Name resolution. + * +@@ -752,7 +801,9 @@ + + if (inode->i_op->follow_link) { + mntget(next.mnt); ++ nd->flags |= LOOKUP_LINK_NOTLAST; + err = do_follow_link(next.dentry, nd); ++ nd->flags &= ~LOOKUP_LINK_NOTLAST; + dput(next.dentry); + mntput(next.mnt); + if (err) +@@ -791,14 +842,26 @@ + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: ++ nd->flags |= LOOKUP_LAST; ++ err = revalidate_special(nd); ++ nd->flags &= ~LOOKUP_LAST; ++ if (!nd->dentry->d_inode) ++ err = -ENOENT; ++ if (err) { ++ path_release(nd); ++ goto return_err; ++ } + goto return_reval; + } ++ + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) + break; + } ++ nd->flags |= LOOKUP_LAST; + err = do_lookup(nd, &this, &next, atomic); ++ nd->flags &= ~LOOKUP_LAST; + if (err) + break; + follow_mount(&next.mnt, &next.dentry); +@@ -1016,7 +1079,7 @@ + } + + /* SMP-safe */ +-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct 
nameidata *nd) + { + unsigned long hash; + struct qstr this; +@@ -1036,11 +1099,16 @@ + } + this.hash = end_name_hash(hash); + +- return lookup_hash(&this, base); ++ return __lookup_hash(&this, base, nd); + access: + return ERR_PTR(-EACCES); + } + ++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++{ ++ return lookup_one_len_it(name, base, len, NULL); ++} ++ + /* + * namei() + * +@@ -1052,7 +1120,7 @@ + * that namei follows links, while lnamei does not. + * SMP-safe + */ +-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) + { + char *tmp = getname(name); + int err = PTR_ERR(tmp); +@@ -1064,6 +1132,12 @@ + return err; + } + ++int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_LOOKUP); ++ return __user_walk_it(name, flags, nd); ++} ++ + /* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. +@@ -1347,8 +1421,8 @@ + acc_mode |= MAY_APPEND; + + /* Fill in the open() intent data */ +- nd->intent.open.flags = flag; +- nd->intent.open.create_mode = mode; ++ nd->intent.it_flags = flag; ++ nd->intent.it_create_mode = mode; + + /* + * The simplest case - just a plain lookup. +@@ -1363,6 +1437,7 @@ + /* + * Create - we need to know the parent. 
+ */ ++ nd->intent.it_op |= IT_CREAT; + error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + if (error) + return error; +@@ -1379,7 +1454,9 @@ + dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + + do_last: + error = PTR_ERR(dentry); +@@ -1492,7 +1569,9 @@ + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + putname(nd->last.name); + goto do_last; + } +Index: linux-2.6.9-5.0.3.EL/fs/namespace.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/namespace.c 2005-02-25 13:43:02.695573320 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/namespace.c 2005-02-25 13:43:42.448529952 +0200 +@@ -61,6 +61,7 @@ + INIT_LIST_HEAD(&mnt->mnt_mounts); + INIT_LIST_HEAD(&mnt->mnt_list); + INIT_LIST_HEAD(&mnt->mnt_fslink); ++ INIT_LIST_HEAD(&mnt->mnt_lustre_list); + if (name) { + int size = strlen(name)+1; + char *newname = kmalloc(size, GFP_KERNEL); +@@ -114,6 +115,7 @@ + + static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) + { ++ memset(old_nd, 0, sizeof(*old_nd)); + old_nd->dentry = mnt->mnt_mountpoint; + old_nd->mnt = mnt->mnt_parent; + mnt->mnt_parent = mnt; +@@ -177,6 +179,9 @@ + { + struct super_block *sb = mnt->mnt_sb; + dput(mnt->mnt_root); ++ spin_lock(&dcache_lock); ++ list_del(&mnt->mnt_lustre_list); ++ spin_unlock(&dcache_lock); + free_vfsmnt(mnt); + deactivate_super(sb); + } +@@ -403,6 +408,8 @@ + */ + + lock_kernel(); ++ if (sb->s_op->umount_lustre) ++ sb->s_op->umount_lustre(sb); + if( (flags&MNT_FORCE) && sb->s_op->umount_begin) + sb->s_op->umount_begin(sb); + unlock_kernel(); +@@ -627,6 +634,7 @@ + return err; + if (!old_name || !*old_name) + return -EINVAL; ++ intent_init(&old_nd.intent, IT_LOOKUP); + err = path_lookup(old_name, 
LOOKUP_FOLLOW, &old_nd); + if (err) + return err; +@@ -701,6 +709,7 @@ + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; ++ intent_init(&old_nd.intent, IT_LOOKUP); + err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + if (err) + return err; +@@ -1012,6 +1021,7 @@ + int retval = 0; + int mnt_flags = 0; + ++ intent_init(&nd.intent, IT_LOOKUP); + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; +Index: linux-2.6.9-5.0.3.EL/fs/open.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/open.c 2005-02-25 13:43:02.725568760 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/open.c 2005-02-25 13:43:42.451529496 +0200 +@@ -215,12 +215,12 @@ + struct nameidata nd; + struct inode * inode; + int error; +- ++ intent_init(&nd.intent, IT_GETATTR); + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; + +- error = user_path_walk(path, &nd); ++ error = user_path_walk_it(path, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; +@@ -474,6 +474,7 @@ + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; ++ intent_init(&nd.intent, IT_GETATTR); + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ + return -EINVAL; +@@ -498,13 +499,14 @@ + else + current->cap_effective = current->cap_permitted; + +- res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); ++ res = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + if (!res) { + res = permission(nd.dentry->d_inode, mode, &nd); + /* SuS v2 requires we report a read only fs too */ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) + res = -EROFS; ++ + path_release(&nd); + } + +@@ -519,8 +521,9 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_CHDIR); + +- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + if (error) + goto out; + +@@ -572,8 +575,9 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + if (error) + goto out; + +@@ -754,27 +758,8 @@ + * for the internal routines (ie open_namei()/follow_link() etc). 00 is + * used by symlinks. 
+ */ +-struct file *filp_open(const char * filename, int flags, int mode) +-{ +- int namei_flags, error; +- struct nameidata nd; +- +- namei_flags = flags; +- if ((namei_flags+1) & O_ACCMODE) +- namei_flags++; +- if (namei_flags & O_TRUNC) +- namei_flags |= 2; +- +- error = open_namei(filename, namei_flags, mode, &nd); +- if (!error) +- return dentry_open(nd.dentry, nd.mnt, flags); +- +- return ERR_PTR(error); +-} +- +-EXPORT_SYMBOL(filp_open); +- +-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, ++ struct lookup_intent *it) + { + struct file * f; + struct inode *inode; +@@ -786,6 +771,7 @@ + goto cleanup_dentry; + f->f_flags = flags; + f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; ++ f->f_it = it; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); +@@ -804,6 +790,7 @@ + error = f->f_op->open(inode,f); + if (error) + goto cleanup_all; ++ intent_release(it); + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + +@@ -829,6 +816,7 @@ + cleanup_file: + put_filp(f); + cleanup_dentry: ++ intent_release(it); + dput(dentry); + mntput(mnt); + return ERR_PTR(error); +@@ -836,6 +824,36 @@ + + EXPORT_SYMBOL(dentry_open); + ++struct file *filp_open(const char * filename, int flags, int mode) ++{ ++ int namei_flags, error; ++ struct file * temp_filp; ++ struct nameidata nd; ++ intent_init(&nd.intent, IT_OPEN); ++ ++ namei_flags = flags; ++ if ((namei_flags+1) & O_ACCMODE) ++ namei_flags++; ++ if (namei_flags & O_TRUNC) ++ namei_flags |= 2; ++ ++ error = open_namei(filename, namei_flags, mode, &nd); ++ if (!error) { ++ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); ++ return temp_filp; ++ } ++ return ERR_PTR(error); ++} ++ ++ ++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++{ ++ struct lookup_intent it; ++ intent_init(&it, 
IT_LOOKUP); ++ ++ return dentry_open_it(dentry, mnt, flags, &it); ++} ++ + /* + * Find an empty file descriptor entry, and mark it busy. + */ +Index: linux-2.6.9-5.0.3.EL/fs/stat.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/stat.c 2005-02-25 13:43:02.726568608 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/stat.c 2005-02-25 13:43:42.452529344 +0200 +@@ -37,7 +37,7 @@ + + EXPORT_SYMBOL(generic_fillattr); + +-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; + int retval; +@@ -46,6 +46,8 @@ + if (retval) + return retval; + ++ if (inode->i_op->getattr_it) ++ return inode->i_op->getattr_it(mnt, dentry, it, stat); + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +@@ -62,14 +64,20 @@ + + EXPORT_SYMBOL(vfs_getattr); + ++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++{ ++ return vfs_getattr_it(mnt, dentry, NULL, stat); ++} ++ + int vfs_stat(char __user *name, struct kstat *stat) + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk(name, &nd); ++ error = user_path_walk_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -81,10 +89,11 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk_link(name, &nd); ++ error = user_path_walk_link_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -96,9 +105,12 @@ + { + struct file *f = fget(fd); + int error = -EBADF; ++ struct nameidata nd; ++ 
intent_init(&nd.intent, IT_GETATTR); + + if (f) { +- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); ++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); ++ intent_release(&nd.intent); + fput(f); + } + return error; +Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/nfs/dir.c 2005-02-25 13:43:02.729568152 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/nfs/dir.c 2005-02-25 13:52:18.971006600 +0200 +@@ -718,7 +718,7 @@ + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; +- return (nd->intent.open.flags & O_EXCL) != 0; ++ return (nd->intent.it_flags & O_EXCL) != 0; + } + + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +@@ -1043,7 +1043,7 @@ + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) +- open_flags = nd->intent.open.flags; ++ open_flags = nd->intent.it_flags; + + /* + * The 0 argument passed into the create function should one day +Index: linux-2.6.9-5.0.3.EL/fs/inode.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/fs/inode.c 2005-02-25 13:43:02.731567848 +0200 ++++ linux-2.6.9-5.0.3.EL/fs/inode.c 2005-02-25 13:43:42.457528584 +0200 +@@ -233,6 +233,7 @@ + inodes_stat.nr_unused--; + } + ++EXPORT_SYMBOL(__iget); + /** + * clear_inode - clear an inode + * @inode: inode to clear +Index: linux-2.6.9-5.0.3.EL/include/linux/dcache.h +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/include/linux/dcache.h 2005-02-25 13:43:02.733567544 +0200 ++++ linux-2.6.9-5.0.3.EL/include/linux/dcache.h 2005-02-25 13:43:42.459528280 +0200 +@@ -4,6 +4,7 @@ + #ifdef __KERNEL__ + + #include ++#include + #include + #include + #include +@@ -37,6 +38,8 @@ + const unsigned char *name; + }; + ++#include ++ + struct dentry_stat_t { + int nr_dentry; + int nr_unused; 
+Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/include/linux/fs.h 2005-02-25 13:43:02.736567088 +0200 ++++ linux-2.6.9-5.0.3.EL/include/linux/fs.h 2005-02-25 13:43:42.462527824 +0200 +@@ -74,6 +74,7 @@ + + #define FMODE_READ 1 + #define FMODE_WRITE 2 ++#define FMODE_EXEC 4 + + /* Internal kernel extensions */ + #define FMODE_LSEEK 4 +@@ -258,6 +259,8 @@ + #define ATTR_ATTR_FLAG 1024 + #define ATTR_KILL_SUID 2048 + #define ATTR_KILL_SGID 4096 ++#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ + + /* + * This is the Inode Attributes structure, used for notify_change(). It +@@ -456,6 +459,7 @@ + struct block_device *i_bdev; + struct cdev *i_cdev; + int i_cindex; ++ void *i_filterdata; + + __u32 i_generation; + +@@ -589,6 +593,7 @@ + spinlock_t f_ep_lock; + #endif /* #ifdef CONFIG_EPOLL */ + struct address_space *f_mapping; ++ struct lookup_intent *f_it; + }; + extern spinlock_t files_lock; + #define file_list_lock() spin_lock(&files_lock); +@@ -934,7 +939,9 @@ + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int, struct nameidata *); + int (*setattr) (struct dentry *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ++ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); +@@ -974,6 +981,7 @@ + int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct super_block *); ++ void (*umount_lustre) (struct super_block *); + + int (*show_options)(struct 
seq_file *, struct vfsmount *); + }; +@@ -1164,6 +1172,7 @@ + extern struct vfsmount *kern_mount(struct file_system_type *); + extern int may_umount_tree(struct vfsmount *); + extern int may_umount(struct vfsmount *); ++struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); + extern long do_mount(char *, char *, char *, unsigned long, void *); + + extern int vfs_statfs(struct super_block *, struct kstatfs *); +@@ -1228,6 +1237,7 @@ + extern int do_truncate(struct dentry *, loff_t start); + extern struct file *filp_open(const char *, int, int); + extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); ++extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); + extern int filp_close(struct file *, fl_owner_t id); + extern char * getname(const char __user *); + +Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/include/linux/namei.h 2005-02-25 13:43:02.737566936 +0200 ++++ linux-2.6.9-5.0.3.EL/include/linux/namei.h 2005-02-25 13:53:33.690647488 +0200 +@@ -2,14 +2,49 @@ + #define _LINUX_NAMEI_H + + #include ++#include + + struct vfsmount; ++struct nameidata; + +-struct open_intent { +- int flags; +- int create_mode; ++/* intent opcodes */ ++#define IT_OPEN (1) ++#define IT_CREAT (1<<1) ++#define IT_READDIR (1<<2) ++#define IT_GETATTR (1<<3) ++#define IT_LOOKUP (1<<4) ++#define IT_UNLINK (1<<5) ++#define IT_TRUNC (1<<6) ++#define IT_GETXATTR (1<<7) ++#define IT_CHDIR (1<<8) ++ ++struct lustre_intent_data { ++ int it_disposition; ++ int it_status; ++ __u64 it_lock_handle; ++ void *it_data; ++ int it_lock_mode; + }; + ++#define INTENT_MAGIC 0x19620323 ++struct lookup_intent { ++ int it_magic; ++ void (*it_op_release)(struct lookup_intent *); ++ int it_op; ++ int it_flags; ++ int it_create_mode; ++ union { ++ struct lustre_intent_data lustre; ++ } d; ++}; ++ ++static inline 
void intent_init(struct lookup_intent *it, int op) ++{ ++ memset(it, 0, sizeof(*it)); ++ it->it_magic = INTENT_MAGIC; ++ it->it_op = op; ++} ++ + enum { MAX_NESTED_LINKS = 8 }; + + struct nameidata { +@@ -20,11 +55,8 @@ + int last_type; + unsigned depth; + char *saved_names[MAX_NESTED_LINKS + 1]; + +- /* Intent data */ +- union { +- struct open_intent open; +- } intent; ++ struct lookup_intent intent; + }; + + /* +@@ -46,6 +79,8 @@ + #define LOOKUP_PARENT 16 + #define LOOKUP_NOALT 32 + #define LOOKUP_ATOMIC 64 ++#define LOOKUP_LAST (1<<7) ++#define LOOKUP_LINK_NOTLAST (1<<8) + + /* + * Intent data +@@ -55,6 +90,12 @@ + #define LOOKUP_ACCESS (0x0400) + + extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); ++extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); ++#define user_path_walk_it(name,nd) \ ++ __user_walk_it(name, LOOKUP_FOLLOW, nd) ++#define user_path_walk_link_it(name,nd) \ ++ __user_walk_it(name, 0, nd) ++extern void intent_release(struct lookup_intent *); + #define user_path_walk(name,nd) \ + __user_walk(name, LOOKUP_FOLLOW, nd) + #define user_path_walk_link(name,nd) \ +@@ -67,7 +108,6 @@ + + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); + extern struct dentry * lookup_hash(struct qstr *, struct dentry *); +- + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); + +Index: linux-2.6.9-5.0.3.EL/include/linux/mount.h +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/include/linux/mount.h 2005-02-25 13:43:02.738566784 +0200 ++++ linux-2.6.9-5.0.3.EL/include/linux/mount.h 2005-02-25 13:43:42.464527520 +0200 +@@ -34,6 +34,8 @@ + struct list_head mnt_list; + struct list_head mnt_fslink; /* link in fs-specific expiry list */ + struct namespace *mnt_namespace; /* containing namespace */ ++ struct list_head mnt_lustre_list; /* GNS mount 
list */ ++ unsigned long mnt_last_used; /* for GNS auto-umount (jiffies) */ + }; + + static inline struct vfsmount *mntget(struct vfsmount *mnt) +Index: linux-2.6.9-5.0.3.EL/kernel/exit.c +=================================================================== +--- linux-2.6.9-5.0.3.EL.orig/kernel/exit.c 2005-02-25 13:43:02.740566480 +0200 ++++ linux-2.6.9-5.0.3.EL/kernel/exit.c 2005-02-25 13:43:42.466527216 +0200 +@@ -244,6 +244,8 @@ + write_unlock_irq(&tasklist_lock); + } + ++EXPORT_SYMBOL(reparent_to_init); ++ + void __set_special_pids(pid_t session, pid_t pgrp) + { + struct task_struct *curr = current; +@@ -428,6 +430,8 @@ + __exit_files(tsk); + } + ++EXPORT_SYMBOL(exit_files); ++ + static inline void __put_fs_struct(struct fs_struct *fs) + { + /* No need to hold fs->lock if we are killing it */ diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch new file mode 100644 index 0000000..a2dab51 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_nointent-2.6-vanilla.patch @@ -0,0 +1,509 @@ + 0 files changed + +.old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c +.new.........fs/namei.c +Index: linux-2.6.4-51.0/fs/namei.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/namei.c 2004-04-05 17:36:42.000000000 -0400 ++++ linux-2.6.4-51.0/fs/namei.c 2004-04-05 17:36:43.000000000 -0400 +@@ -1276,7 +1276,7 @@ + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0); ++ error = do_truncate(dentry, 0, 1); + } + put_write_access(inode); + if (error) +@@ -1526,6 +1526,7 @@ + char * tmp; + struct dentry * dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + if (S_ISDIR(mode)) + return -EPERM; +@@ -1536,6 +1537,15 @@ + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mknod_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = 
op->mknod_raw(&nd, mode, dev); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1562,6 +1572,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1603,10 +1614,18 @@ + if (!IS_ERR(tmp)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1616,6 +1635,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1696,6 +1716,7 @@ + char * name; + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1716,6 +1737,16 @@ + error = -EBUSY; + goto exit1; + } ++ ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1774,6 +1805,7 @@ + struct dentry *dentry; + struct nameidata nd; + struct inode *inode = NULL; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1785,6 +1817,13 @@ + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = 
op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1852,10 +1891,18 @@ + if (!IS_ERR(to)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->symlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->symlink_raw(&nd, from); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1863,6 +1910,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(to); +@@ -1926,6 +1974,8 @@ + struct nameidata nd, old_nd; + int error; + char * to; ++ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent, IT_LOOKUP); + + to = getname(newname); + if (IS_ERR(to)) +@@ -1940,6 +1990,13 @@ + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ if (nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { +@@ -1990,7 +2047,7 @@ + * locking]. 
+ */ + int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + int error = 0; + struct inode *target; +@@ -2035,7 +2092,7 @@ + } + + int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + struct inode *target; + int error; +@@ -2112,6 +2169,8 @@ + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ intent_init(&newnd.intent, IT_LOOKUP); + + error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (error) +@@ -2134,6 +2193,13 @@ + if (newnd.last_type != LAST_NORM) + goto exit2; + ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ + trap = lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); +@@ -2165,8 +2231,7 @@ + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + exit5: + dput(new_dentry); + exit4: +Index: linux-2.6.4-51.0/fs/open.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/open.c 2004-04-05 17:36:42.000000000 -0400 ++++ linux-2.6.4-51.0/fs/open.c 2004-04-06 01:37:39.000000000 -0400 +@@ -187,9 +187,10 @@ + return error; + } + +-int do_truncate(struct dentry *dentry, loff_t length) ++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) + { + int err; ++ struct inode_operations *op = dentry->d_inode->i_op; + struct iattr newattrs; + + /* Not pretty: "inode->i_size" shouldn't really be signed. 
But it is. */ +@@ -200,7 +201,14 @@ + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&dentry->d_inode->i_sem); + down_write(&dentry->d_inode->i_alloc_sem); +- err = notify_change(dentry, &newattrs); ++ if (called_from_open) ++ newattrs.ia_valid |= ATTR_FROM_OPEN; ++ if (op->setattr_raw) { ++ newattrs.ia_valid |= ATTR_RAW; ++ newattrs.ia_ctime = CURRENT_TIME; ++ err = op->setattr_raw(dentry->d_inode, &newattrs); ++ } else ++ err = notify_change(dentry, &newattrs); + up_write(&dentry->d_inode->i_alloc_sem); + up(&dentry->d_inode->i_sem); + return err; +@@ -256,7 +264,7 @@ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(nd.dentry, length); ++ error = do_truncate(nd.dentry, length, 0); + } + put_write_access(inode); + +@@ -308,7 +316,7 @@ + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length); ++ error = do_truncate(dentry, length, 0); + out_putf: + fput(file); + out: +@@ -387,9 +395,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -440,9 +458,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, 
&newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -592,36 +620,52 @@ + return error; + } + +-asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++int chmod_common(struct dentry *dentry, mode_t mode) + { +- struct inode * inode; +- struct dentry * dentry; +- struct file * file; +- int err = -EBADF; ++ struct inode * inode = dentry->d_inode; + struct iattr newattrs; ++ int error = -EROFS; + +- file = fget(fd); +- if (!file) ++ if (IS_RDONLY(inode)) + goto out; ++ ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; + +- dentry = file->f_dentry; +- inode = dentry->d_inode; ++ newattrs.ia_mode = mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out; ++ } + +- err = -EROFS; +- if (IS_RDONLY(inode)) +- goto out_putf; +- err = -EPERM; ++ error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto out_putf; ++ goto out; ++ + down(&inode->i_sem); + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- err = notify_change(dentry, &newattrs); ++ error = notify_change(dentry, &newattrs); + up(&inode->i_sem); ++out: ++ return error; ++} + +-out_putf: ++asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) ++{ ++ struct file * file; ++ int err = -EBADF; ++ ++ file = fget(fd); ++ if (!file) ++ goto out; ++ ++ err = chmod_common(file->f_dentry, mode); + fput(file); + out: + return err; +@@ -630,32 +674,13 @@ + asmlinkage long sys_chmod(const char __user * filename, mode_t mode) + { + struct nameidata nd; +- 
struct inode * inode; + int error; +- struct iattr newattrs; + + error = user_path_walk(filename, &nd); + if (error) + goto out; +- inode = nd.dentry->d_inode; +- +- error = -EROFS; +- if (IS_RDONLY(inode)) +- goto dput_and_out; +- +- error = -EPERM; +- if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) +- goto dput_and_out; +- +- down(&inode->i_sem); +- if (mode == (mode_t) -1) +- mode = inode->i_mode; +- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); +- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); + +-dput_and_out: ++ error = chmod_common(nd.dentry, mode); + path_release(&nd); + out: + return error; +@@ -676,6 +701,18 @@ + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; ++ ++ newattrs.ia_uid = user; ++ newattrs.ia_gid = group; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ return error; ++ } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + newattrs.ia_valid = ATTR_CTIME; +@@ -689,6 +726,7 @@ + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; ++ + down(&inode->i_sem); + error = notify_change(dentry, &newattrs); + up(&inode->i_sem); +Index: linux-2.6.4-51.0/fs/exec.c +=================================================================== +--- linux-2.6.4-51.0.orig/fs/exec.c 2004-04-05 17:36:42.000000000 -0400 ++++ linux-2.6.4-51.0/fs/exec.c 2004-04-05 17:36:43.000000000 -0400 +@@ -1418,7 +1418,7 @@ + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (do_truncate(file->f_dentry, 0) != 0) ++ if (do_truncate(file->f_dentry, 0, 0) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file); +Index: linux-2.6.4-51.0/include/linux/fs.h 
+=================================================================== +--- linux-2.6.4-51.0.orig/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400 ++++ linux-2.6.4-51.0/include/linux/fs.h 2004-04-05 17:36:43.000000000 -0400 +@@ -866,13 +866,20 @@ + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*link_raw) (struct nameidata *,struct nameidata *); + int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink_raw) (struct nameidata *); + int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir_raw) (struct nameidata *,int); + int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir_raw) (struct nameidata *); + int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); + int (*readlink) (struct dentry *, char __user *,int); + int (*follow_link) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); +@@ -1169,7 +1176,7 @@ + + /* fs/open.c */ + +-extern int do_truncate(struct dentry *, loff_t start); ++extern int do_truncate(struct dentry *, loff_t start, int called_from_open); + extern struct file *filp_open(const char *, int, int); + extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); + extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); +Index: linux-2.6.4-51.0/net/unix/af_unix.c +=================================================================== +--- linux-2.6.4-51.0.orig/net/unix/af_unix.c 2004-04-05 12:42:07.000000000 -0400 ++++ 
linux-2.6.4-51.0/net/unix/af_unix.c 2004-04-05 17:36:43.000000000 -0400 +@@ -676,6 +676,7 @@ + int err = 0; + + if (sunname->sun_path[0]) { ++ intent_init(&nd.intent, IT_LOOKUP); + err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + if (err) + goto fail; diff --git a/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch b/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch new file mode 100644 index 0000000..ccca64d --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_races-2.6-vanilla.patch @@ -0,0 +1,65 @@ +Index: linux-2.6.7-vanilla/fs/dcache.c +=================================================================== +--- linux-2.6.7-vanilla.orig/fs/dcache.c 2004-07-01 12:09:19.000000000 +0300 ++++ linux-2.6.7-vanilla/fs/dcache.c 2004-07-01 12:29:12.510193264 +0300 +@@ -219,7 +219,14 @@ + spin_unlock(&dcache_lock); + return 0; + } +- /* ++ ++ /* network invalidation by Lustre */ ++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { ++ spin_unlock(&dcache_lock); ++ return 0; ++ } ++ ++ /* + * Check whether to do a partial shrink_dcache + * to get rid of unused child entries. + */ +@@ -1114,19 +1121,28 @@ + * Adds a dentry to the hash according to its name. 
+ */ + +-void d_rehash(struct dentry * entry) ++void __d_rehash(struct dentry * entry, int lock) + { + struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash); + +- spin_lock(&dcache_lock); ++ if (lock) ++ spin_lock(&dcache_lock); + spin_lock(&entry->d_lock); + entry->d_flags &= ~DCACHE_UNHASHED; + spin_unlock(&entry->d_lock); + entry->d_bucket = list; + hlist_add_head_rcu(&entry->d_hash, list); +- spin_unlock(&dcache_lock); ++ if (lock) ++ spin_unlock(&dcache_lock); + } + ++EXPORT_SYMBOL(__d_rehash); ++ ++void d_rehash(struct dentry * entry) ++{ ++ __d_rehash(entry, 1); ++ } ++ + #define do_switch(x,y) do { \ + __typeof__ (x) __tmp = x; \ + x = y; y = __tmp; } while (0) +Index: linux-2.6.7-vanilla/include/linux/dcache.h +=================================================================== +--- linux-2.6.7-vanilla.orig/include/linux/dcache.h 2004-07-01 12:24:53.602553208 +0300 ++++ linux-2.6.7-vanilla/include/linux/dcache.h 2004-07-01 12:27:29.757814000 +0300 +@@ -159,6 +159,8 @@ + + #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ + #define DCACHE_UNHASHED 0x0010 ++#define DCACHE_LUSTRE_INVALID 0x0020 /* Lustre invalidated */ ++ + + extern spinlock_t dcache_lock; + diff --git a/lustre/kernel_patches/series/2.6-rhel4.series b/lustre/kernel_patches/series/2.6-rhel4.series new file mode 100644 index 0000000..ffa4a51 --- /dev/null +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -0,0 +1,12 @@ +lustre_version.patch +vfs_intent-2.6-rhel4.patch +vfs_nointent-2.6-vanilla.patch +vfs_races-2.6-vanilla.patch +ext3-wantedi-misc-2.6-suse.patch +nfs-cifs-intent-2.6-rhel4.patch +iopen-misc-2.6-suse.patch +export-truncate-2.6-suse.patch +export_symbols-2.6-suse.patch +dev_read_only-2.6-suse.patch +export-2.6-suse.patch +lookup_bdev_init_intent.patch -- 1.8.3.1