From: shorthair
Date: Sun, 12 Jan 2003 10:43:24 +0000 (+0000)
Subject: This branch b_lum25 replaces b_lin25 using the latest Lustre tree
X-Git-Tag: v1_7_100~1^100~88
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=c5c46a2998979914321a6ff1cc2324e9bb787db4;p=fs%2Flustre-release.git

This branch b_lum25 replaces b_lin25 using the latest Lustre tree

ptlbd cannot be compiled with kernel 2.5 yet, so the Makefile was changed temporarily
Now this branch can mount/umount and perform some basic fs syscalls
---

diff --git a/lustre/kernel_patches/patches/lin-2.5.44.patch b/lustre/kernel_patches/patches/lin-2.5.44.patch
new file mode 100644
index 0000000..39f01ff
--- /dev/null
+++ b/lustre/kernel_patches/patches/lin-2.5.44.patch
@@ -0,0 +1,3895 @@
+# This is a BitKeeper generated patch for the following project:
+# Project Name: Linux kernel tree
+# This patch format is intended for GNU patch command version 2.5 or higher.
+# This patch includes the following deltas:
+# ChangeSet 1.809 -> 1.814
+# kernel/ksyms.c 1.149 -> 1.152
+# fs/open.c 1.28 -> 1.29
+# fs/ext3/Makefile 1.4 -> 1.5
+# include/linux/ext3_jbd.h 1.5 -> 1.6
+# fs/driverfs/inode.c 1.52 -> 1.53
+# fs/dcache.c 1.33 -> 1.34
+# fs/stat.c 1.13 -> 1.14
+# include/linux/fs.h 1.175 -> 1.178
+# include/linux/namei.h 1.3 -> 1.4
+# fs/namei.c 1.56 -> 1.61
+# fs/nfsd/vfs.c 1.44 -> 1.45
+# arch/um/kernel/mem.c 1.5 -> 1.6
+# fs/ext3/ialloc.c 1.17 -> 1.18
+# fs/ext3/symlink.c 1.3 -> 1.4
+# fs/Makefile 1.42 -> 1.43
+# fs/ext3/namei.c 1.22 -> 1.23
+# include/linux/ext3_fs.h 1.11 -> 1.12
+# net/unix/af_unix.c 1.29 -> 1.30
+# fs/Config.in 1.39 -> 1.40
+# fs/ext3/inode.c 1.42 -> 1.43
+# fs/Config.help 1.21 -> 1.22
+# mm/slab.c 1.33 -> 1.34
+# fs/sysfs/inode.c 1.55 -> 1.56
+# fs/ext3/super.c 1.33 -> 1.34
+# fs/ext3/file.c 1.9 -> 1.10
+# include/linux/slab.h 1.13 -> 1.14
+# include/linux/dcache.h 1.19 -> 1.20
+# (new) -> 1.1 fs/ext3/xattr.h
+# (new) -> 1.1 include/linux/mbcache.h
+# (new) -> 1.1 include/linux/lustre_version.h
+# (new) -> 1.2 fs/ext3/xattr.c
+# (new) -> 1.1 fs/mbcache.c
+# (new) -> 1.1 fs/ext3/xattr_user.c
+#
+# The following is the BitKeeper ChangeSet Log
+# --------------------------------------------
+# 02/10/20 braam@clusterfs.com 1.810
+# xattrs for UML bk repository
+# --------------------------------------------
+# 02/10/20 braam@clusterfs.com 1.811
+# Changes for Lustre
+# --------------------------------------------
+# 02/12/17 root@kai.(none) 1.812
+# changed for lustre
+# --------------------------------------------
+# 03/01/01 root@kai.(none) 1.813
+# changes for intent of lustre
+# --------------------------------------------
+# 03/01/04 root@kai.(none) 1.814
+# fix error for intent
+# --------------------------------------------
+#
+diff -Nru a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
+--- a/arch/um/kernel/mem.c Sat Jan 4 18:24:12 2003
++++ b/arch/um/kernel/mem.c Sat Jan 4 18:24:12 2003
+@@ -656,6 +656,22 @@
+ 	return(phys_mem_map(pte_val(pte)));
+ }
+ 
++struct page *check_get_page(unsigned long kaddr)
++{
++	struct page *page;
++	struct mem_region *mr;
++	unsigned long phys = __pa(kaddr);
++	unsigned int n = phys_region_index(phys);
++
++	if(regions[n] == NULL)
++		return NULL;
++
++	mr = regions[n];
++	page = (struct page *) mr->mem_map;
++	return page + ((phys_addr(phys)) >> PAGE_SHIFT);
++}
++
++
+ struct mem_region *page_region(struct page *page, int *index_out)
+ {
+ 	int i;
+@@ -743,7 +759,7 @@
+ 	    (addr <= region->start + region->len))
+ 		return(mk_phys(addr - region->start, i));
+ 	}
+-	panic("region_pa : no region for virtual 
address"); ++ //panic("region_pa : no region for virtual address"); + return(0); + } + +diff -Nru a/fs/Config.help b/fs/Config.help +--- a/fs/Config.help Sat Jan 4 18:24:12 2003 ++++ b/fs/Config.help Sat Jan 4 18:24:12 2003 +@@ -154,6 +154,13 @@ + of your root partition (the one containing the directory /) cannot + be compiled as a module, and so this may be dangerous. + ++CONFIG_EXT3_FS_XATTR ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page, or visit ++ for details). ++ ++ If unsure, say N. ++ + CONFIG_JBD + This is a generic journaling layer for block devices. It is + currently used by the ext3 file system, but it could also be used to +diff -Nru a/fs/Config.in b/fs/Config.in +--- a/fs/Config.in Sat Jan 4 18:24:12 2003 ++++ b/fs/Config.in Sat Jan 4 18:24:12 2003 +@@ -27,6 +27,7 @@ + dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL + + tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS ++dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS + # CONFIG_JBD could be its own option (even modular), but until there are + # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS + # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS +@@ -180,6 +181,17 @@ + define_tristate CONFIG_ZISOFS_FS $CONFIG_ISO9660_FS + else + define_tristate CONFIG_ZISOFS_FS n ++fi ++ ++# Meta block cache for Extended Attributes (ext2/ext3) ++if [ "$CONFIG_EXT2_FS_XATTR" = "y" -o "$CONFIG_EXT3_FS_XATTR" = "y" ]; then ++ if [ "$CONFIG_EXT2_FS" = "y" -o "$CONFIG_EXT3_FS" = "y" ]; then ++ define_tristate CONFIG_FS_MBCACHE y ++ else ++ if [ "$CONFIG_EXT2_FS" = "m" -o "$CONFIG_EXT3_FS" = "m" ]; then ++ define_tristate CONFIG_FS_MBCACHE m ++ fi ++ fi + fi + + mainmenu_option next_comment +diff -Nru a/fs/Makefile b/fs/Makefile +--- a/fs/Makefile Sat Jan 4 18:24:12 2003 ++++ b/fs/Makefile Sat Jan 4 18:24:12 2003 +@@ -6,7 +6,7 @@ + # + + export-objs := open.o dcache.o buffer.o bio.o inode.o dquot.o mpage.o aio.o \ +- fcntl.o read_write.o dcookies.o ++ fcntl.o read_write.o dcookies.o mbcache.o + + obj-y := open.o read_write.o devices.o file_table.o buffer.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ +@@ -29,6 +29,8 @@ + obj-y += binfmt_script.o + + obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o ++ ++obj-$(CONFIG_FS_MBCACHE) += mbcache.o + + obj-$(CONFIG_QUOTA) += dquot.o + obj-$(CONFIG_QFMT_V1) += quota_v1.o +diff -Nru a/fs/dcache.c b/fs/dcache.c +--- a/fs/dcache.c Sat Jan 4 18:24:12 2003 ++++ b/fs/dcache.c Sat Jan 4 18:24:12 2003 +@@ -638,6 +638,7 @@ + dentry->d_fsdata = NULL; + dentry->d_mounted = 0; + dentry->d_cookie = NULL; ++ dentry->d_it = NULL; + INIT_LIST_HEAD(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_lru); + INIT_LIST_HEAD(&dentry->d_subdirs); +diff -Nru a/fs/driverfs/inode.c b/fs/driverfs/inode.c +--- a/fs/driverfs/inode.c Sat Jan 4 18:24:12 2003 ++++ b/fs/driverfs/inode.c Sat Jan 4 18:24:12 2003 +@@ -523,7 +523,7 @@ + qstr.name = name; + qstr.len = strlen(name); + qstr.hash = full_name_hash(name,qstr.len); +- return lookup_hash(&qstr,parent); ++ return lookup_hash(&qstr,parent, NULL); + } + + /** +diff -Nru a/fs/ext3/Makefile b/fs/ext3/Makefile +--- a/fs/ext3/Makefile Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/Makefile Sat Jan 4 18:24:12 2003 +@@ -7,4 +7,10 @@ + ext3-objs := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o hash.o + ++export-objs += xattr.o ++ 
++ifeq ($(CONFIG_EXT3_FS_XATTR),y) ++ext3-objs += xattr.o xattr_user.o ++endif ++ + include $(TOPDIR)/Rules.make +diff -Nru a/fs/ext3/file.c b/fs/ext3/file.c +--- a/fs/ext3/file.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/file.c Sat Jan 4 18:24:12 2003 +@@ -23,7 +23,7 @@ + #include + #include + #include +-#include ++#include "xattr.h" + + /* + * Called when an inode is released. Note that this is different +@@ -98,5 +98,9 @@ + struct inode_operations ext3_file_inode_operations = { + .truncate = ext3_truncate, + .setattr = ext3_setattr, ++ .setxattr = ext3_setxattr, ++ .getxattr = ext3_getxattr, ++ .listxattr = ext3_listxattr, ++ .removexattr = ext3_removexattr, + }; + +diff -Nru a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c +--- a/fs/ext3/ialloc.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/ialloc.c Sat Jan 4 18:24:12 2003 +@@ -25,6 +25,8 @@ + #include + #include + ++#include "xattr.h" ++ + /* + * ialloc.c contains the inodes allocation and deallocation routines + */ +@@ -118,6 +120,7 @@ + * as writing the quota to disk may need the lock as well. + */ + DQUOT_INIT(inode); ++ ext3_xattr_delete_inode(handle, inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + +diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c +--- a/fs/ext3/inode.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/inode.c Sat Jan 4 18:24:12 2003 +@@ -42,6 +42,18 @@ + */ + #undef SEARCH_FROM_ZERO + ++/* ++ * Test whether an inode is a fast symlink. ++ */ ++static inline int ext3_inode_is_fast_symlink(struct inode *inode) ++{ ++ int ea_blocks = EXT3_I(inode)->i_file_acl ? ++ (inode->i_sb->s_blocksize >> 9) : 0; ++ ++ return (S_ISLNK(inode->i_mode) && ++ inode->i_blocks - ea_blocks == 0); ++} ++ + /* The ext3 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. +@@ -51,7 +63,7 @@ + * still needs to be revoked. 
+ */ + +-static int ext3_forget(handle_t *handle, int is_metadata, ++int ext3_forget(handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + int blocknr) + { +@@ -167,9 +179,7 @@ + { + handle_t *handle; + +- if (is_bad_inode(inode) || +- inode->i_ino == EXT3_ACL_IDX_INO || +- inode->i_ino == EXT3_ACL_DATA_INO) ++ if (is_bad_inode(inode)) + goto no_delete; + + lock_kernel(); +@@ -1979,6 +1989,8 @@ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; ++ if (ext3_inode_is_fast_symlink(inode)) ++ return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + +@@ -2130,8 +2142,6 @@ + struct ext3_group_desc * gdp; + + if ((inode->i_ino != EXT3_ROOT_INO && +- inode->i_ino != EXT3_ACL_IDX_INO && +- inode->i_ino != EXT3_ACL_DATA_INO && + inode->i_ino != EXT3_JOURNAL_INO && + inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu( +@@ -2263,10 +2273,7 @@ + + brelse (iloc.bh); + +- if (inode->i_ino == EXT3_ACL_IDX_INO || +- inode->i_ino == EXT3_ACL_DATA_INO) +- /* Nothing to do */ ; +- else if (S_ISREG(inode->i_mode)) { ++ if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + if (ext3_should_writeback_data(inode)) +@@ -2277,18 +2284,20 @@ + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { +- if (!inode->i_blocks) ++ if (ext3_inode_is_fast_symlink(inode)) + inode->i_op = &ext3_fast_symlink_inode_operations; + else { +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext3_symlink_inode_operations; + if (ext3_should_writeback_data(inode)) + inode->i_mapping->a_ops = &ext3_writeback_aops; + else + inode->i_mapping->a_ops = &ext3_aops; + } +- } else ++ } else { ++ inode->i_op = &ext3_special_inode_operations; + init_special_inode(inode, inode->i_mode, + le32_to_cpu(iloc.raw_inode->i_block[0])); ++ } + if (ei->i_flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + if (ei->i_flags & EXT3_APPEND_FL) +diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c +--- a/fs/ext3/namei.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/namei.c Sat Jan 4 18:24:12 2003 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include "xattr.h" + + + /* +@@ -1654,7 +1655,7 @@ + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + +- inode = ext3_new_inode (handle, dir, S_IFDIR); ++ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; +@@ -1662,7 +1663,6 @@ + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; +- inode->i_blocks = 0; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { + inode->i_nlink--; /* is this nlink == 0? 
*/ +@@ -1689,9 +1689,6 @@ + BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_block); + brelse (dir_block); +- inode->i_mode = S_IFDIR | mode; +- if (dir->i_mode & S_ISGID) +- inode->i_mode |= S_ISGID; + ext3_mark_inode_dirty(handle, inode); + err = ext3_add_entry (handle, dentry, inode); + if (err) { +@@ -2068,7 +2065,7 @@ + goto out_stop; + + if (l > sizeof (EXT3_I(inode)->i_data)) { +- inode->i_op = &page_symlink_inode_operations; ++ inode->i_op = &ext3_symlink_inode_operations; + if (ext3_should_writeback_data(inode)) + inode->i_mapping->a_ops = &ext3_writeback_aops; + else +@@ -2284,4 +2281,17 @@ + .rmdir = ext3_rmdir, + .mknod = ext3_mknod, + .rename = ext3_rename, ++ .setxattr = ext3_setxattr, ++ .getxattr = ext3_getxattr, ++ .listxattr = ext3_listxattr, ++ .removexattr = ext3_removexattr, + }; ++ ++struct inode_operations ext3_special_inode_operations = { ++ .setxattr = ext3_setxattr, ++ .getxattr = ext3_getxattr, ++ .listxattr = ext3_listxattr, ++ .removexattr = ext3_removexattr, ++}; ++ ++ +diff -Nru a/fs/ext3/super.c b/fs/ext3/super.c +--- a/fs/ext3/super.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/super.c Sat Jan 4 18:24:12 2003 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include "xattr.h" + + #ifdef CONFIG_JBD_DEBUG + static int ext3_ro_after; /* Make fs read-only after this many jiffies */ +@@ -405,6 +406,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); +@@ -554,6 +556,7 @@ + int is_remount) + { + unsigned long *mount_options = &sbi->s_mount_opt; ++ + uid_t *resuid = &sbi->s_resuid; + gid_t *resgid = &sbi->s_resgid; + char * this_char; +@@ -566,6 +569,13 @@ + continue; + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; ++#ifdef CONFIG_EXT3_FS_XATTR ++ if (!strcmp (this_char, "user_xattr")) ++ set_opt (*mount_options, XATTR_USER); ++ else if (!strcmp (this_char, "nouser_xattr")) ++ clear_opt (*mount_options, XATTR_USER); ++ else ++#endif + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nouid32")) { +@@ -982,6 +992,12 @@ + sbi->s_mount_opt = 0; + sbi->s_resuid = EXT3_DEF_RESUID; + sbi->s_resgid = EXT3_DEF_RESGID; ++ ++ /* Default extended attribute flags */ ++#ifdef CONFIG_EXT3_FS_XATTR ++ set_opt(sbi->s_mount_opt, XATTR_USER); ++#endif ++ + if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) + goto out_fail; + +@@ -1820,7 +1836,10 @@ + + static int __init init_ext3_fs(void) + { +- int err = init_inodecache(); ++ int err = init_ext3_xattr(); ++ if (err) ++ return err; ++ err = init_inodecache(); + if (err) + goto out1; + err = register_filesystem(&ext3_fs_type); +@@ -1830,6 +1849,7 @@ + out: + destroy_inodecache(); + out1: ++ exit_ext3_xattr(); + return err; + } + +@@ -1837,6 +1857,7 @@ + { + unregister_filesystem(&ext3_fs_type); + destroy_inodecache(); ++ exit_ext3_xattr(); + } + + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); +diff -Nru a/fs/ext3/symlink.c b/fs/ext3/symlink.c +--- a/fs/ext3/symlink.c Sat Jan 4 18:24:12 2003 ++++ b/fs/ext3/symlink.c Sat Jan 4 18:24:12 2003 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include "xattr.h" + + static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) + { +@@ -33,7 +34,20 @@ + return vfs_follow_link(nd, (char*)ei->i_data); + } + 
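++/*
++ * Two symlink tables: targets too long for the inode body go through
++ * the page cache (page_readlink/page_follow_link), while fast symlinks
++ * keep the target in i_data and use the ext3_* readlink/follow_link.
++ * Both tables now also carry the extended attribute operations.
++ */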
++struct inode_operations ext3_symlink_inode_operations = {
++	.readlink	= page_readlink,
++	.follow_link	= page_follow_link,
++	.setxattr	= ext3_setxattr,
++	.getxattr	= ext3_getxattr,
++	.listxattr	= ext3_listxattr,
++	.removexattr	= ext3_removexattr,
++};
++
+ struct inode_operations ext3_fast_symlink_inode_operations = {
+-	.readlink	= ext3_readlink,	/* BKL not held.  Don't need */
++	.readlink	= ext3_readlink,	/* BKL not held.  Don't need */
+ 	.follow_link	= ext3_follow_link,	/* BKL not held.  Don't need */
++	.setxattr	= ext3_setxattr,
++	.getxattr	= ext3_getxattr,
++	.listxattr	= ext3_listxattr,
++	.removexattr	= ext3_removexattr,
+ };
+diff -Nru a/fs/ext3/xattr.c b/fs/ext3/xattr.c
+--- /dev/null Wed Dec 31 16:00:00 1969
++++ b/fs/ext3/xattr.c Sat Jan 4 18:24:12 2003
+@@ -0,0 +1,1131 @@
++/*
++ * linux/fs/ext3/xattr.c
++ *
++ * Copyright (C) 2001 by Andreas Gruenbacher,
++ *
++ * Fix by Harrison Xing .
++ * Ext3 code with a lot of help from Eric Jarman .
++ * Extended attributes for symlinks and special files added per
++ *  suggestion of Luka Renko .
++ */
++
++/*
++ * Extended attributes are stored on disk blocks allocated outside of
++ * any inode. The i_file_acl field is then made to point to this allocated
++ * block. If all extended attributes of an inode are identical, these
++ * inodes may share the same extended attribute block. Such situations
++ * are automatically detected by keeping a cache of recent attribute block
++ * numbers and hashes over the block's contents in memory.
++ *
++ *
++ * Extended attribute block layout:
++ *
++ *   +------------------+
++ *   | header           |
++ *   | entry 1          | |
++ *   | entry 2          | | growing downwards
++ *   | entry 3          | v
++ *   | four null bytes  |
++ *   | . . .            |
++ *   | value 1          | ^
++ *   | value 3          | | growing upwards
++ *   | value 2          | |
++ *   +------------------+
++ *
++ * The block header is followed by multiple entry descriptors. These entry
++ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
++ * byte boundaries. The entry descriptors are sorted by attribute name,
++ * so that two extended attribute blocks can be compared efficiently.
++ *
++ * Attribute values are aligned to the end of the block, stored in
++ * no specific order. They are also padded to EXT3_XATTR_PAD byte
++ * boundaries. No additional gaps are left between them.
++ *
++ * Locking strategy
++ * ----------------
++ * The VFS holds the inode->i_sem semaphore when any of the xattr inode
++ * operations are called, so we are guaranteed that only one
++ * process accesses extended attributes of an inode at any time.
++ *
++ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that
++ * only a single process is modifying an extended attribute block, even
++ * if the block is shared among inodes.
++ */
++
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include "xattr.h"
++
++#define EXT3_EA_USER "user."
++
++#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
++#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
++#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
++#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
++
++#ifdef EXT3_XATTR_DEBUG
++# define ea_idebug(inode, f...) do { \
++		printk(KERN_DEBUG "inode %s:%ld: ", \
++			kdevname(inode->i_dev), inode->i_ino); \
++		printk(f); \
++		printk("\n"); \
++	} while (0)
++# define ea_bdebug(bh, f...) 
do { \
++		printk(KERN_DEBUG "block %s:%ld: ", \
++			kdevname(bh->b_dev), bh->b_blocknr); \
++		printk(f); \
++		printk("\n"); \
++	} while (0)
++#else
++# define ea_idebug(f...)
++# define ea_bdebug(f...)
++#endif
++
++static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *,
++			   struct ext3_xattr_header *);
++
++static int ext3_xattr_cache_insert(struct buffer_head *);
++static struct buffer_head *ext3_xattr_cache_find(struct inode *,
++						 struct ext3_xattr_header *);
++static void ext3_xattr_cache_remove(struct buffer_head *);
++static void ext3_xattr_rehash(struct ext3_xattr_header *,
++			      struct ext3_xattr_entry *);
++
++static struct mb_cache *ext3_xattr_cache;
++
++/*
++ * If a file system does not share extended attributes among inodes,
++ * we should not need the ext3_xattr_sem semaphore. However, the
++ * filesystem may still contain shared blocks, so we always take
++ * the lock.
++ */
++
++static DECLARE_MUTEX(ext3_xattr_sem);
++static struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
++static rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
++
++int
++ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
++{
++	int error = -EINVAL;
++
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		write_lock(&ext3_handler_lock);
++		if (!ext3_xattr_handlers[name_index-1]) {
++			ext3_xattr_handlers[name_index-1] = handler;
++			error = 0;
++		}
++		write_unlock(&ext3_handler_lock);
++	}
++	return error;
++}
++
++void
++ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
++{
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		write_lock(&ext3_handler_lock);
++		ext3_xattr_handlers[name_index-1] = NULL;
++		write_unlock(&ext3_handler_lock);
++	}
++}
++
++static inline const char *
++strcmp_prefix(const char *a, const char *a_prefix)
++{
++	while (*a_prefix && *a == *a_prefix) {
++		a++;
++		a_prefix++;
++	}
++	return *a_prefix ? NULL : a;
++}
++
++/*
++ * Decode the extended attribute name, and translate it into
++ * the name_index and name suffix.
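++ *
++ * For example, "user.foo" matches the handler registered under the
++ * "user." prefix (name_index EXT3_XATTR_INDEX_USER); *name is then
++ * advanced to the suffix "foo".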
++ */
++static inline struct ext3_xattr_handler *
++ext3_xattr_resolve_name(const char **name)
++{
++	struct ext3_xattr_handler *handler = NULL;
++	int i;
++
++	if (!*name)
++		return NULL;
++	read_lock(&ext3_handler_lock);
++	for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
++		if (ext3_xattr_handlers[i]) {
++			const char *n = strcmp_prefix(*name,
++				ext3_xattr_handlers[i]->prefix);
++			if (n) {
++				handler = ext3_xattr_handlers[i];
++				*name = n;
++				break;
++			}
++		}
++	}
++	read_unlock(&ext3_handler_lock);
++	return handler;
++}
++
++static inline struct ext3_xattr_handler *
++ext3_xattr_handler(int name_index)
++{
++	struct ext3_xattr_handler *handler = NULL;
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		read_lock(&ext3_handler_lock);
++		handler = ext3_xattr_handlers[name_index-1];
++		read_unlock(&ext3_handler_lock);
++	}
++	return handler;
++}
++
++/*
++ * Inode operation getxattr()
++ *
++ * dentry->d_inode->i_sem down
++ */
++ssize_t
++ext3_getxattr(struct dentry *dentry, const char *name,
++	      void *buffer, size_t size)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -EOPNOTSUPP;
++	return handler->get(inode, name, buffer, size);
++}
++
++/*
++ * Inode operation listxattr()
++ *
++ * dentry->d_inode->i_sem down
++ */
++ssize_t
++ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
++{
++	return ext3_xattr_list(dentry->d_inode, buffer, size);
++}
++
++/*
++ * Inode operation setxattr()
++ *
++ * dentry->d_inode->i_sem down
++ */
++int
++ext3_setxattr(struct dentry *dentry, const char *name,
++	      void *value, size_t size, int flags)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	if (size == 0)
++		value = "";  /* empty EA, do not remove */
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -EOPNOTSUPP;
++	return handler->set(inode, name, value, size, flags);
++}
++
++/*
++ * Inode operation removexattr()
++ *
++ * dentry->d_inode->i_sem down
++ */
++int
++ext3_removexattr(struct dentry *dentry, const char *name)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -EOPNOTSUPP;
++	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
++}
++
++/*
++ * ext3_xattr_get()
++ *
++ * Copy an extended attribute into the buffer
++ * provided, or compute the buffer size required.
++ * Buffer is NULL to compute the size of the buffer required.
++ *
++ * Returns a negative error number on failure, or the number of bytes
++ * used / required on success.
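++ *
++ * Minimal usage sketch (hypothetical caller):
++ *
++ *	char buf[256];	/* hypothetical buffer */
++ *	int n = ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, "foo",
++ *			       NULL, 0);	/* size query */
++ *	if (n >= 0 && n <= sizeof(buf))
++ *		n = ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, "foo",
++ *				   buf, n);	/* fetch value */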
++ */ ++int ++ext3_xattr_get(struct inode *inode, int name_index, const char *name, ++ void *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_entry *entry; ++ unsigned int block, size; ++ char *end; ++ int name_len, error; ++ ++ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", ++ name_index, name, buffer, (long)buffer_size); ++ ++ if (name == NULL) ++ return -EINVAL; ++ if (!EXT3_I(inode)->i_file_acl) ++ return -ENODATA; ++ block = EXT3_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* find named attribute */ ++ name_len = strlen(name); ++ ++ error = -ERANGE; ++ if (name_len > 255) ++ goto cleanup; ++ entry = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (name_index == entry->e_name_index && ++ name_len == entry->e_name_len && ++ memcmp(name, entry->e_name, name_len) == 0) ++ goto found; ++ entry = next; ++ } ++ /* Check the remaining name entries */ ++ while (!IS_LAST_ENTRY(entry)) { ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ entry = next; ++ } ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ error = -ENODATA; ++ goto cleanup; ++found: ++ /* check the buffer size */ ++ if (entry->e_value_block != 0) ++ goto bad_block; ++ size = le32_to_cpu(entry->e_value_size); ++ if (size > inode->i_sb->s_blocksize || ++ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) ++ goto bad_block; ++ ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (buffer) { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ /* return value of attribute */ ++ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), ++ size); ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* ++ * ext3_xattr_list() ++ * ++ * Copy a list of attribute names into the buffer ++ * provided, or compute the buffer size required. ++ * Buffer is NULL to compute the size of the buffer required. ++ * ++ * Returns a negative error number on failure, or the number of bytes ++ * used / required on success. 
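++ *
++ * As with ext3_xattr_get(), a NULL buffer only computes the size of
++ * the '\0'-separated name list (hypothetical caller):
++ *
++ *	int len = ext3_xattr_list(inode, NULL, 0);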
++ */ ++int ++ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) ++{ ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_entry *entry; ++ unsigned int block, size = 0; ++ char *buf, *end; ++ int error; ++ ++ ea_idebug(inode, "buffer=%p, buffer_size=%ld", ++ buffer, (long)buffer_size); ++ ++ if (!EXT3_I(inode)->i_file_acl) ++ return 0; ++ block = EXT3_I(inode)->i_file_acl; ++ ea_idebug(inode, "reading block %d", block); ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) ++ return -EIO; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); ++ end = bh->b_data + bh->b_size; ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* compute the size required for the list of attribute names */ ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT3_XATTR_NEXT(entry)) { ++ struct ext3_xattr_handler *handler; ++ struct ext3_xattr_entry *next = ++ EXT3_XATTR_NEXT(entry); ++ if ((char *)next >= end) ++ goto bad_block; ++ ++ handler = ext3_xattr_handler(entry->e_name_index); ++ if (handler) { ++ size += handler->list(NULL, inode, entry->e_name, ++ entry->e_name_len) + 1; ++ } ++ } ++ ++ if (ext3_xattr_cache_insert(bh)) ++ ea_idebug(inode, "cache insert failed"); ++ if (!buffer) { ++ error = size; ++ goto cleanup; ++ } else { ++ error = -ERANGE; ++ if (size > buffer_size) ++ goto cleanup; ++ } ++ ++ /* list the attribute names */ ++ buf = buffer; ++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); ++ entry = EXT3_XATTR_NEXT(entry)) { ++ struct ext3_xattr_handler *handler; ++ ++ handler = ext3_xattr_handler(entry->e_name_index); ++ if (handler) { ++ buf += handler->list(buf, inode, entry->e_name, ++ entry->e_name_len); ++ *buf++ = '\0'; ++ } ++ } ++ error = size; ++ ++cleanup: ++ brelse(bh); ++ ++ return error; ++} ++ ++/* ++ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is ++ * not set, set it. ++ */ ++static void ext3_xattr_update_super_block(handle_t *handle, ++ struct super_block *sb) ++{ ++ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) ++ return; ++ ++ lock_super(sb); ++ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); ++ EXT3_SB(sb)->s_es->s_feature_compat |= ++ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); ++ sb->s_dirt = 1; ++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ unlock_super(sb); ++} ++ ++/* ++ * ext3_xattr_set() ++ * ++ * Create, replace or remove an extended attribute for this inode. Buffer ++ * is NULL to remove an existing extended attribute, and non-NULL to ++ * either replace an existing extended attribute, or create a new extended ++ * attribute. The flags XATTR_REPLACE and XATTR_CREATE ++ * specify that an extended attribute must exist and must not exist ++ * previous to the call, respectively. ++ * ++ * Returns 0, or a negative error number on failure. 
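++ *
++ * Minimal usage sketch (hypothetical caller, within a journal handle):
++ *
++ *	error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER,
++ *			       "foo", "bar", 3, 0);
++ *
++ * With XATTR_CREATE this fails with -EEXIST if the attribute already
++ * exists; with XATTR_REPLACE it fails with -ENODATA if it does not;
++ * a NULL value removes the attribute.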
++ */ ++int ++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, ++ const char *name, const void *value, size_t value_len, int flags) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *bh = NULL; ++ struct ext3_xattr_header *header = NULL; ++ struct ext3_xattr_entry *here, *last; ++ unsigned int name_len; ++ int min_offs = sb->s_blocksize, not_found = 1, free, error; ++ char *end; ++ ++ /* ++ * header -- Points either into bh, or to a temporarily ++ * allocated buffer. ++ * here -- The named entry found, or the place for inserting, within ++ * the block pointed to by header. ++ * last -- Points right after the last named entry within the block ++ * pointed to by header. ++ * min_offs -- The offset of the first value (values are aligned ++ * towards the end of the block). ++ * end -- Points right after the block pointed to by header. ++ */ ++ ++ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", ++ name_index, name, value, (long)value_len); ++ ++ if (IS_RDONLY(inode)) ++ return -EROFS; ++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) ++ return -EPERM; ++ if (value == NULL) ++ value_len = 0; ++ if (name == NULL) ++ return -EINVAL; ++ name_len = strlen(name); ++ if (name_len > 255 || value_len > sb->s_blocksize) ++ return -ERANGE; ++ down(&ext3_xattr_sem); ++ ++ if (EXT3_I(inode)->i_file_acl) { ++ /* The inode already has an extended attribute block. */ ++ int block = EXT3_I(inode)->i_file_acl; ++ ++ bh = sb_bread(sb, block); ++ error = -EIO; ++ if (!bh) ++ goto cleanup; ++ ea_bdebug(bh, "b_count=%d, refcount=%d", ++ atomic_read(&(bh->b_count)), ++ le32_to_cpu(HDR(bh)->h_refcount)); ++ header = HDR(bh); ++ end = bh->b_data + bh->b_size; ++ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ header->h_blocks != cpu_to_le32(1)) { ++bad_block: ext3_error(sb, "ext3_xattr_set", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ error = -EIO; ++ goto cleanup; ++ } ++ /* Find the named attribute. */ ++ here = FIRST_ENTRY(bh); ++ while (!IS_LAST_ENTRY(here)) { ++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!here->e_value_block && here->e_value_size) { ++ int offs = le16_to_cpu(here->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ not_found = name_index - here->e_name_index; ++ if (!not_found) ++ not_found = name_len - here->e_name_len; ++ if (!not_found) ++ not_found = memcmp(name, here->e_name,name_len); ++ if (not_found <= 0) ++ break; ++ here = next; ++ } ++ last = here; ++ /* We still need to compute min_offs and last. */ ++ while (!IS_LAST_ENTRY(last)) { ++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); ++ if ((char *)next >= end) ++ goto bad_block; ++ if (!last->e_value_block && last->e_value_size) { ++ int offs = le16_to_cpu(last->e_value_offs); ++ if (offs < min_offs) ++ min_offs = offs; ++ } ++ last = next; ++ } ++ ++ /* Check whether we have enough space left. */ ++ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); ++ } else { ++ /* We will use a new extended attribute block. */ ++ free = sb->s_blocksize - ++ sizeof(struct ext3_xattr_header) - sizeof(__u32); ++ here = last = NULL; /* avoid gcc uninitialized warning. */ ++ } ++ ++ if (not_found) { ++ /* Request to remove a nonexistent attribute? */ ++ error = -ENODATA; ++ if (flags & XATTR_REPLACE) ++ goto cleanup; ++ error = 0; ++ if (value == NULL) ++ goto cleanup; ++ else ++ free -= EXT3_XATTR_LEN(name_len); ++ } else { ++ /* Request to create an existing attribute? 
*/ ++ error = -EEXIST; ++ if (flags & XATTR_CREATE) ++ goto cleanup; ++ if (!here->e_value_block && here->e_value_size) { ++ unsigned int size = le32_to_cpu(here->e_value_size); ++ ++ if (le16_to_cpu(here->e_value_offs) + size > ++ sb->s_blocksize || size > sb->s_blocksize) ++ goto bad_block; ++ free += EXT3_XATTR_SIZE(size); ++ } ++ } ++ free -= EXT3_XATTR_SIZE(value_len); ++ error = -ENOSPC; ++ if (free < 0) ++ goto cleanup; ++ ++ /* Here we know that we can set the new attribute. */ ++ ++ if (header) { ++ if (header->h_refcount == cpu_to_le32(1)) { ++ ea_bdebug(bh, "modifying in-place"); ++ ext3_xattr_cache_remove(bh); ++ error = ext3_journal_get_write_access(handle, bh); ++ if (error) ++ goto cleanup; ++ } else { ++ int offset; ++ ++ ea_bdebug(bh, "cloning"); ++ header = kmalloc(bh->b_size, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memcpy(header, HDR(bh), bh->b_size); ++ header->h_refcount = cpu_to_le32(1); ++ offset = (char *)header - bh->b_data; ++ here = ENTRY((char *)here + offset); ++ last = ENTRY((char *)last + offset); ++ } ++ } else { ++ /* Allocate a buffer where we construct the new block. */ ++ header = kmalloc(sb->s_blocksize, GFP_KERNEL); ++ error = -ENOMEM; ++ if (header == NULL) ++ goto cleanup; ++ memset(header, 0, sb->s_blocksize); ++ end = (char *)header + sb->s_blocksize; ++ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); ++ header->h_blocks = header->h_refcount = cpu_to_le32(1); ++ last = here = ENTRY(header+1); ++ } ++ ++ if (not_found) { ++ /* Insert the new name. */ ++ int size = EXT3_XATTR_LEN(name_len); ++ int rest = (char *)last - (char *)here; ++ memmove((char *)here + size, here, rest); ++ memset(here, 0, size); ++ here->e_name_index = name_index; ++ here->e_name_len = name_len; ++ memcpy(here->e_name, name, name_len); ++ } else { ++ /* Remove the old value. */ ++ if (!here->e_value_block && here->e_value_size) { ++ char *first_val = (char *)header + min_offs; ++ int offs = le16_to_cpu(here->e_value_offs); ++ char *val = (char *)header + offs; ++ size_t size = EXT3_XATTR_SIZE( ++ le32_to_cpu(here->e_value_size)); ++ memmove(first_val + size, first_val, val - first_val); ++ memset(first_val, 0, size); ++ here->e_value_offs = 0; ++ min_offs += size; ++ ++ /* Adjust all value offsets. */ ++ last = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(last)) { ++ int o = le16_to_cpu(last->e_value_offs); ++ if (!last->e_value_block && o < offs) ++ last->e_value_offs = ++ cpu_to_le16(o + size); ++ last = EXT3_XATTR_NEXT(last); ++ } ++ } ++ if (value == NULL) { ++ /* Remove this attribute. */ ++ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { ++ /* This block is now empty. */ ++ error = ext3_xattr_set2(handle, inode, bh,NULL); ++ goto cleanup; ++ } else { ++ /* Remove the old name. */ ++ int size = EXT3_XATTR_LEN(name_len); ++ last = ENTRY((char *)last - size); ++ memmove(here, (char*)here + size, ++ (char*)last - (char*)here); ++ memset(last, 0, size); ++ } ++ } ++ } ++ ++ if (value != NULL) { ++ /* Insert the new value. */ ++ here->e_value_size = cpu_to_le32(value_len); ++ if (value_len) { ++ size_t size = EXT3_XATTR_SIZE(value_len); ++ char *val = (char *)header + min_offs - size; ++ here->e_value_offs = ++ cpu_to_le16((char *)val - (char *)header); ++ memset(val + size - EXT3_XATTR_PAD, 0, ++ EXT3_XATTR_PAD); /* Clear the pad bytes. 
*/ ++ memcpy(val, value, value_len); ++ } ++ } ++ ext3_xattr_rehash(header, here); ++ ++ error = ext3_xattr_set2(handle, inode, bh, header); ++ ++cleanup: ++ brelse(bh); ++ if (!(bh && header == HDR(bh))) ++ kfree(header); ++ up(&ext3_xattr_sem); ++ ++ return error; ++} ++ ++/* ++ * Second half of ext3_xattr_set(): Update the file system. ++ */ ++static int ++ext3_xattr_set2(handle_t *handle, struct inode *inode, ++ struct buffer_head *old_bh, struct ext3_xattr_header *header) ++{ ++ struct super_block *sb = inode->i_sb; ++ struct buffer_head *new_bh = NULL; ++ int error; ++ ++ if (header) { ++ new_bh = ext3_xattr_cache_find(inode, header); ++ if (new_bh) { ++ /* ++ * We found an identical block in the cache. ++ * The old block will be released after updating ++ * the inode. ++ */ ++ ea_bdebug(old_bh, "reusing block %ld", ++ new_bh->b_blocknr); ++ ++ error = -EDQUOT; ++ if (DQUOT_ALLOC_BLOCK(inode, 1)) ++ goto cleanup; ++ ++ error = ext3_journal_get_write_access(handle, new_bh); ++ if (error) ++ goto cleanup; ++ HDR(new_bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); ++ ea_bdebug(new_bh, "refcount now=%d", ++ le32_to_cpu(HDR(new_bh)->h_refcount)); ++ } else if (old_bh && header == HDR(old_bh)) { ++ /* Keep this block. */ ++ new_bh = old_bh; ++ ext3_xattr_cache_insert(new_bh); ++ } else { ++ /* We need to allocate a new block */ ++ int block; ++ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + ++ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); ++ ++ block = ext3_new_block(handle, inode, goal, 0, ++ 0, &error); ++ if (error) ++ goto cleanup; ++ ea_idebug(inode, "creating block %d", block); ++ ++ new_bh = sb_getblk(sb, block); ++ if (!new_bh) { ++getblk_failed: ++ ext3_free_blocks(handle, inode, block, 1); ++ error = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(new_bh); ++ error = ext3_journal_get_create_access(handle, new_bh); ++ if (error) { ++ unlock_buffer(new_bh); ++ goto getblk_failed; ++ } ++ memcpy(new_bh->b_data, header, new_bh->b_size); ++ set_buffer_uptodate(new_bh); ++ unlock_buffer(new_bh); ++ ext3_xattr_cache_insert(new_bh); ++ ++ ext3_xattr_update_super_block(handle, sb); ++ } ++ error = ext3_journal_dirty_metadata(handle, new_bh); ++ if (error) ++ goto cleanup; ++ } ++ ++ /* Update the inode. */ ++ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; ++ inode->i_ctime = CURRENT_TIME; ++ ext3_mark_inode_dirty(handle, inode); ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++ error = 0; ++ if (old_bh && old_bh != new_bh) { ++ /* ++ * If there was an old block, and we are not still using it, ++ * we now release the old block. ++ */ ++ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); ++ ++ error = ext3_journal_get_write_access(handle, old_bh); ++ if (error) ++ goto cleanup; ++ if (refcount == 1) { ++ /* Free the old block. */ ++ ea_bdebug(old_bh, "freeing"); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ++ /* ext3_forget() calls bforget() for us, but we ++ let our caller release old_bh, so we need to ++ duplicate the handle before. */ ++ get_bh(old_bh); ++ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); ++ } else { ++ /* Decrement the refcount only. 
*/ ++ refcount--; ++ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); ++ DQUOT_FREE_BLOCK(inode, 1); ++ ext3_journal_dirty_metadata(handle, old_bh); ++ ea_bdebug(old_bh, "refcount now=%d", refcount); ++ } ++ } ++ ++cleanup: ++ if (old_bh != new_bh) ++ brelse(new_bh); ++ ++ return error; ++} ++ ++/* ++ * ext3_xattr_delete_inode() ++ * ++ * Free extended attribute resources associated with this inode. This ++ * is called immediately before an inode is freed. ++ */ ++void ++ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) ++{ ++ struct buffer_head *bh; ++ unsigned int block = EXT3_I(inode)->i_file_acl; ++ ++ if (!block) ++ return; ++ down(&ext3_xattr_sem); ++ ++ bh = sb_bread(inode->i_sb, block); ++ if (!bh) { ++ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ++ "inode %ld: block %d read error", inode->i_ino, block); ++ goto cleanup; ++ } ++ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); ++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || ++ HDR(bh)->h_blocks != cpu_to_le32(1)) { ++ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", ++ "inode %ld: bad block %d", inode->i_ino, block); ++ goto cleanup; ++ } ++ ext3_journal_get_write_access(handle, bh); ++ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { ++ ext3_xattr_cache_remove(bh); ++ ext3_free_blocks(handle, inode, block, 1); ++ ext3_forget(handle, 1, inode, bh, block); ++ bh = NULL; ++ } else { ++ HDR(bh)->h_refcount = cpu_to_le32( ++ le32_to_cpu(HDR(bh)->h_refcount) - 1); ++ ext3_journal_dirty_metadata(handle, bh); ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ DQUOT_FREE_BLOCK(inode, 1); ++ } ++ EXT3_I(inode)->i_file_acl = 0; ++ ++cleanup: ++ brelse(bh); ++ up(&ext3_xattr_sem); ++} ++ ++/* ++ * ext3_xattr_put_super() ++ * ++ * This is called when a file system is unmounted. ++ */ ++void ++ext3_xattr_put_super(struct super_block *sb) ++{ ++ mb_cache_shrink(ext3_xattr_cache, sb->s_bdev); ++} ++ ++/* ++ * ext3_xattr_cache_insert() ++ * ++ * Create a new entry in the extended attribute cache, and insert ++ * it unless such an entry is already in the cache. ++ * ++ * Returns 0, or a negative error number on failure. ++ */ ++static int ++ext3_xattr_cache_insert(struct buffer_head *bh) ++{ ++ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); ++ struct mb_cache_entry *ce; ++ int error; ++ ++ ce = mb_cache_entry_alloc(ext3_xattr_cache); ++ if (!ce) ++ return -ENOMEM; ++ error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); ++ if (error) { ++ mb_cache_entry_free(ce); ++ if (error == -EBUSY) { ++ ea_bdebug(bh, "already in cache (%d cache entries)", ++ atomic_read(&ext3_xattr_cache->c_entry_count)); ++ error = 0; ++ } ++ } else { ++ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, ++ atomic_read(&ext3_xattr_cache->c_entry_count)); ++ mb_cache_entry_release(ce); ++ } ++ return error; ++} ++ ++/* ++ * ext3_xattr_cmp() ++ * ++ * Compare two extended attribute blocks for equality. ++ * ++ * Returns 0 if the blocks are equal, 1 if they differ, and ++ * a negative error number on errors. 
++ */ ++static int ++ext3_xattr_cmp(struct ext3_xattr_header *header1, ++ struct ext3_xattr_header *header2) ++{ ++ struct ext3_xattr_entry *entry1, *entry2; ++ ++ entry1 = ENTRY(header1+1); ++ entry2 = ENTRY(header2+1); ++ while (!IS_LAST_ENTRY(entry1)) { ++ if (IS_LAST_ENTRY(entry2)) ++ return 1; ++ if (entry1->e_hash != entry2->e_hash || ++ entry1->e_name_len != entry2->e_name_len || ++ entry1->e_value_size != entry2->e_value_size || ++ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) ++ return 1; ++ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) ++ return -EIO; ++ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), ++ (char *)header2 + le16_to_cpu(entry2->e_value_offs), ++ le32_to_cpu(entry1->e_value_size))) ++ return 1; ++ ++ entry1 = EXT3_XATTR_NEXT(entry1); ++ entry2 = EXT3_XATTR_NEXT(entry2); ++ } ++ if (!IS_LAST_ENTRY(entry2)) ++ return 1; ++ return 0; ++} ++ ++/* ++ * ext3_xattr_cache_find() ++ * ++ * Find an identical extended attribute block. ++ * ++ * Returns a pointer to the block found, or NULL if such a block was ++ * not found or an error occurred. ++ */ ++static struct buffer_head * ++ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) ++{ ++ __u32 hash = le32_to_cpu(header->h_hash); ++ struct mb_cache_entry *ce; ++ ++ if (!header->h_hash) ++ return NULL; /* never share */ ++ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); ++ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_bdev, hash); ++ while (ce) { ++ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); ++ ++ if (!bh) { ++ ext3_error(inode->i_sb, "ext3_xattr_cache_find", ++ "inode %ld: block %ld read error", ++ inode->i_ino, (unsigned long) ce->e_block); ++ } else if (le32_to_cpu(HDR(bh)->h_refcount) > ++ EXT3_XATTR_REFCOUNT_MAX) { ++ ea_idebug(inode, "block %ld refcount %d>%d", ++ (unsigned long) ce->e_block, ++ le32_to_cpu(HDR(bh)->h_refcount), ++ EXT3_XATTR_REFCOUNT_MAX); ++ } else if (!ext3_xattr_cmp(header, HDR(bh))) { ++ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); ++ mb_cache_entry_release(ce); ++ return bh; ++ } ++ brelse(bh); ++ ce = mb_cache_entry_find_next(ce, 0, inode->i_bdev, hash); ++ } ++ return NULL; ++} ++ ++/* ++ * ext3_xattr_cache_remove() ++ * ++ * Remove the cache entry of a block from the cache. Called when a ++ * block becomes invalid. ++ */ ++static void ++ext3_xattr_cache_remove(struct buffer_head *bh) ++{ ++ struct mb_cache_entry *ce; ++ ++ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, ++ bh->b_blocknr); ++ if (ce) { ++ ea_bdebug(bh, "removing (%d cache entries remaining)", ++ atomic_read(&ext3_xattr_cache->c_entry_count)-1); ++ mb_cache_entry_free(ce); ++ } else ++ ea_bdebug(bh, "no cache entry"); ++} ++ ++#define NAME_HASH_SHIFT 5 ++#define VALUE_HASH_SHIFT 16 ++ ++/* ++ * ext3_xattr_hash_entry() ++ * ++ * Compute the hash of an extended attribute. 
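++ *
++ * Each name byte is folded in with a 5-bit rotate,
++ *
++ *	hash = (hash << 5) ^ (hash >> 27) ^ c;
++ *
++ * and the value words with a 16-bit rotate, so the hash covers both
++ * name and value.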
++ */ ++static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, ++ struct ext3_xattr_entry *entry) ++{ ++ __u32 hash = 0; ++ char *name = entry->e_name; ++ int n; ++ ++ for (n=0; n < entry->e_name_len; n++) { ++ hash = (hash << NAME_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ ++ *name++; ++ } ++ ++ if (entry->e_value_block == 0 && entry->e_value_size != 0) { ++ __u32 *value = (__u32 *)((char *)header + ++ le16_to_cpu(entry->e_value_offs)); ++ for (n = (le32_to_cpu(entry->e_value_size) + ++ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { ++ hash = (hash << VALUE_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ ++ le32_to_cpu(*value++); ++ } ++ } ++ entry->e_hash = cpu_to_le32(hash); ++} ++ ++#undef NAME_HASH_SHIFT ++#undef VALUE_HASH_SHIFT ++ ++#define BLOCK_HASH_SHIFT 16 ++ ++/* ++ * ext3_xattr_rehash() ++ * ++ * Re-compute the extended attribute hash value after an entry has changed. ++ */ ++static void ext3_xattr_rehash(struct ext3_xattr_header *header, ++ struct ext3_xattr_entry *entry) ++{ ++ struct ext3_xattr_entry *here; ++ __u32 hash = 0; ++ ++ ext3_xattr_hash_entry(header, entry); ++ here = ENTRY(header+1); ++ while (!IS_LAST_ENTRY(here)) { ++ if (!here->e_hash) { ++ /* Block is not shared if an entry's hash value == 0 */ ++ hash = 0; ++ break; ++ } ++ hash = (hash << BLOCK_HASH_SHIFT) ^ ++ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ ++ le32_to_cpu(here->e_hash); ++ here = EXT3_XATTR_NEXT(here); ++ } ++ header->h_hash = cpu_to_le32(hash); ++} ++ ++#undef BLOCK_HASH_SHIFT ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_set); ++EXPORT_SYMBOL(ext3_bread); ++ ++int __init ++init_ext3_xattr(void) ++{ ++ int err; ++ ++ err = ext3_xattr_register(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); ++ if (err) ++ return err; ++ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, ++ sizeof(struct mb_cache_entry) + ++ sizeof(struct mb_cache_entry_index), 1, 6); ++ if (!ext3_xattr_cache) { ++ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++void ++exit_ext3_xattr(void) ++{ ++ if (ext3_xattr_cache) ++ mb_cache_destroy(ext3_xattr_cache); ++ ext3_xattr_cache = NULL; ++ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, &ext3_xattr_user_handler); ++} ++ +diff -Nru a/fs/ext3/xattr.h b/fs/ext3/xattr.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/fs/ext3/xattr.h Sat Jan 4 18:24:12 2003 +@@ -0,0 +1,133 @@ ++/* ++ File: fs/ext3/xattr.h ++ ++ On-disk format of extended attributes for the ext3 filesystem. 
++
++  (C) 2001 Andreas Gruenbacher,
++*/
++
++#include 
++#include 
++
++/* Magic value in attribute blocks */
++#define EXT3_XATTR_MAGIC		0xEA020000
++
++/* Maximum number of references to one attribute block */
++#define EXT3_XATTR_REFCOUNT_MAX		1024
++
++/* Name indexes */
++#define EXT3_XATTR_INDEX_MAX			10
++#define EXT3_XATTR_INDEX_USER			1
++#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS	2
++#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT	3
++
++struct ext3_xattr_header {
++	__u32	h_magic;	/* magic number for identification */
++	__u32	h_refcount;	/* reference count */
++	__u32	h_blocks;	/* number of disk blocks used */
++	__u32	h_hash;		/* hash value of all attributes */
++	__u32	h_reserved[4];	/* zero right now */
++};
++
++struct ext3_xattr_entry {
++	__u8	e_name_len;	/* length of name */
++	__u8	e_name_index;	/* attribute name index */
++	__u16	e_value_offs;	/* offset in disk block of value */
++	__u32	e_value_block;	/* disk block attribute is stored on (n/i) */
++	__u32	e_value_size;	/* size of attribute value */
++	__u32	e_hash;		/* hash value of name and value */
++	char	e_name[0];	/* attribute name */
++};
++
++#define EXT3_XATTR_PAD_BITS		2
++#define EXT3_XATTR_PAD		(1<<EXT3_XATTR_PAD_BITS)
++#define EXT3_XATTR_ROUND		(EXT3_XATTR_PAD-1)
++#define EXT3_XATTR_LEN(name_len) \
++	(((name_len) + EXT3_XATTR_ROUND + \
++	sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
++#define EXT3_XATTR_NEXT(entry) \
++	( (struct ext3_xattr_entry *)( \
++	  (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
++#define EXT3_XATTR_SIZE(size) \
++	(((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
++
++# ifdef CONFIG_EXT3_FS_XATTR
++
++struct ext3_xattr_handler {
++	char *prefix;
++	size_t (*list)(char *list, struct inode *inode, const char *name,
++		       int name_len);
++	int (*get)(struct inode *inode, const char *name, void *buffer,
++		   size_t size);
++	int (*set)(struct inode *inode, const char *name, const void *buffer,
++		   size_t size, int flags);
++};
++
++extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
++extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
++
++extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int);
++extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
++extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
++extern int ext3_removexattr(struct dentry *, const char *);
++
++extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
++extern int ext3_xattr_list(struct inode *, char *, size_t);
++extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int);
++
++extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
++extern void ext3_xattr_put_super(struct super_block *);
++
++extern int init_ext3_xattr(void);
++extern void exit_ext3_xattr(void);
++
++# else  /* CONFIG_EXT3_FS_XATTR */
++#  define ext3_setxattr		NULL
++#  define ext3_getxattr		NULL
++#  define ext3_listxattr	NULL
++#  define ext3_removexattr	NULL
++
++static inline int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++	       void *buffer, size_t size, int flags)
++{
++	return -EOPNOTSUPP;
++}
++
++static inline int
++ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags)
++{
++	return -EOPNOTSUPP;
++}
++
++static inline int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++	       const char *name, const void *value, size_t size, int flags)
++{
++	return -EOPNOTSUPP;
++}
++
++static inline void
++ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
++{
++}
++
++static inline void
++ext3_xattr_put_super(struct super_block *sb)
++{
++}
++
++static inline int
++init_ext3_xattr(void)
++{
++	return 0;
++}
++
++static inline void
++exit_ext3_xattr(void)
++{
++}
++
++# endif  /* CONFIG_EXT3_FS_XATTR */
++
++extern struct 
ext3_xattr_handler ext3_xattr_user_handler; +diff -Nru a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/fs/ext3/xattr_user.c Sat Jan 4 18:24:12 2003 +@@ -0,0 +1,99 @@ ++/* ++ * linux/fs/ext3/xattr_user.c ++ * Handler for extended user attributes. ++ * ++ * Copyright (C) 2001 by Andreas Gruenbacher, ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "xattr.h" ++ ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++# include ++#endif ++ ++#define XATTR_USER_PREFIX "user." ++ ++static size_t ++ext3_xattr_user_list(char *list, struct inode *inode, ++ const char *name, int name_len) ++{ ++ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; ++ ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return 0; ++ ++ if (list) { ++ memcpy(list, XATTR_USER_PREFIX, prefix_len); ++ memcpy(list+prefix_len, name, name_len); ++ } ++ return prefix_len + name_len; ++} ++ ++static int ++ext3_xattr_user_get(struct inode *inode, const char *name, ++ void *buffer, size_t size) ++{ ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -EOPNOTSUPP; ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++ error = ext3_permission_locked(inode, MAY_READ); ++#else ++ error = permission(inode, MAY_READ); ++#endif ++ if (error) ++ return error; ++ ++ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, ++ buffer, size); ++} ++ ++static int ++ext3_xattr_user_set(struct inode *inode, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ handle_t *handle; ++ int error; ++ ++ if (strcmp(name, "") == 0) ++ return -EINVAL; ++ if (!test_opt(inode->i_sb, XATTR_USER)) ++ return -EOPNOTSUPP; ++ if ( !S_ISREG(inode->i_mode) && ++ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) ++ return -EPERM; ++#ifdef CONFIG_EXT3_FS_POSIX_ACL ++ error = ext3_permission_locked(inode, MAY_WRITE); ++#else ++ error = permission(inode, MAY_WRITE); ++#endif ++ if (error) ++ return error; ++ ++ lock_kernel(); ++ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, ++ value, size, flags); ++ ext3_journal_stop(handle, inode); ++ unlock_kernel(); ++ ++ return error; ++} ++ ++struct ext3_xattr_handler ext3_xattr_user_handler = { ++ prefix: XATTR_USER_PREFIX, ++ list: ext3_xattr_user_list, ++ get: ext3_xattr_user_get, ++ set: ext3_xattr_user_set, ++}; +diff -Nru a/fs/mbcache.c b/fs/mbcache.c +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/fs/mbcache.c Sat Jan 4 18:24:12 2003 +@@ -0,0 +1,702 @@ ++/* ++ * linux/fs/mbcache.c ++ * (C) 2001-2002 Andreas Gruenbacher, ++ */ ++ ++/* ++ * Filesystem Meta Information Block Cache (mbcache) ++ * ++ * The mbcache caches blocks of block devices that need to be located ++ * by their device/block number, as well as by other criteria (such ++ * as the block's contents). ++ * ++ * There can only be one cache entry in a cache per device and block number. ++ * Additional indexes need not be unique in this sense. The number of ++ * additional indexes (=other criteria) can be hardwired (at compile time) ++ * or specified at cache create time. ++ * ++ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' ++ * in the cache. A valid entry is in the main hash tables of the cache, ++ * and may also be in the lru list. An invalid entry is not in any hashes ++ * or lists. ++ * ++ * A valid cache entry is only in the lru list if no handles refer to it. 
++ * Invalid cache entries will be freed when the last handle to the cache
++ * entry is released.
++ */
++
++#include 
++#include 
++
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++
++
++#ifdef MB_CACHE_DEBUG
++# define mb_debug(f...) do { \
++		printk(KERN_DEBUG f); \
++		printk("\n"); \
++	} while (0)
++#define mb_assert(c) do { if (!(c)) \
++		printk(KERN_ERR "assertion " #c " failed\n"); \
++	} while(0)
++#else
++# define mb_debug(f...) do { } while(0)
++# define mb_assert(c) do { } while(0)
++#endif
++#define mb_error(f...) do { \
++		printk(KERN_ERR f); \
++		printk("\n"); \
++	} while(0)
++
++MODULE_AUTHOR("Andreas Gruenbacher ");
++MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
++MODULE_LICENSE("GPL");
++
++EXPORT_SYMBOL(mb_cache_create);
++EXPORT_SYMBOL(mb_cache_shrink);
++EXPORT_SYMBOL(mb_cache_destroy);
++EXPORT_SYMBOL(mb_cache_entry_alloc);
++EXPORT_SYMBOL(mb_cache_entry_insert);
++EXPORT_SYMBOL(mb_cache_entry_release);
++EXPORT_SYMBOL(mb_cache_entry_takeout);
++EXPORT_SYMBOL(mb_cache_entry_free);
++EXPORT_SYMBOL(mb_cache_entry_dup);
++EXPORT_SYMBOL(mb_cache_entry_get);
++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
++EXPORT_SYMBOL(mb_cache_entry_find_first);
++EXPORT_SYMBOL(mb_cache_entry_find_next);
++#endif
++
++
++/*
++ * Global data: list of all mbcache's, lru list, and a spinlock for
++ * accessing cache data structures on SMP machines. (The lru list is
++ * global across all mbcaches.)
++ */
++
++static LIST_HEAD(mb_cache_list);
++static LIST_HEAD(mb_cache_lru_list);
++static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED;
++static struct shrinker *mb_shrinker;
++
++static inline int
++mb_cache_indexes(struct mb_cache *cache)
++{
++#ifdef MB_CACHE_INDEXES_COUNT
++	return MB_CACHE_INDEXES_COUNT;
++#else
++	return cache->c_indexes_count;
++#endif
++}
++
++/*
++ * What the mbcache registers as to get shrunk dynamically.
++ */
++
++static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
++
++static inline void
++__mb_cache_entry_takeout_lru(struct mb_cache_entry *ce)
++{
++	if (!list_empty(&ce->e_lru_list))
++		list_del_init(&ce->e_lru_list);
++}
++
++
++static inline void
++__mb_cache_entry_into_lru(struct mb_cache_entry *ce)
++{
++	list_add(&ce->e_lru_list, &mb_cache_lru_list);
++}
++
++
++static inline int
++__mb_cache_entry_in_lru(struct mb_cache_entry *ce)
++{
++	return (!list_empty(&ce->e_lru_list));
++}
++
++
++/*
++ * Insert the cache entry into all hashes.
++ */
++static inline void
++__mb_cache_entry_link(struct mb_cache_entry *ce)
++{
++	struct mb_cache *cache = ce->e_cache;
++	unsigned int bucket;
++	int n;
++
++	bucket = hash_long((unsigned long)ce->e_bdev +
++			   (ce->e_block & 0xffffff), cache->c_bucket_bits);
++	list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
++	for (n=0; n<mb_cache_indexes(cache); n++) {
++		bucket = hash_long(ce->e_indexes[n].o_key,
++				   cache->c_bucket_bits);
++		list_add(&ce->e_indexes[n].o_list,
++			 &cache->c_indexes_hash[n][bucket]);
++	}
++}
++
++
++/*
++ * Remove the cache entry from all hashes.
++ */ ++static inline void ++__mb_cache_entry_unlink(struct mb_cache_entry *ce) ++{ ++ int n; ++ ++ list_del_init(&ce->e_block_list); ++ for (n = 0; n < mb_cache_indexes(ce->e_cache); n++) ++ list_del(&ce->e_indexes[n].o_list); ++} ++ ++ ++static inline int ++__mb_cache_entry_is_linked(struct mb_cache_entry *ce) ++{ ++ return (!list_empty(&ce->e_block_list)); ++} ++ ++ ++static inline struct mb_cache_entry * ++__mb_cache_entry_read(struct mb_cache_entry *ce) ++{ ++ __mb_cache_entry_takeout_lru(ce); ++ atomic_inc(&ce->e_used); ++ return ce; ++} ++ ++ ++static inline void ++__mb_cache_entry_forget(struct mb_cache_entry *ce) ++{ ++ struct mb_cache *cache = ce->e_cache; ++ ++ mb_assert(atomic_read(&ce->e_used) == 0); ++ atomic_dec(&cache->c_entry_count); ++ if (cache->c_op.free) ++ cache->c_op.free(ce); ++ kmem_cache_free(cache->c_entry_cache, ce); ++} ++ ++ ++static inline void ++__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) ++{ ++ if (atomic_dec_and_test(&ce->e_used)) { ++ if (!__mb_cache_entry_is_linked(ce)) ++ goto forget; ++ __mb_cache_entry_into_lru(ce); ++ } ++ spin_unlock(&mb_cache_spinlock); ++ return; ++forget: ++ spin_unlock(&mb_cache_spinlock); ++ __mb_cache_entry_forget(ce); ++} ++ ++ ++/* ++ * mb_cache_shrink_fn() memory pressure callback ++ * ++ * This function is called by the kernel memory management when memory ++ * gets low. ++ * ++ * @nr_to_scan: Number of objects to scan ++ * @gfp_mask: (ignored) ++ * ++ * Returns the number of objects which are present in the cache. ++ */ ++static int ++mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l; ++ int count = 0; ++ ++ spin_lock(&mb_cache_spinlock); ++ list_for_each_prev(l, &mb_cache_list) { ++ struct mb_cache *cache = ++ list_entry(l, struct mb_cache, c_cache_list); ++ mb_debug("cache %s (%d)", cache->c_name, ++ atomic_read(&cache->c_entry_count)); ++ count += atomic_read(&cache->c_entry_count); ++ } ++ mb_debug("trying to free %d entries", nr_to_scan); ++ if (nr_to_scan == 0) { ++ spin_unlock(&mb_cache_spinlock); ++ goto out; ++ } ++ while (nr_to_scan && !list_empty(&mb_cache_lru_list)) { ++ struct mb_cache_entry *ce = ++ list_entry(mb_cache_lru_list.prev, ++ struct mb_cache_entry, e_lru_list); ++ list_move(&ce->e_lru_list, &free_list); ++ if (__mb_cache_entry_is_linked(ce)) ++ __mb_cache_entry_unlink(ce); ++ nr_to_scan--; ++ } ++ spin_unlock(&mb_cache_spinlock); ++ l = free_list.prev; ++ while (l != &free_list) { ++ struct mb_cache_entry *ce = list_entry(l, ++ struct mb_cache_entry, e_lru_list); ++ l = l->prev; ++ __mb_cache_entry_forget(ce); ++ count--; ++ } ++out: ++ mb_debug("%d remaining entries ", count); ++ return count; ++} ++ ++ ++/* ++ * mb_cache_create() create a new cache ++ * ++ * All entries in one cache are equal size. Cache entries may be from ++ * multiple devices. If this is the first mbcache created, registers ++ * the cache with kernel memory management. Returns NULL if no more ++ * memory was available. ++ * ++ * @name: name of the cache (informal) ++ * @cache_op: contains the callback called when freeing a cache entry ++ * @entry_size: The size of a cache entry, including ++ * struct mb_cache_entry ++ * @indexes_count: number of additional indexes in the cache. Must equal ++ * MB_CACHE_INDEXES_COUNT if the number of indexes is ++ * hardwired. 
++ * @bucket_bits: log2(number of hash buckets) ++ */ ++struct mb_cache * ++mb_cache_create(const char *name, struct mb_cache_op *cache_op, ++ size_t entry_size, int indexes_count, int bucket_bits) ++{ ++ int m=0, n, bucket_count = 1 << bucket_bits; ++ struct mb_cache *cache = NULL; ++ ++ if(entry_size < sizeof(struct mb_cache_entry) + ++ indexes_count * sizeof(struct mb_cache_entry_index)) ++ return NULL; ++ ++ cache = kmalloc(sizeof(struct mb_cache) + ++ indexes_count * sizeof(struct list_head), GFP_KERNEL); ++ if (!cache) ++ goto fail; ++ cache->c_name = name; ++ if (cache_op) ++ cache->c_op.free = cache_op->free; ++ else ++ cache->c_op.free = NULL; ++ atomic_set(&cache->c_entry_count, 0); ++ cache->c_bucket_bits = bucket_bits; ++#ifdef MB_CACHE_INDEXES_COUNT ++ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); ++#else ++ cache->c_indexes_count = indexes_count; ++#endif ++ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!cache->c_block_hash) ++ goto fail; ++ for (n=0; nc_block_hash[n]); ++ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * ++ sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!cache->c_indexes_hash[m]) ++ goto fail; ++ for (n=0; nc_indexes_hash[m][n]); ++ } ++ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, ++ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); ++ if (!cache->c_entry_cache) ++ goto fail; ++ ++ spin_lock(&mb_cache_spinlock); ++ if (list_empty(&mb_cache_list)) { ++ if (mb_shrinker) { ++ printk(KERN_ERR "%s: already have a shrinker!\n", ++ __FUNCTION__); ++ remove_shrinker(mb_shrinker); ++ } ++ mb_shrinker = set_shrinker(DEFAULT_SEEKS, mb_cache_shrink_fn); ++ } ++ list_add(&cache->c_cache_list, &mb_cache_list); ++ spin_unlock(&mb_cache_spinlock); ++ return cache; ++ ++fail: ++ if (cache) { ++ while (--m >= 0) ++ kfree(cache->c_indexes_hash[m]); ++ if (cache->c_block_hash) ++ kfree(cache->c_block_hash); ++ kfree(cache); ++ } ++ return NULL; ++} ++ ++ ++/* ++ * mb_cache_shrink() ++ * ++ * Removes all cache entires of a device from the cache. All cache entries ++ * currently in use cannot be freed, and thus remain in the cache. All others ++ * are freed. ++ * ++ * @cache: which cache to shrink ++ * @bdev: which device's cache entries to shrink ++ */ ++void ++mb_cache_shrink(struct mb_cache *cache, struct block_device *bdev) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l; ++ ++ spin_lock(&mb_cache_spinlock); ++ l = mb_cache_lru_list.prev; ++ while (l != &mb_cache_lru_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ l = l->prev; ++ if (ce->e_bdev == bdev) { ++ list_move(&ce->e_lru_list, &free_list); ++ if (__mb_cache_entry_is_linked(ce)) ++ __mb_cache_entry_unlink(ce); ++ } ++ } ++ spin_unlock(&mb_cache_spinlock); ++ l = free_list.prev; ++ while (l != &free_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ l = l->prev; ++ __mb_cache_entry_forget(ce); ++ } ++} ++ ++ ++/* ++ * mb_cache_destroy() ++ * ++ * Shrinks the cache to its minimum possible size (hopefully 0 entries), ++ * and then destroys it. If this was the last mbcache, un-registers the ++ * mbcache from kernel memory management. 
++ */ ++void ++mb_cache_destroy(struct mb_cache *cache) ++{ ++ LIST_HEAD(free_list); ++ struct list_head *l; ++ int n; ++ ++ spin_lock(&mb_cache_spinlock); ++ l = mb_cache_lru_list.prev; ++ while (l != &mb_cache_lru_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ l = l->prev; ++ if (ce->e_cache == cache) { ++ list_move(&ce->e_lru_list, &free_list); ++ if (__mb_cache_entry_is_linked(ce)) ++ __mb_cache_entry_unlink(ce); ++ } ++ } ++ list_del(&cache->c_cache_list); ++ if (list_empty(&mb_cache_list) && mb_shrinker) { ++ remove_shrinker(mb_shrinker); ++ mb_shrinker = 0; ++ } ++ spin_unlock(&mb_cache_spinlock); ++ ++ l = free_list.prev; ++ while (l != &free_list) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_lru_list); ++ l = l->prev; ++ __mb_cache_entry_forget(ce); ++ } ++ ++ if (atomic_read(&cache->c_entry_count) > 0) { ++ mb_error("cache %s: %d orphaned entries", ++ cache->c_name, ++ atomic_read(&cache->c_entry_count)); ++ } ++ ++ kmem_cache_destroy(cache->c_entry_cache); ++ ++ for (n=0; n < mb_cache_indexes(cache); n++) ++ kfree(cache->c_indexes_hash[n]); ++ kfree(cache->c_block_hash); ++ ++ kfree(cache); ++} ++ ++ ++/* ++ * mb_cache_entry_alloc() ++ * ++ * Allocates a new cache entry. The new entry will not be valid initially, ++ * and thus cannot be looked up yet. It should be filled with data, and ++ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL ++ * if no more memory was available. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_alloc(struct mb_cache *cache) ++{ ++ struct mb_cache_entry *ce; ++ ++ atomic_inc(&cache->c_entry_count); ++ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); ++ if (ce) { ++ INIT_LIST_HEAD(&ce->e_lru_list); ++ INIT_LIST_HEAD(&ce->e_block_list); ++ ce->e_cache = cache; ++ atomic_set(&ce->e_used, 1); ++ } ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_insert() ++ * ++ * Inserts an entry that was allocated using mb_cache_entry_alloc() into ++ * the cache. After this, the cache entry can be looked up, but is not yet ++ * in the lru list as the caller still holds a handle to it. Returns 0 on ++ * success, or -EBUSY if a cache entry for that device + inode exists ++ * already (this may happen after a failed lookup, but when another process ++ * has inserted the same cache entry in the meantime). ++ * ++ * @bdev: device the cache entry belongs to ++ * @block: block number ++ * @keys: array of additional keys. There must be indexes_count entries ++ * in the array (as specified when creating the cache). ++ */ ++int ++mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, ++ sector_t block, unsigned int keys[]) ++{ ++ struct mb_cache *cache = ce->e_cache; ++ unsigned int bucket; ++ struct list_head *l; ++ int error = -EBUSY, n; ++ ++ bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), ++ cache->c_bucket_bits); ++ spin_lock(&mb_cache_spinlock); ++ list_for_each_prev(l, &cache->c_block_hash[bucket]) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, e_block_list); ++ if (ce->e_bdev == bdev && ce->e_block == block) ++ goto out; ++ } ++ mb_assert(!__mb_cache_entry_is_linked(ce)); ++ ce->e_bdev = bdev; ++ ce->e_block = block; ++ for (n=0; ne_indexes[n].o_key = keys[n]; ++ __mb_cache_entry_link(ce); ++out: ++ spin_unlock(&mb_cache_spinlock); ++ return error; ++} ++ ++ ++/* ++ * mb_cache_entry_release() ++ * ++ * Release a handle to a cache entry. 
When the last handle to a cache entry ++ * is released it is either freed (if it is invalid) or otherwise inserted ++ * in to the lru list. ++ */ ++void ++mb_cache_entry_release(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ __mb_cache_entry_release_unlock(ce); ++} ++ ++ ++/* ++ * mb_cache_entry_takeout() ++ * ++ * Take a cache entry out of the cache, making it invalid. The entry can later ++ * be re-inserted using mb_cache_entry_insert(), or released using ++ * mb_cache_entry_release(). ++ */ ++void ++mb_cache_entry_takeout(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ mb_assert(!__mb_cache_entry_in_lru(ce)); ++ if (__mb_cache_entry_is_linked(ce)) ++ __mb_cache_entry_unlink(ce); ++ spin_unlock(&mb_cache_spinlock); ++} ++ ++ ++/* ++ * mb_cache_entry_free() ++ * ++ * This is equivalent to the sequence mb_cache_entry_takeout() -- ++ * mb_cache_entry_release(). ++ */ ++void ++mb_cache_entry_free(struct mb_cache_entry *ce) ++{ ++ spin_lock(&mb_cache_spinlock); ++ mb_assert(!__mb_cache_entry_in_lru(ce)); ++ if (__mb_cache_entry_is_linked(ce)) ++ __mb_cache_entry_unlink(ce); ++ __mb_cache_entry_release_unlock(ce); ++} ++ ++ ++/* ++ * mb_cache_entry_dup() ++ * ++ * Duplicate a handle to a cache entry (does not duplicate the cache entry ++ * itself). After the call, both the old and the new handle must be released. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_dup(struct mb_cache_entry *ce) ++{ ++ atomic_inc(&ce->e_used); ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_get() ++ * ++ * Get a cache entry by device / block number. (There can only be one entry ++ * in the cache per device and block.) Returns NULL if no such cache entry ++ * exists. ++ */ ++struct mb_cache_entry * ++mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, ++ sector_t block) ++{ ++ unsigned int bucket; ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), ++ cache->c_bucket_bits); ++ spin_lock(&mb_cache_spinlock); ++ list_for_each(l, &cache->c_block_hash[bucket]) { ++ ce = list_entry(l, struct mb_cache_entry, e_block_list); ++ if (ce->e_bdev == bdev && ce->e_block == block) { ++ ce = __mb_cache_entry_read(ce); ++ goto cleanup; ++ } ++ } ++ ce = NULL; ++ ++cleanup: ++ spin_unlock(&mb_cache_spinlock); ++ return ce; ++} ++ ++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) ++ ++static struct mb_cache_entry * ++__mb_cache_entry_find(struct list_head *l, struct list_head *head, ++ int index, struct block_device *bdev, unsigned int key) ++{ ++ while (l != head) { ++ struct mb_cache_entry *ce = ++ list_entry(l, struct mb_cache_entry, ++ e_indexes[index].o_list); ++ if (ce->e_bdev == bdev && ++ ce->e_indexes[index].o_key == key) { ++ ce = __mb_cache_entry_read(ce); ++ if (ce) ++ return ce; ++ } ++ l = l->next; ++ } ++ return NULL; ++} ++ ++ ++/* ++ * mb_cache_entry_find_first() ++ * ++ * Find the first cache entry on a given device with a certain key in ++ * an additional index. Additonal matches can be found with ++ * mb_cache_entry_find_next(). Returns NULL if no match was found. 
++ * ++ * @cache: the cache to search ++ * @index: the number of the additonal index to search (0<=indexc_bucket_bits); ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ mb_assert(index < mb_cache_indexes(cache)); ++ spin_lock(&mb_cache_spinlock); ++ l = cache->c_indexes_hash[index][bucket].next; ++ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], ++ index, bdev, key); ++ spin_unlock(&mb_cache_spinlock); ++ return ce; ++} ++ ++ ++/* ++ * mb_cache_entry_find_next() ++ * ++ * Find the next cache entry on a given device with a certain key in an ++ * additional index. Returns NULL if no match could be found. The previous ++ * entry is atomatically released, so that mb_cache_entry_find_next() can ++ * be called like this: ++ * ++ * entry = mb_cache_entry_find_first(); ++ * while (entry) { ++ * ... ++ * entry = mb_cache_entry_find_next(entry, ...); ++ * } ++ * ++ * @prev: The previous match ++ * @index: the number of the additonal index to search (0<=indexe_cache; ++ unsigned int bucket = hash_long(key, cache->c_bucket_bits); ++ struct list_head *l; ++ struct mb_cache_entry *ce; ++ ++ mb_assert(index < mb_cache_indexes(cache)); ++ spin_lock(&mb_cache_spinlock); ++ l = prev->e_indexes[index].o_list.next; ++ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], ++ index, bdev, key); ++ __mb_cache_entry_release_unlock(prev); ++ return ce; ++} ++ ++#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ +diff -Nru a/fs/namei.c b/fs/namei.c +--- a/fs/namei.c Sat Jan 4 18:24:12 2003 ++++ b/fs/namei.c Sat Jan 4 18:24:12 2003 +@@ -108,6 +108,14 @@ + * POSIX.1 2.4: an empty pathname is invalid (ENOENT). + * PATH_MAX includes the nul terminator --RR. + */ ++ ++void intent_release(struct dentry *de, struct lookup_intent *it) ++{ ++ if (it && de->d_op && de->d_op->d_intent_release) ++ de->d_op->d_intent_release(de, it); ++ ++} ++ + static inline int do_getname(const char *filename, char *page) + { + int retval; +@@ -265,6 +273,9 @@ + + void path_release(struct nameidata *nd) + { ++ if (&nd->it && nd->dentry && nd->dentry->d_op && ++ nd->dentry->d_op->d_intent_release) ++ nd->dentry->d_op->d_intent_release(nd->dentry, &nd->it); + dput(nd->dentry); + mntput(nd->mnt); + } +@@ -273,10 +284,18 @@ + * Internal lookup() using the new generic dcache. + * SMP-safe + */ +-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) ++static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) + { + struct dentry * dentry = d_lookup(parent, name); + ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { ++ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && ++ !d_invalidate(dentry)) { ++ dput(dentry); ++ dentry = NULL; ++ } ++ return dentry; ++ } else + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { + if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { + dput(dentry); +@@ -351,7 +370,7 @@ + * make sure that nobody added the entry to the dcache in the meantime.. 
+ * SMP-safe + */ +-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) ++static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) + { + struct dentry * result; + struct inode *dir = parent->d_inode; +@@ -369,7 +388,10 @@ + struct dentry * dentry = d_alloc(parent, name); + result = ERR_PTR(-ENOMEM); + if (dentry) { +- result = dir->i_op->lookup(dir, dentry); ++ if (dir->i_op->lookup2) ++ result = dir->i_op->lookup2(dir, dentry, it); ++ else ++ result = dir->i_op->lookup(dir, dentry); + if (result) + dput(dentry); + else { +@@ -391,6 +413,12 @@ + dput(result); + result = ERR_PTR(-ENOENT); + } ++ } else if (result->d_op && result->d_op->d_revalidate2) { ++ if (!result->d_op->d_revalidate2(result, flags, it) && ++ !d_invalidate(result)) { ++ dput(result); ++ result = ERR_PTR(-ENOENT); ++ } + } + return result; + } +@@ -402,7 +430,7 @@ + * Without that kind of total limit, nasty chains of consecutive + * symlinks can cause almost arbitrarily long lookups. + */ +-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) ++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) + { + int err = -ELOOP; + if (current->link_count >= 5) +@@ -419,10 +447,15 @@ + current->link_count++; + current->total_link_count++; + UPDATE_ATIME(dentry->d_inode); +- err = dentry->d_inode->i_op->follow_link(dentry, nd); ++ if (dentry->d_inode->i_op->follow_link2) { ++ err = dentry->d_inode->i_op->follow_link2(dentry, nd); ++ } ++ else ++ err = dentry->d_inode->i_op->follow_link(dentry, nd); + current->link_count--; + return err; + loop: ++ intent_release(dentry, &nd->it); + path_release(nd); + return err; + } +@@ -523,6 +556,8 @@ + + if (!dentry) + goto dcache_miss; ++ if (dentry->d_op && dentry->d_op->d_revalidate2) ++ goto need_revalidate2; + if (dentry->d_op && dentry->d_op->d_revalidate) + goto need_revalidate; + done: +@@ -534,7 +569,12 @@ + unlock_nd(nd); + + need_lookup: +- dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); ++ if (nd->it.it_op == 0){ ++ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE, NULL); ++ } ++ else{ ++ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE, &nd->it); ++ } + if (IS_ERR(dentry)) + goto fail; + mntget(mnt); +@@ -546,6 +586,18 @@ + lock_nd(nd); + goto done; + ++need_revalidate2: ++ mntget(mnt); ++ dget_locked(dentry); ++ unlock_nd(nd); ++ if (dentry->d_op->d_revalidate2(dentry, flags, &nd->it)) ++ goto relock; ++ if (d_invalidate(dentry)) ++ goto relock; ++ dput(dentry); ++ mntput(mnt); ++ goto need_lookup; ++ + need_revalidate: + mntget(mnt); + dget_locked(dentry); +@@ -577,6 +629,7 @@ + struct inode *inode; + int err; + unsigned int lookup_flags = nd->flags; ++ int save_it_op = nd->it.it_op; + + while (*name=='/') + name++; +@@ -648,6 +701,7 @@ + break; + } + /* This does the actual lookups.. 
*/ ++ nd->it.it_op = 0; + err = do_lookup(nd, &this, &next, &pinned, LOOKUP_CONTINUE); + if (err) + break; +@@ -662,7 +716,7 @@ + if (!inode->i_op) + break; + +- if (inode->i_op->follow_link) { ++ if (inode->i_op->follow_link || inode->i_op->follow_link2) { + mntget(next.mnt); + dget_locked(next.dentry); + unlock_nd(nd); +@@ -684,7 +738,7 @@ + nd->dentry = next.dentry; + } + err = -ENOTDIR; +- if (!inode->i_op->lookup) ++ if (!inode->i_op->lookup && !inode->i_op->lookup2) + break; + continue; + /* here ends the main loop */ +@@ -692,6 +746,7 @@ + last_with_slashes: + lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; + last_component: ++ nd->it.it_op = save_it_op; + if (lookup_flags & LOOKUP_PARENT) + goto lookup_parent; + if (this.name[0] == '.') switch (this.len) { +@@ -717,7 +772,8 @@ + follow_mount(&next.mnt, &next.dentry); + inode = next.dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) +- && inode && inode->i_op && inode->i_op->follow_link) { ++ && inode && inode->i_op && ++ (inode->i_op->follow_link || inode->i_op->follow_link2)) { + mntget(next.mnt); + dget_locked(next.dentry); + unlock_nd(nd); +@@ -737,7 +793,8 @@ + break; + if (lookup_flags & LOOKUP_DIRECTORY) { + err = -ENOTDIR; +- if (!inode->i_op || !inode->i_op->lookup) ++ if (!inode->i_op || ++ (!inode->i_op->lookup && !inode->i_op->lookup2)) + break; + } + goto return_base; +@@ -886,7 +943,8 @@ + * needs parent already locked. Doesn't follow mounts. + * SMP-safe. + */ +-struct dentry * lookup_hash(struct qstr *name, struct dentry * base) ++struct dentry * lookup_hash(struct qstr *name, struct dentry * base, ++ struct lookup_intent *it) + { + struct dentry * dentry; + struct inode *inode; +@@ -909,13 +967,16 @@ + goto out; + } + +- dentry = cached_lookup(base, name, 0); ++ dentry = cached_lookup(base, name, 0, it); + if (!dentry) { + struct dentry *new = d_alloc(base, name); + dentry = ERR_PTR(-ENOMEM); + if (!new) + goto out; +- dentry = inode->i_op->lookup(inode, new); ++ if (inode->i_op->lookup2) ++ dentry = inode->i_op->lookup2(inode, new, it); ++ else ++ dentry = inode->i_op->lookup(inode, new); + if (!dentry) { + dentry = new; + security_ops->inode_post_lookup(inode, dentry); +@@ -927,7 +988,7 @@ + } + + /* SMP-safe */ +-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct lookup_intent *it) + { + unsigned long hash; + struct qstr this; +@@ -947,11 +1008,16 @@ + } + this.hash = end_name_hash(hash); + +- return lookup_hash(&this, base); ++ return lookup_hash(&this, base, it); + access: + return ERR_PTR(-EACCES); + } + ++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++{ ++ return lookup_one_len_it(name, base, len, NULL); ++} ++ + /* + * namei() + * +@@ -1268,7 +1334,9 @@ + + dir = nd->dentry; + down(&dir->d_inode->i_sem); +- dentry = lookup_hash(&nd->last, nd->dentry); ++ nd->it.it_op |= IT_CREAT; ++ nd->it.it_mode = mode; ++ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); + + do_last: + error = PTR_ERR(dentry); +@@ -1277,6 +1345,7 @@ + goto exit; + } + ++ nd->it.it_mode = mode; + /* Negative dentry, just create the file */ + if (!dentry->d_inode) { + error = vfs_create(dir->d_inode, dentry, +@@ -1310,7 +1379,8 @@ + error = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; +- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) ++ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || ++ dentry->d_inode->i_op->follow_link2)) + goto 
do_link; + + dput(nd->dentry); +@@ -1325,8 +1395,10 @@ + return 0; + + exit_dput: ++ intent_release(dentry, &nd->it); + dput(dentry); + exit: ++ intent_release(nd->dentry, &nd->it); + path_release(nd); + return error; + +@@ -1348,7 +1420,12 @@ + if (error) + goto exit_dput; + UPDATE_ATIME(dentry->d_inode); +- error = dentry->d_inode->i_op->follow_link(dentry, nd); ++ if(dentry->d_inode->i_op->follow_link2) ++ error = dentry->d_inode->i_op->follow_link2(dentry, nd); ++ else ++ error = dentry->d_inode->i_op->follow_link(dentry, nd); ++ if (error) ++ intent_release(dentry, &nd->it); + dput(dentry); + if (error) + return error; +@@ -1370,7 +1447,7 @@ + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); +- dentry = lookup_hash(&nd->last, nd->dentry); ++ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); + putname(nd->last.name); + goto do_last; + } +@@ -1384,7 +1461,7 @@ + dentry = ERR_PTR(-EEXIST); + if (nd->last_type != LAST_NORM) + goto fail; +- dentry = lookup_hash(&nd->last, nd->dentry); ++ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); + if (IS_ERR(dentry)) + goto fail; + if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) +@@ -1429,6 +1506,7 @@ + char * tmp; + struct dentry * dentry; + struct nameidata nd; ++ struct lookup_intent it = { .it_op = IT_MKNOD, .it_mode = mode }; + + if (S_ISDIR(mode)) + return -EPERM; +@@ -1436,9 +1514,14 @@ + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + ++ nd.it = it; ++ nd.it.it_mode = 0; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ nd.it = it; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1457,6 +1540,7 @@ + default: + error = -EINVAL; + } ++ intent_release(dentry, &nd.it); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1495,6 +1579,7 @@ + { + int error = 0; + char * tmp; ++ struct lookup_intent it = { .it_op = IT_MKDIR, .it_mode = mode }; + + tmp = getname(pathname); + error = PTR_ERR(tmp); +@@ -1502,14 +1587,19 @@ + struct dentry *dentry; + struct nameidata nd; + ++ nd.it = it; ++ nd.it.it_mode = 0; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ nd.it = it; + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_mkdir(nd.dentry->d_inode, dentry, + mode & ~current->fs->umask); ++ intent_release(dentry, &nd.it); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1593,11 +1683,14 @@ + char * name; + struct dentry *dentry; + struct nameidata nd; ++ struct lookup_intent it = { .it_op = IT_RMDIR }; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); +- ++ ++ nd.it = it; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(name, LOOKUP_PARENT, &nd); + if (error) + goto exit; +@@ -1614,10 +1707,12 @@ + goto exit1; + } + down(&nd.dentry->d_inode->i_sem); +- dentry = lookup_hash(&nd.last, nd.dentry); ++ nd.it = it; ++ dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_rmdir(nd.dentry->d_inode, dentry); ++ intent_release(dentry, &nd.it); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1663,11 +1758,14 @@ + char * name; + struct dentry *dentry; + struct nameidata nd; ++ struct lookup_intent it = { .it_op = IT_UNLINK }; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + ++ nd.it = it; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(name, LOOKUP_PARENT, &nd); + if (error) + goto exit; +@@ -1675,7 +1773,9 @@ + if (nd.last_type != 
LAST_NORM) + goto exit1; + down(&nd.dentry->d_inode->i_sem); +- dentry = lookup_hash(&nd.last, nd.dentry); ++ ++ nd.it = it; ++ dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + /* Why not before? Because we want correct error value */ +@@ -1683,6 +1783,7 @@ + goto slashes; + error = vfs_unlink(nd.dentry->d_inode, dentry); + exit2: ++ intent_release(dentry, &nd.it); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1727,6 +1828,7 @@ + int error = 0; + char * from; + char * to; ++ struct lookup_intent it = { .it_op = IT_SYMLINK }; + + from = getname(oldname); + if(IS_ERR(from)) +@@ -1737,13 +1839,18 @@ + struct dentry *dentry; + struct nameidata nd; + ++ nd.it = it; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ nd.it = it; ++ nd.it.it_data = from; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_symlink(nd.dentry->d_inode, dentry, from); ++ intent_release(dentry, &nd.it); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1810,24 +1917,32 @@ + struct nameidata nd, old_nd; + int error; + char * to; ++ struct lookup_intent it = { .it_op = IT_LINK }; ++ + + to = getname(newname); + if (IS_ERR(to)) + return PTR_ERR(to); + ++ old_nd.it = it; + error = __user_walk(oldname, 0, &old_nd); + if (error) + goto exit; ++ nd.it = it; ++ nd.it.it_op = IT_LOOKUP; + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ nd.it = old_nd.it;//if __user_walk() above didn't change old_nd.it, this step is not need. ++ nd.it.it_op = IT_LINK2; + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); ++ intent_release(new_dentry, &nd.it); + dput(new_dentry); + } + up(&nd.dentry->d_inode->i_sem); +@@ -1874,7 +1989,8 @@ + * locking]. 
+ */ + int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct lookup_intent *it) + { + int error = 0; + struct inode *target; +@@ -1902,6 +2018,7 @@ + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); ++ intent_release(new_dentry, it); + if (target) { + if (!error) + target->i_flags |= S_DEAD; +@@ -1919,7 +2036,8 @@ + } + + int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct lookup_intent *it) + { + struct inode *target; + int error; +@@ -1944,12 +2062,14 @@ + } + if (target) + up(&target->i_sem); ++ intent_release(new_dentry, it); + dput(new_dentry); + return error; + } + + int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry, ++ struct lookup_intent *it) + { + int error; + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); +@@ -1975,9 +2095,9 @@ + DQUOT_INIT(new_dir); + + if (is_dir) +- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); + else +- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); ++ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); + if (!error) { + if (old_dir == new_dir) + inode_dir_notify(old_dir, DN_RENAME); +@@ -1996,11 +2116,16 @@ + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; ++ struct lookup_intent it = { .it_op = IT_RENAME }; + ++ oldnd.it = it; ++ oldnd.it.it_op = IT_LOOKUP; + error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (error) + goto exit; + ++ newnd.it = it; ++ newnd.it.it_op = IT_LOOKUP; + error = path_lookup(newname, LOOKUP_PARENT, &newnd); + if (error) + goto exit1; +@@ -2020,7 +2145,8 @@ + + trap = lock_rename(new_dir, old_dir); + +- old_dentry = lookup_hash(&oldnd.last, old_dir); ++ oldnd.it = it; ++ old_dentry = lookup_hash(&oldnd.last, old_dir, &oldnd.it); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto exit3; +@@ -2040,7 +2166,9 @@ + error = -EINVAL; + if (old_dentry == trap) + goto exit4; +- new_dentry = lookup_hash(&newnd.last, new_dir); ++ newnd.it = oldnd.it; ++ newnd.it.it_op = IT_RENAME2; ++ new_dentry = lookup_hash(&newnd.last, new_dir, &newnd.it); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; +@@ -2050,10 +2178,12 @@ + goto exit5; + + error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ new_dir->d_inode, new_dentry, NULL); + exit5: ++ intent_release(new_dentry, &newnd.it); + dput(new_dentry); + exit4: ++ intent_release(old_dentry, &oldnd.it); + dput(old_dentry); + exit3: + unlock_rename(new_dir, old_dir); +@@ -2135,12 +2265,17 @@ + path_release(nd); + return PTR_ERR(link); + } ++/* if call vfs_follow_link, then equal set nd->it.it_op = 0;then call __vfs_follow_link ++ * if call vfs_follow_link_it, now equal call __vfs_follow_link directly ++*/ + + int vfs_follow_link(struct nameidata *nd, const char *link) + { ++ nd->it.it_op = 0; + return __vfs_follow_link(nd, link); + } + ++ + /* get the link contents into pagecache */ + static char *page_getlink(struct dentry * dentry, struct page **ppage) + { +@@ -2180,7 +2315,9 @@ + { + struct page *page = NULL; + char *s = 
page_getlink(dentry, &page); +- int res = __vfs_follow_link(nd, s); ++ int res; ++ nd->it.it_op = 0; ++ res = __vfs_follow_link(nd, s); + if (page) { + kunmap(page); + page_cache_release(page); +diff -Nru a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c +--- a/fs/nfsd/vfs.c Sat Jan 4 18:24:12 2003 ++++ b/fs/nfsd/vfs.c Sat Jan 4 18:24:12 2003 +@@ -1292,7 +1292,7 @@ + err = nfserr_perm; + } else + #endif +- err = vfs_rename(fdir, odentry, tdir, ndentry); ++ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); +diff -Nru a/fs/open.c b/fs/open.c +--- a/fs/open.c Sat Jan 4 18:24:12 2003 ++++ b/fs/open.c Sat Jan 4 18:24:12 2003 +@@ -22,6 +22,8 @@ + + #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) + ++extern void intent_release(struct dentry *de, struct lookup_intent *it); ++ + int vfs_statfs(struct super_block *sb, struct statfs *buf) + { + int retval = -ENODEV; +@@ -96,7 +98,9 @@ + struct nameidata nd; + struct inode * inode; + int error; ++ struct lookup_intent it = { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; +@@ -237,7 +241,9 @@ + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; ++ struct lookup_intent it = { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = user_path_walk(filename, &nd); + if (error) + goto out; +@@ -283,9 +289,10 @@ + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; ++ struct lookup_intent it = { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = user_path_walk(filename, &nd); +- + if (error) + goto out; + inode = nd.dentry->d_inode; +@@ -329,7 +336,9 @@ + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + ++ nd.it = it; + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ + return -EINVAL; + +@@ -374,7 +383,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + ++ nd.it = it; + error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + if (error) + goto out; +@@ -425,7 +436,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + ++ nd.it = it; + error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + if (error) + goto out; +@@ -488,7 +501,9 @@ + struct inode * inode; + int error; + struct iattr newattrs; ++ struct lookup_intent it= { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = user_path_walk(filename, &nd); + if (error) + goto out; +@@ -555,7 +570,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = user_path_walk(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); +@@ -568,7 +585,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_SETATTR }; + ++ nd.it = it; + error = user_path_walk_link(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); +@@ -605,11 +624,16 @@ + * for the internal routines (ie open_namei()/follow_link() etc). 00 is + * used by symlinks. 
+ */ ++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, ++ int flags, struct lookup_intent *it); ++ + struct file *filp_open(const char * filename, int flags, int mode) + { + int namei_flags, error; + struct nameidata nd; ++ struct lookup_intent it = { .it_op = IT_OPEN }; + ++ nd.it = it; + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; +@@ -618,12 +642,13 @@ + + error = open_namei(filename, namei_flags, mode, &nd); + if (!error) +- return dentry_open(nd.dentry, nd.mnt, flags); ++ return dentry_open_it(nd.dentry, nd.mnt, flags, &nd.it); + + return ERR_PTR(error); + } + +-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, ++ int flags, struct lookup_intent *it) + { + struct file * f; + struct inode *inode; +@@ -666,6 +691,7 @@ + } + } + ++ intent_release(dentry, it); + return f; + + cleanup_all: +@@ -678,10 +704,17 @@ + cleanup_file: + put_filp(f); + cleanup_dentry: ++ intent_release(dentry, it); + dput(dentry); + mntput(mnt); + return ERR_PTR(error); + } ++ ++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++{ ++ return dentry_open_it(dentry, mnt, flags, NULL); ++} ++ + + /* + * Find an empty file descriptor entry, and mark it busy. +diff -Nru a/fs/stat.c b/fs/stat.c +--- a/fs/stat.c Sat Jan 4 18:24:12 2003 ++++ b/fs/stat.c Sat Jan 4 18:24:12 2003 +@@ -61,7 +61,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + ++ nd.it = it; + error = user_path_walk(name, &nd); + if (!error) { + error = vfs_getattr(nd.mnt, nd.dentry, stat); +@@ -74,7 +76,9 @@ + { + struct nameidata nd; + int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + ++ nd.it = it; + error = user_path_walk_link(name, &nd); + if (!error) { + error = vfs_getattr(nd.mnt, nd.dentry, stat); +diff -Nru a/fs/sysfs/inode.c b/fs/sysfs/inode.c +--- a/fs/sysfs/inode.c Sat Jan 4 18:24:12 2003 ++++ b/fs/sysfs/inode.c Sat Jan 4 18:24:12 2003 +@@ -471,7 +471,7 @@ + qstr.name = name; + qstr.len = strlen(name); + qstr.hash = full_name_hash(name,qstr.len); +- return lookup_hash(&qstr,parent); ++ return lookup_hash(&qstr,parent,NULL); + } + + /** +diff -Nru a/include/linux/dcache.h b/include/linux/dcache.h +--- a/include/linux/dcache.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/dcache.h Sat Jan 4 18:24:12 2003 +@@ -9,6 +9,24 @@ + #include + #include /* for BUG() */ + ++#define IT_OPEN (1) ++#define IT_CREAT (1<<1) ++#define IT_MKDIR (1<<2) ++#define IT_LINK (1<<3) ++#define IT_LINK2 (1<<4) ++#define IT_SYMLINK (1<<5) ++#define IT_UNLINK (1<<6) ++#define IT_RMDIR (1<<7) ++#define IT_RENAME (1<<8) ++#define IT_RENAME2 (1<<9) ++#define IT_READDIR (1<<10) ++#define IT_GETATTR (1<<11) ++#define IT_SETATTR (1<<12) ++#define IT_READLINK (1<<13) ++#define IT_MKNOD (1<<14) ++#define IT_LOOKUP (1<<15) ++ ++ + /* + * linux/include/linux/dcache.h + * +@@ -30,6 +48,8 @@ + unsigned int hash; + }; + ++#include ++ + struct dentry_stat_t { + int nr_dentry; + int nr_unused; +@@ -79,6 +99,7 @@ + struct list_head d_subdirs; /* our children */ + struct list_head d_alias; /* inode alias list */ + int d_mounted; ++ struct lookup_intent *d_it; + struct qstr d_name; + unsigned long d_time; /* used by d_revalidate */ + struct dentry_operations *d_op; +@@ -96,6 +117,8 @@ + int (*d_delete)(struct dentry *); + void (*d_release)(struct dentry *); + void (*d_iput)(struct dentry *, struct inode *); ++ int (*d_revalidate2)(struct 
dentry *, int, struct lookup_intent *); ++ void (*d_intent_release)(struct dentry *, struct lookup_intent *); + }; + + /* the dentry parameter passed to d_hash and d_compare is the parent +diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h +--- a/include/linux/ext3_fs.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/ext3_fs.h Sat Jan 4 18:24:12 2003 +@@ -64,8 +64,6 @@ + */ + #define EXT3_BAD_INO 1 /* Bad blocks inode */ + #define EXT3_ROOT_INO 2 /* Root inode */ +-#define EXT3_ACL_IDX_INO 3 /* ACL inode */ +-#define EXT3_ACL_DATA_INO 4 /* ACL inode */ + #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ + #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ + #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ +@@ -95,7 +93,6 @@ + #else + # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) + #endif +-#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) + #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) + #ifdef __KERNEL__ + # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +@@ -130,28 +127,6 @@ + #endif + + /* +- * ACL structures +- */ +-struct ext3_acl_header /* Header of Access Control Lists */ +-{ +- __u32 aclh_size; +- __u32 aclh_file_count; +- __u32 aclh_acle_count; +- __u32 aclh_first_acle; +-}; +- +-struct ext3_acl_entry /* Access Control List Entry */ +-{ +- __u32 acle_size; +- __u16 acle_perms; /* Access permissions */ +- __u16 acle_type; /* Type of entry */ +- __u16 acle_tag; /* User or group identity */ +- __u16 acle_pad1; +- __u32 acle_next; /* Pointer on next entry for the */ +- /* same inode or on next free entry */ +-}; +- +-/* + * Structure of a blocks group descriptor + */ + struct ext3_group_desc +@@ -347,6 +322,7 @@ + #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ + #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ + #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ ++#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H +@@ -529,7 +505,7 @@ + #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ + #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + +-#define EXT3_FEATURE_COMPAT_SUPP 0 ++#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ +@@ -713,6 +689,7 @@ + + + /* inode.c */ ++extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); + +@@ -781,8 +758,10 @@ + + /* namei.c */ + extern struct inode_operations ext3_dir_inode_operations; ++extern struct inode_operations ext3_special_inode_operations; + + /* symlink.c */ ++extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + + +diff -Nru a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h +--- a/include/linux/ext3_jbd.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/ext3_jbd.h Sat Jan 4 18:24:12 2003 +@@ -30,13 +30,19 @@ + + #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 + ++/* Extended attributes may touch two data buffers, two bitmap buffers, ++ 
* and two group and summaries. */ ++ ++#define EXT3_XATTR_TRANS_BLOCKS 8 ++ + /* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +-#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) ++#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ ++ EXT3_XATTR_TRANS_BLOCKS - 2) + + extern int ext3_writepage_trans_blocks(struct inode *inode); + +diff -Nru a/include/linux/fs.h b/include/linux/fs.h +--- a/include/linux/fs.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/fs.h Sat Jan 4 18:24:12 2003 +@@ -700,7 +700,7 @@ + extern int vfs_link(struct dentry *, struct inode *, struct dentry *); + extern int vfs_rmdir(struct inode *, struct dentry *); + extern int vfs_unlink(struct inode *, struct dentry *); +-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); ++extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct lookup_intent *it); + + /* + * File types +@@ -769,6 +769,8 @@ + struct inode_operations { + int (*create) (struct inode *,struct dentry *,int); + struct dentry * (*lookup) (struct inode *,struct dentry *); ++ struct dentry * (*lookup2) (struct inode *,struct dentry *, ++ struct lookup_intent *); + int (*link) (struct dentry *,struct inode *,struct dentry *); + int (*unlink) (struct inode *,struct dentry *); + int (*symlink) (struct inode *,struct dentry *,const char *); +@@ -779,6 +781,7 @@ + struct inode *, struct dentry *); + int (*readlink) (struct dentry *, char *,int); + int (*follow_link) (struct dentry *, struct nameidata *); ++ int (*follow_link2) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int); + int (*setattr) (struct dentry *, struct iattr *); +@@ -995,6 +998,7 @@ + extern int unregister_filesystem(struct file_system_type *); + extern struct vfsmount *kern_mount(struct file_system_type *); + extern int may_umount(struct vfsmount *); ++struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); + extern long do_mount(char *, char *, char *, unsigned long, void *); + + #define kern_umount mntput +diff -Nru a/include/linux/lustre_version.h b/include/linux/lustre_version.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/include/linux/lustre_version.h Sat Jan 4 18:24:12 2003 +@@ -0,0 +1 @@ ++#define LUSTRE_KERNEL_VERSION 4 +diff -Nru a/include/linux/mbcache.h b/include/linux/mbcache.h +--- /dev/null Wed Dec 31 16:00:00 1969 ++++ b/include/linux/mbcache.h Sat Jan 4 18:24:12 2003 +@@ -0,0 +1,72 @@ ++/* ++ File: linux/mbcache.h ++ ++ (C) 2001 by Andreas Gruenbacher, ++*/ ++ ++/* Hardwire the number of additional indexes */ ++#define MB_CACHE_INDEXES_COUNT 1 ++ ++struct mb_cache_entry; ++ ++struct mb_cache_op { ++ void (*free)(struct mb_cache_entry *); ++}; ++ ++struct mb_cache { ++ struct list_head c_cache_list; ++ const char *c_name; ++ struct mb_cache_op c_op; ++ atomic_t c_entry_count; ++ int c_bucket_bits; ++#ifndef MB_CACHE_INDEXES_COUNT ++ int c_indexes_count; ++#endif ++ kmem_cache_t *c_entry_cache; ++ struct list_head *c_block_hash; ++ struct list_head *c_indexes_hash[0]; ++}; ++ ++struct mb_cache_entry_index { ++ struct list_head o_list; ++ unsigned int o_key; ++}; ++ ++struct mb_cache_entry { ++ struct 
list_head e_lru_list; ++ struct mb_cache *e_cache; ++ atomic_t e_used; ++ struct block_device *e_bdev; ++ sector_t e_block; ++ struct list_head e_block_list; ++ struct mb_cache_entry_index e_indexes[0]; ++}; ++ ++/* Functions on caches */ ++ ++struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, ++ int, int); ++void mb_cache_shrink(struct mb_cache *, struct block_device *); ++void mb_cache_destroy(struct mb_cache *); ++ ++/* Functions on cache entries */ ++ ++struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); ++int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *, ++ sector_t, unsigned int[]); ++void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); ++void mb_cache_entry_release(struct mb_cache_entry *); ++void mb_cache_entry_takeout(struct mb_cache_entry *); ++void mb_cache_entry_free(struct mb_cache_entry *); ++struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); ++struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, ++ struct block_device *, ++ sector_t); ++#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) ++struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, ++ struct block_device *, ++ unsigned int); ++struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, ++ struct block_device *, ++ unsigned int); ++#endif +diff -Nru a/include/linux/namei.h b/include/linux/namei.h +--- a/include/linux/namei.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/namei.h Sat Jan 4 18:24:12 2003 +@@ -5,6 +5,17 @@ + + struct vfsmount; + ++struct lookup_intent { ++ int it_op; ++ int it_mode; ++ int it_disposition; ++ int it_status; ++ struct iattr *it_iattr; ++ __u64 it_lock_handle[2]; ++ int it_lock_mode; ++ void *it_data; ++}; ++ + struct nameidata { + struct dentry *dentry; + struct vfsmount *mnt; +@@ -13,6 +24,7 @@ + int last_type; + struct dentry *old_dentry; + struct vfsmount *old_mnt; ++ struct lookup_intent it; + }; + + /* +@@ -46,7 +58,7 @@ + extern void path_release(struct nameidata *); + + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); +-extern struct dentry * lookup_hash(struct qstr *, struct dentry *); ++extern struct dentry * lookup_hash(struct qstr *, struct dentry *, struct lookup_intent *); + + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); +diff -Nru a/include/linux/slab.h b/include/linux/slab.h +--- a/include/linux/slab.h Sat Jan 4 18:24:12 2003 ++++ b/include/linux/slab.h Sat Jan 4 18:24:12 2003 +@@ -56,6 +56,7 @@ + extern int kmem_cache_shrink(kmem_cache_t *); + extern void *kmem_cache_alloc(kmem_cache_t *, int); + extern void kmem_cache_free(kmem_cache_t *, void *); ++extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); + extern unsigned int kmem_cache_size(kmem_cache_t *); + + extern void *kmalloc(size_t, int); +diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c +--- a/kernel/ksyms.c Sat Jan 4 18:24:12 2003 ++++ b/kernel/ksyms.c Sat Jan 4 18:24:12 2003 +@@ -365,6 +365,13 @@ + EXPORT_SYMBOL(tty_get_baud_rate); + EXPORT_SYMBOL(do_SAK); + ++/* lustre */ ++EXPORT_SYMBOL(panic_notifier_list); ++//EXPORT_SYMBOL(pagecache_lock_cacheline); ++EXPORT_SYMBOL(do_kern_mount); ++EXPORT_SYMBOL(exit_files); ++EXPORT_SYMBOL(kmem_cache_validate); ++ + /* filesystem registration */ + EXPORT_SYMBOL(register_filesystem); + EXPORT_SYMBOL(unregister_filesystem); +@@ -529,6 +536,7 @@ + EXPORT_SYMBOL(seq_lseek); + EXPORT_SYMBOL(single_open); + 
EXPORT_SYMBOL(single_release); ++EXPORT_SYMBOL(reparent_to_init); + + /* Program loader interfaces */ + EXPORT_SYMBOL(setup_arg_pages); +diff -Nru a/mm/slab.c b/mm/slab.c +--- a/mm/slab.c Sat Jan 4 18:24:12 2003 ++++ b/mm/slab.c Sat Jan 4 18:24:12 2003 +@@ -1236,6 +1236,59 @@ + * Called with the cache-lock held. + */ + ++extern struct page *check_get_page(unsigned long kaddr); ++struct page *page_mem_map(struct page *page); ++static int kmem_check_cache_obj (kmem_cache_t * cachep, ++ slab_t *slabp, void * objp) ++{ ++ int i; ++ unsigned int objnr; ++ ++#if DEBUG ++ if (cachep->flags & SLAB_RED_ZONE) { ++ objp -= BYTES_PER_WORD; ++ if ( *(unsigned long *)objp != RED_MAGIC2) ++ /* Either write before start, or a double free. */ ++ return 0; ++ if (*(unsigned long *)(objp+cachep->objsize - ++ BYTES_PER_WORD) != RED_MAGIC2) ++ /* Either write past end, or a double free. */ ++ return 0; ++ } ++#endif ++ ++ objnr = (objp-slabp->s_mem)/cachep->objsize; ++ if (objnr >= cachep->num) ++ return 0; ++ if (objp != slabp->s_mem + objnr*cachep->objsize) ++ return 0; ++ ++ /* Check slab's freelist to see if this obj is there. */ ++ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { ++ if (i == objnr) ++ return 0; ++ } ++ return 1; ++} ++ ++ ++int kmem_cache_validate(kmem_cache_t *cachep, void *objp) ++{ ++ struct page *page = check_get_page((unsigned long)objp); ++ ++ if (!page_mem_map(page)) ++ return 0; ++ ++ if (!PageSlab(page)) ++ return 0; ++ ++ /* XXX check for freed slab objects ? */ ++ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) ++ return 0; ++ ++ return (cachep == GET_PAGE_CACHE(page)); ++} ++ + #if DEBUG + static int kmem_extra_free_checks (kmem_cache_t * cachep, + slab_t *slabp, void * objp) +diff -Nru a/net/unix/af_unix.c b/net/unix/af_unix.c +--- a/net/unix/af_unix.c Sat Jan 4 18:24:12 2003 ++++ b/net/unix/af_unix.c Sat Jan 4 18:24:12 2003 +@@ -715,7 +715,7 @@ + /* + * Do the final lookup. + */ +- dentry = lookup_hash(&nd.last, nd.dentry); ++ dentry = lookup_hash(&nd.last, nd.dentry, NULL); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_mknod_unlock; diff --git a/lustre/kernel_patches/pc/lin-2.5.44.pc b/lustre/kernel_patches/pc/lin-2.5.44.pc new file mode 100644 index 0000000..ca773d5 --- /dev/null +++ b/lustre/kernel_patches/pc/lin-2.5.44.pc @@ -0,0 +1,33 @@ +arch/um/kernel/mem.c +fs/Config.help +fs/Config.in +fs/dcache.c +fs/driverfs/inode.c +fs/ext3/file.c +fs/ext3/ialloc.c +fs/ext3/inode.c +fs/ext3/Makefile +fs/ext3/namei.c +fs/ext3/super.c +fs/ext3/symlink.c +fs/ext3/xattr.c +fs/ext3/xattr.h +fs/ext3/xattr_user.c +fs/Makefile +fs/mbcache.c +fs/namei.c +fs/nfsd/vfs.c +fs/open.c +fs/stat.c +fs/sysfs/inode.c +include/linux/dcache.h +include/linux/ext3_fs.h +include/linux/ext3_jbd.h +include/linux/fs.h +include/linux/lustre_version.h +include/linux/mbcache.h +include/linux/namei.h +include/linux/slab.h +kernel/ksyms.c +mm/slab.c +net/unix/af_unix.c diff --git a/lustre/kernel_patches/series/lin-2.5.44 b/lustre/kernel_patches/series/lin-2.5.44 new file mode 100644 index 0000000..1bc028b --- /dev/null +++ b/lustre/kernel_patches/series/lin-2.5.44 @@ -0,0 +1 @@ +lin-2.5.44.patch diff --git a/lustre/kernel_patches/txt/lin-2.5.44.txt b/lustre/kernel_patches/txt/lin-2.5.44.txt new file mode 100644 index 0000000..694303d --- /dev/null +++ b/lustre/kernel_patches/txt/lin-2.5.44.txt @@ -0,0 +1,4 @@ +DESC +patches for making kernel 2.5.44 ready for mounting Lustre, and some basic +sys_call. +EDESC
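---
Illustrative sketches for readers of this patch follow; none of this code is part of the patch itself.

The new fs/ext3/xattr_user.c handler above serves extended attributes in the "user." namespace. A userspace sketch of how it would be exercised, assuming the filesystem is mounted with user xattrs enabled (the mount option that sets EXT3_MOUNT_XATTR_USER) and that libattr's <attr/xattr.h> wrappers are available; the file name is made up:

#include <stdio.h>
#include <sys/types.h>
#include <attr/xattr.h>		/* setxattr()/getxattr() wrappers, libattr */

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "testfile";
	char buf[64];
	ssize_t n;

	/* Stored via ext3_xattr_user_set(); fails with EOPNOTSUPP if
	 * user xattrs are not enabled on this mount. */
	if (setxattr(path, "user.demo", "hello", 5, 0) != 0) {
		perror("setxattr");
		return 1;
	}
	/* Read back via ext3_xattr_user_get(). */
	n = getxattr(path, "user.demo", buf, sizeof(buf));
	if (n >= 0)
		printf("user.demo = %.*s\n", (int)n, buf);
	return 0;
}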
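The mbcache API added in fs/mbcache.c and include/linux/mbcache.h composes as below. This is a minimal in-kernel usage sketch written only against the declarations and docstrings in the patch; the demo_* names and the key value are invented, and error handling is abbreviated:

#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/mbcache.h>

static struct mb_cache *demo_cache;

/* One additional index (matching the hardwired MB_CACHE_INDEXES_COUNT)
 * and 2^6 buckets per hash table. */
int demo_cache_init(void)
{
	demo_cache = mb_cache_create("demo_cache", NULL,
				     sizeof(struct mb_cache_entry) +
				     sizeof(struct mb_cache_entry_index),
				     1, 6);
	return demo_cache ? 0 : -ENOMEM;
}

/* Record that <bdev, block> holds content that hashes to "key". */
void demo_cache_remember(struct block_device *bdev, sector_t block,
			 unsigned int key)
{
	unsigned int keys[1] = { key };
	struct mb_cache_entry *ce = mb_cache_entry_alloc(demo_cache);

	if (!ce)
		return;
	/* Per the docstring, insert returns -EBUSY if another process
	 * inserted the same device/block in the meantime. */
	mb_cache_entry_insert(ce, bdev, block, keys);
	/* Dropping the handle parks a valid entry on the lru; an entry
	 * that never became valid is freed instead. */
	mb_cache_entry_release(ce);
}

/* Find a block with matching content; the caller must release the
 * returned handle with mb_cache_entry_release(). */
struct mb_cache_entry *demo_cache_lookup(struct block_device *bdev,
					 unsigned int key)
{
	return mb_cache_entry_find_first(demo_cache, 0, bdev, key);
}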
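The VFS changes in fs/namei.c, fs/open.c, include/linux/dcache.h and include/linux/namei.h thread a struct lookup_intent from the system call down to the filesystem, so a distributed filesystem like Lustre can combine the lookup and the intended operation (open, create, unlink, ...) in one round trip. A filesystem opts in by supplying the new lookup2/follow_link2 inode operations and d_revalidate2/d_intent_release dentry operations; unmodified filesystems keep using the old entry points. A skeletal opt-in, with all demo_* names invented and only the signatures taken from the patch:

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/namei.h>

static struct dentry *demo_lookup2(struct inode *dir, struct dentry *dentry,
				   struct lookup_intent *it)
{
	/* A cluster filesystem would ship it->it_op and it->it_mode to
	 * the server here and stash the returned lock in
	 * it->it_lock_handle / it->it_lock_mode. */
	if (it)
		printk(KERN_DEBUG "demo: lookup of %s with intent %#x\n",
		       dentry->d_name.name, it->it_op);
	d_add(dentry, NULL);		/* toy answer: name does not exist */
	return NULL;
}

static int demo_revalidate2(struct dentry *dentry, int flags,
			    struct lookup_intent *it)
{
	return 1;	/* cached dentry is still good for this intent */
}

static void demo_intent_release(struct dentry *dentry,
				struct lookup_intent *it)
{
	/* Undo whatever demo_lookup2() recorded in *it; the VFS calls
	 * this through intent_release() from path_release() and the
	 * syscall paths patched above. */
}

static struct inode_operations demo_dir_iops = {
	lookup2:	demo_lookup2,
};

static struct dentry_operations demo_dops = {
	d_revalidate2:		demo_revalidate2,
	d_intent_release:	demo_intent_release,
};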