Whamcloud - gitweb
b=8007
author nathan <nathan>
Fri, 17 Mar 2006 22:30:31 +0000 (22:30 +0000)
committer nathan <nathan>
Fri, 17 Mar 2006 22:30:31 +0000 (22:30 +0000)
update to b_rel_146

207 files changed:
ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch
ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
ldiskfs/kernel_patches/patches/iopen-2.6-rhel4.patch
ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch
ldiskfs/kernel_patches/patches/iopen-2.6.12.patch
ldiskfs/ldiskfs/autoMakefile.am
lustre/ChangeLog
lustre/autoconf/lustre-version.ac
lustre/include/linux/.cvsignore
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_import.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_user.h
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-x86_64.config
lustre/kernel_patches/kernel_configs/kernel-2.4.21-suse-2.4.21-2-x86_64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-bigsmp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-ia64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-ia64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-x86_64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-i686.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-x86_64.config
lustre/kernel_patches/kernel_configs/uml-vanilla-2.4.24.config
lustre/kernel_patches/patches/2.4.19-ext3.patch [deleted file]
lustre/kernel_patches/patches/2.4.19-jbd.patch [deleted file]
lustre/kernel_patches/patches/add_page_private-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/export-log-2.6-rhel4.patch [new file with mode: 0644]
lustre/kernel_patches/patches/export-show_task-2.4-bgl.patch [deleted file]
lustre/kernel_patches/patches/export-truncate-bgl.patch [deleted file]
lustre/kernel_patches/patches/export_symbols-2.6-rhel4.patch
lustre/kernel_patches/patches/export_symbols-2.6-suse.patch
lustre/kernel_patches/patches/exports_2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/ext-2.4-patch-1-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/ext3-delete_thread-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch [deleted file]
lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch
lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch
lustre/kernel_patches/patches/ext3-extents-2.4.24.patch
lustre/kernel_patches/patches/ext3-extents-2.4.29.patch
lustre/kernel_patches/patches/ext3-extents-2.6.12.patch
lustre/kernel_patches/patches/ext3-extents-2.6.5.patch
lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch
lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch
lustre/kernel_patches/patches/ext3-htree-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/ext3-include-fixes-2.6-rhel4.patch
lustre/kernel_patches/patches/ext3-include-fixes-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/kernel_patches/patches/ext3-nlinks-2.6.12.patch [deleted file]
lustre/kernel_patches/patches/ext3-no-write-super.patch [deleted file]
lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/ext3-unmount_sync.patch [deleted file]
lustre/kernel_patches/patches/ext3-use-after-free-2.4.19-pre1.patch [deleted file]
lustre/kernel_patches/patches/ext3-use-after-free-suse.patch [deleted file]
lustre/kernel_patches/patches/extN-wantedi-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/invalidate_show-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/iopen-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/iopen-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/iopen-2.4.20.patch
lustre/kernel_patches/patches/iopen-2.4.21-chaos.patch
lustre/kernel_patches/patches/iopen-2.6-rhel4.patch
lustre/kernel_patches/patches/iopen-2.6-suse.patch
lustre/kernel_patches/patches/iopen-2.6.12.patch
lustre/kernel_patches/patches/jbd-2.4.18-jcberr.patch [deleted file]
lustre/kernel_patches/patches/jbd-2.4.19-pre1-jcberr.patch [deleted file]
lustre/kernel_patches/patches/jbd-flushtime-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch [new file with mode: 0644]
lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch [deleted file]
lustre/kernel_patches/patches/kksymoops-2.4-bgl.patch [deleted file]
lustre/kernel_patches/patches/linux-2.4.18-netdump.patch [deleted file]
lustre/kernel_patches/patches/linux-2.4.19-bgl-xattr-0.8.54.patch [deleted file]
lustre/kernel_patches/patches/linux-2.4.19-suse-xattr-0.8.54-hp.patch [deleted file]
lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch [deleted file]
lustre/kernel_patches/patches/listman-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/mcore-2.4.20-8.patch [deleted file]
lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch [deleted file]
lustre/kernel_patches/patches/nfs-cifs-intent-2.6-rhel4.patch
lustre/kernel_patches/patches/nfs_export_kernel-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/qsnet-rhel-2.4.patch
lustre/kernel_patches/patches/removepage-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/resched-2.4.19-pre1.patch [deleted file]
lustre/kernel_patches/patches/socket-exports-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/tcp-zero-copy-2.4.19-pre1.patch [deleted file]
lustre/kernel_patches/patches/vfs_intent-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch [deleted file]
lustre/kernel_patches/patches/vfs_intent-2.6-rhel4.patch
lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch
lustre/kernel_patches/patches/vfs_intent-2.6.12.patch
lustre/kernel_patches/patches/vfs_nointent-2.6-rhel4.patch
lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch
lustre/kernel_patches/patches/vfs_nointent-2.6.12.patch
lustre/kernel_patches/patches/vfs_races-2.6.12.patch
lustre/kernel_patches/patches/vm-tunables-rhel4.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vmalloc_to_page-2.4.19-bgl.patch [deleted file]
lustre/kernel_patches/series/2.6-rhel4.series
lustre/kernel_patches/series/bgl-2.4.19 [deleted file]
lustre/kernel_patches/series/suse-2.4.21-cray
lustre/kernel_patches/series/suse-2.4.21-jvn
lustre/kernel_patches/targets/2.6-rhel4.target.in
lustre/kernel_patches/targets/2.6-suse.target.in
lustre/kernel_patches/which_patch
lustre/ldiskfs/autoMakefile.am
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/liblustre/llite_lib.c
lustre/liblustre/llite_lib.h
lustre/liblustre/lutil.c
lustre/liblustre/rw.c
lustre/liblustre/super.c
lustre/liblustre/tests/sanity.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/llite_mmap.c
lustre/llite/rw.c
lustre/lov/lov_internal.h
lustre/lov/lov_merge.c
lustre/lov/lov_obd.c
lustre/lov/lov_qos.c
lustre/lov/lov_request.c
lustre/lvfs/fsfilt_ext3.c
lustre/lvfs/fsfilt_reiserfs.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_join.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/mds_unlink_open.c
lustre/mgc/mgc_request.c
lustre/obdclass/debug.c
lustre/obdclass/genops.c
lustre/obdclass/llog.c
lustre/obdclass/llog_test.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lustre_peer.c
lustre/obdclass/prng.c
lustre/obdecho/echo_client.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_lvb.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/import.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/recover.c
lustre/quota/quota_ctl.c
lustre/quota/quota_master.c
lustre/scripts/lustre
lustre/tests/Makefile.am
lustre/tests/cfg/insanity-local.sh
lustre/tests/conf-sanity.sh
lustre/tests/fsx.c
lustre/tests/llmountcleanup.sh
lustre/tests/llog-test.sh
lustre/tests/random-reads.c
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh
lustre/tests/replay-single.sh
lustre/tests/run-llog.sh
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/test-framework.sh
lustre/utils/lconf
lustre/utils/lctl.c
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/llmount.c
lustre/utils/llog_reader.c
lustre/utils/lmc
lustre/utils/lustre_cfg.c

index 507b044..3f5687b 100644 (file)
@@ -27,7 +27,7 @@ Index: linux-stage/fs/ext3/inode.c
                                struct ext3_iloc *iloc, int in_mem)
  {
        unsigned long block;
-@@ -2484,6 +2484,11 @@
+@@ -2484,6 +2484,11 @@ void ext3_read_inode(struct inode * inod
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
  
@@ -39,7 +39,7 @@ Index: linux-stage/fs/ext3/inode.c
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
-@@ -2619,6 +2624,9 @@
+@@ -2619,6 +2624,9 @@ static int ext3_do_update_inode(handle_t
        } else for (block = 0; block < EXT3_N_BLOCKS; block++)
                raw_inode->i_block[block] = ei->i_data[block];
  
@@ -49,7 +49,7 @@ Index: linux-stage/fs/ext3/inode.c
        BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
        rc = ext3_journal_dirty_metadata(handle, bh);
        if (!err)
-@@ -2849,7 +2857,8 @@
+@@ -2849,7 +2857,8 @@ ext3_reserve_inode_write(handle_t *handl
  {
        int err = 0;
        if (handle) {
index b6d0c57..657ecf4 100644 (file)
@@ -2482,12 +2482,13 @@ Index: linux-2.6.12-rc6/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/ext3/Makefile     2005-06-14 16:31:09.179354899 +0200
 +++ linux-2.6.12-rc6/fs/ext3/Makefile  2005-06-14 16:31:25.872714069 +0200
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o resize.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o resize.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2512,19 +2513,18 @@ Index: linux-2.6.12-rc6/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -593,7 +596,7 @@
-       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+@@ -593,6 +596,7 @@
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
-+      Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
 @@ -644,6 +647,8 @@
-       {Opt_iopen,  "iopen"},
-       {Opt_noiopen,  "noiopen"},
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen, "iopen"},
+       {Opt_noiopen, "noiopen"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
@@ -2593,8 +2593,8 @@ Index: linux-2.6.12-rc6/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -360,6 +364,8 @@
  #define EXT3_MOUNT_NOBH                       0x40000 /* No bufferheads */
- #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
 +#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
 +#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
index f69e16c..0ee8d28 100644 (file)
@@ -2471,12 +2471,13 @@ Index: linux-2.6.5-sles9/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/Makefile    2005-02-23 01:01:46.501172896 +0300
 +++ linux-2.6.5-sles9/fs/ext3/Makefile 2005-02-23 01:02:37.405434272 +0300
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o
-+         ioctl.o namei.o super.o symlink.o hash.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2501,12 +2502,11 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -537,7 +540,7 @@
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-       Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_err,
-+      Opt_err, Opt_extents, Opt_extdebug
+@@ -537,6 +540,7 @@
+       Opt_ignore, Opt_barrier,
+       Opt_err,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
@@ -2516,9 +2516,9 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
        {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
 @@ -797,6 +802,12 @@
                        break;
                case Opt_ignore:
@@ -2583,10 +2583,10 @@ Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -333,6 +337,8 @@
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
index 3b873c2..56fe653 100644 (file)
@@ -2466,12 +2466,13 @@ Index: linux-stage/fs/ext3/Makefile
 ===================================================================
 --- linux-stage.orig/fs/ext3/Makefile  2005-02-25 14:49:42.168561008 +0200
 +++ linux-stage/fs/ext3/Makefile       2005-02-25 15:39:28.384587168 +0200
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o resize.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o resize.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2496,19 +2497,18 @@ Index: linux-stage/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -589,7 +594,7 @@
-       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+@@ -589,6 +594,7 @@
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
-+      Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
 @@ -639,6 +644,8 @@
-       {Opt_iopen,  "iopen"},
-       {Opt_noiopen,  "noiopen"},
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen, "iopen"},
+       {Opt_noiopen, "noiopen"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
@@ -2578,10 +2578,10 @@ Index: linux-stage/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -359,6 +363,8 @@
  #define EXT3_MOUNT_RESERVATION                0x20000 /* Preallocation */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 49528cf..52e5521 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-stage/include/linux/ext3_fs.h
 --- linux-stage.orig/include/linux/ext3_fs.h   2005-02-25 14:53:56.424908168 +0200
 +++ linux-stage/include/linux/ext3_fs.h        2005-02-25 14:53:59.376459464 +0200
 @@ -361,12 +361,13 @@
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 -#ifndef _LINUX_EXT2_FS_H
index acf97dd..1ac944b 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-stage/include/linux/ext3_fs.h
 --- linux-stage.orig/include/linux/ext3_fs.h   2004-04-02 16:43:37.000000000 -0500
 +++ linux-stage/include/linux/ext3_fs.h        2004-04-02 16:43:37.000000000 -0500
 @@ -331,12 +331,13 @@
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 -#ifndef _LINUX_EXT2_FS_H
index 0594199..bb9928a 100644 (file)
@@ -18,9 +18,9 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -339,6 +347,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
@@ -126,11 +126,11 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
 @@ -543,7 +544,7 @@ enum {
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-       Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_err, Opt_extents, Opt_extdebug
-+      Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc
+       Opt_ignore, Opt_barrier,
+       Opt_err,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
@@ -139,9 +139,9 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
 @@ -811,6 +813,9 @@ static int parse_options (char * options
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
@@ -334,7 +334,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c   2005-12-09 13:08:53.191437750 +0300
 +++ linux-2.6.5-7.201/fs/ext3/mballoc.c        2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2435 @@
+@@ -0,0 +1,2430 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -899,10 +899,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1664,8 +1666,6 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1944,10 +1944,6 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2769,18 +2765,16 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.5-7.201/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.5-7.201.orig/fs/ext3/Makefile    2005-12-17 02:53:30.000000000 +0300
 +++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
--         ioctl.o namei.o super.o symlink.o hash.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o extents.o \
-+         mballoc.o
+          ioctl.o namei.o super.o symlink.o hash.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
index 2e6a6f4..a2b9caf 100644 (file)
@@ -18,7 +18,7 @@ Index: linux-2.6.12.6/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -366,6 +374,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
@@ -122,16 +122,17 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,7 +598,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
-+      Opt_mballoc,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
 @@ -649,6 +651,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
@@ -328,7 +329,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.12.6.orig/fs/ext3/mballoc.c      2005-12-09 13:08:53.191437750 +0300
 +++ linux-2.6.12.6/fs/ext3/mballoc.c   2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2429 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -893,10 +894,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1658,8 +1661,6 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1938,10 +1939,6 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2762,18 +2759,16 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.12.6/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.12.6.orig/fs/ext3/Makefile       2005-12-17 02:17:16.000000000 +0300
 +++ linux-2.6.12.6/fs/ext3/Makefile    2005-12-17 02:21:21.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
--         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+         mballoc.o
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
index 8fdd6cf..d12c678 100644 (file)
@@ -18,9 +18,9 @@ Index: linux-2.6.9-full/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
@@ -123,16 +123,17 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -596,6 +597,7 @@ enum {
+@@ -596,7 +597,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
-+      Opt_mballoc,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
 @@ -647,6 +649,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
@@ -347,7 +348,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2005-12-16 17:46:19.148560250 +0300
 +++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2429 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -912,10 +913,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1677,8 +1680,6 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1957,10 +1958,6 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2781,18 +2778,16 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.9-full/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/Makefile     2005-12-16 23:16:41.000000000 +0300
 +++ linux-2.6.9-full/fs/ext3/Makefile  2005-12-16 23:16:42.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
--         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+         mballoc.o
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
index 53a293f..98dbca4 100644 (file)
@@ -7,7 +7,7 @@ Index: linux-stage/fs/ext3/Makefile
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
 -ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
++ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
@@ -124,7 +124,7 @@ Index: linux-stage/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -188,7 +188,7 @@ Index: linux-stage/fs/ext3/iopen.c
 +      assert(dentry->d_inode == NULL);
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
 +      if (rehash)
-+              assert(d_unhashed(dentry));     /* d_rehash */
++              assert(d_unhashed(dentry));             /* d_rehash */
 +      assert(list_empty(&dentry->d_subdirs));
 +
 +      spin_lock(&dcache_lock);
@@ -411,7 +411,7 @@ Index: linux-stage/fs/ext3/namei.c
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
@@ -420,20 +420,20 @@ Index: linux-stage/fs/ext3/super.c
 --- linux-stage.orig/fs/ext3/super.c   2005-02-25 14:37:30.987717392 +0200
 +++ linux-stage/fs/ext3/super.c        2005-02-25 14:44:50.495901992 +0200
 @@ -586,6 +586,7 @@
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
+ static match_table_t tokens = {
 @@ -633,6 +634,9 @@
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
@@ -464,8 +464,8 @@ Index: linux-stage/include/linux/ext3_fs.h
  #define EXT3_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */
  #define EXT3_MOUNT_BARRIER            0x10000 /* Use block barriers */
  #define EXT3_MOUNT_RESERVATION                0x20000 /* Preallocation */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index bd133cb..1c5e900 100644 (file)
@@ -1,15 +1,7 @@
- fs/ext3/inode.c                    |    3 
- fs/ext3/iopen.c                    |  239 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   15 ++
- fs/ext3/namei.c                    |   13 ++
- fs/ext3/super.c                    |   17 ++
- include/linux/ext3_fs.h            |    2 
- 7 files changed, 304 insertions(+), 1 deletion(-)
-
-Index: linux-2.6.5-sles9/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/Makefile    2004-04-04 07:36:18.000000000 +0400
-+++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300
+--- linux-stage.orig/fs/ext3/Makefile  2005-02-25 14:31:53.151076368 +0200
++++ linux-stage/fs/ext3/Makefile       2005-02-25 14:41:51.259150120 +0200
 @@ -4,7 +4,7 @@
  
  obj-$(CONFIG_EXT3_FS) += ext3.o
@@ -19,10 +11,10 @@ Index: linux-2.6.5-sles9/fs/ext3/Makefile
           ioctl.o namei.o super.o symlink.o hash.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
-Index: linux-2.6.5-sles9/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/inode.c     2004-11-09 02:15:44.739673656 +0300
-+++ linux-2.6.5-sles9/fs/ext3/inode.c  2004-11-09 02:18:27.608913768 +0300
+--- linux-stage.orig/fs/ext3/inode.c   2005-02-25 14:37:30.983718000 +0200
++++ linux-stage/fs/ext3/inode.c        2005-02-25 14:47:42.069818792 +0200
 @@ -37,6 +37,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
@@ -31,7 +23,7 @@ Index: linux-2.6.5-sles9/fs/ext3/inode.c
  #include "acl.h"
  
  /*
-@@ -2402,6 +2403,9 @@
+@@ -2408,6 +2409,9 @@
  #endif
        ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
  
@@ -41,7 +33,7 @@ Index: linux-2.6.5-sles9/fs/ext3/inode.c
        if (ext3_get_inode_loc(inode, &iloc, 0))
                goto bad_inode;
        bh = iloc.bh;
-Index: linux-2.6.5-sles9/fs/ext3/iopen.c
+Index: linux-stage/fs/ext3/iopen.c
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/iopen.c     2003-01-30 13:24:37.000000000 +0300
 +++ linux-2.6.5-sles9/fs/ext3/iopen.c  2004-11-09 02:18:27.611913312 +0300
@@ -133,7 +125,7 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -324,10 +316,10 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.c
 +
 +      return 1;
 +}
-Index: linux-2.6.5-sles9/fs/ext3/iopen.h
+Index: linux-stage/fs/ext3/iopen.h
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/iopen.h     2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.6.5-sles9/fs/ext3/iopen.h  2004-11-09 02:18:27.613913008 +0300
+--- linux-stage.orig/fs/ext3/iopen.h   2005-02-25 14:41:01.017787968 +0200
++++ linux-stage/fs/ext3/iopen.h        2005-02-25 14:41:01.045783712 +0200
 @@ -0,0 +1,15 @@
 +/*
 + * iopen.h
@@ -344,10 +336,10 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.h
 +extern int ext3_iopen_get_inode(struct inode *inode);
 +extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
 +                                         struct inode *inode, int rehash);
-Index: linux-2.6.5-sles9/fs/ext3/namei.c
+Index: linux-stage/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/namei.c     2004-11-09 02:15:44.614692656 +0300
-+++ linux-2.6.5-sles9/fs/ext3/namei.c  2004-11-09 02:18:27.616912552 +0300
+--- linux-stage.orig/fs/ext3/namei.c   2005-02-25 14:37:28.975023368 +0200
++++ linux-stage/fs/ext3/namei.c        2005-02-25 14:46:43.090784968 +0200
 @@ -37,6 +37,7 @@
  #include <linux/buffer_head.h>
  #include <linux/smp_lock.h>
@@ -356,7 +348,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  #include "acl.h"
  
  /*
-@@ -979,6 +980,9 @@
+@@ -980,6 +981,9 @@
        if (dentry->d_name.len > EXT3_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
  
@@ -366,7 +358,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
        bh = ext3_find_entry(dentry, &de);
        inode = NULL;
        if (bh) {
-@@ -989,10 +993,8 @@
+@@ -990,10 +994,8 @@
                if (!inode)
                        return ERR_PTR(-EACCES);
        }
@@ -379,7 +371,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  }
  
  
-@@ -2029,10 +2031,6 @@
+@@ -2037,10 +2039,6 @@
                              inode->i_nlink);
        inode->i_version++;
        inode->i_nlink = 0;
@@ -390,7 +382,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
        ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        ext3_mark_inode_dirty(handle, inode);
-@@ -2152,6 +2150,23 @@
+@@ -2163,6 +2161,23 @@
        return err;
  }
  
@@ -414,40 +406,39 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  static int ext3_link (struct dentry * old_dentry,
                struct inode * dir, struct dentry *dentry)
  {
-@@ -2175,7 +2190,8 @@
+@@ -2186,7 +2201,8 @@
        ext3_inc_count(handle, inode);
        atomic_inc(&inode->i_count);
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
-Index: linux-2.6.5-sles9/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/super.c     2004-11-09 02:15:44.743673048 +0300
-+++ linux-2.6.5-sles9/fs/ext3/super.c  2004-11-09 02:18:27.620911944 +0300
-@@ -534,7 +534,7 @@
-       Opt_reservation, Opt_noreservation, Opt_noload,
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
+--- linux-stage.orig/fs/ext3/super.c   2005-02-25 14:37:30.987717392 +0200
++++ linux-stage/fs/ext3/super.c        2005-02-25 14:44:50.495901992 +0200
+@@ -586,6 +586,7 @@
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
--      Opt_ignore, Opt_barrier,
-+      Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_ignore, Opt_barrier,
        Opt_err,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
-@@ -577,6 +577,9 @@
+ static match_table_t tokens = {
+@@ -633,6 +634,9 @@
+       {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-       {Opt_barrier, "barrier=%u"},
 +      {Opt_iopen, "iopen"},
 +      {Opt_noiopen, "noiopen"},
 +      {Opt_iopen_nopriv, "iopen_nopriv"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -778,6 +781,18 @@
+@@ -914,6 +918,18 @@
                        else
                                clear_opt(sbi->s_mount_opt, BARRIER);
                        break;
@@ -466,16 +457,16 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
                case Opt_ignore:
                        break;
                default:
-Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h     2004-11-09 02:15:44.616692352 +0300
-+++ linux-2.6.5-sles9/include/linux/ext3_fs.h  2004-11-09 02:18:27.622911640 +0300
-@@ -329,6 +329,8 @@
+--- linux-stage.orig/include/linux/ext3_fs.h   2005-02-25 14:37:28.977023064 +0200
++++ linux-stage/include/linux/ext3_fs.h        2005-02-25 14:49:00.569884968 +0200
+@@ -355,6 +355,8 @@
  #define EXT3_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */
  #define EXT3_MOUNT_RESERVATION                0x10000 /* Preallocation */
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 5141bbc..8d456ac 100644 (file)
@@ -7,7 +7,7 @@ Index: linux-2.6.12-rc6/fs/ext3/Makefile
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
 -ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
++ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
@@ -124,7 +124,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -150,7 +150,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
 +      dentry->d_inode = inode;
 +
-+      d_rehash_cond(dentry, 0);                               /* d_rehash */
++      d_rehash_cond(dentry, 0);                       /* d_rehash */
 +      spin_unlock(&dcache_lock);
 +
 +      return NULL;
@@ -188,7 +188,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      assert(dentry->d_inode == NULL);
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
 +      if (rehash)
-+              assert(d_unhashed(dentry));     /* d_rehash */
++              assert(d_unhashed(dentry));             /* d_rehash */
 +      assert(list_empty(&dentry->d_subdirs));
 +
 +      spin_lock(&dcache_lock);
@@ -230,7 +230,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      dentry->d_inode = inode;
 +do_rehash:
 +      if (rehash)
-+              d_rehash_cond(dentry, 0);                       /* d_rehash */
++              d_rehash_cond(dentry, 0);               /* d_rehash */
 +      spin_unlock(&dcache_lock);
 +
 +      return NULL;
@@ -411,7 +411,7 @@ Index: linux-2.6.12-rc6/fs/ext3/namei.c
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
@@ -420,20 +420,20 @@ Index: linux-2.6.12-rc6/fs/ext3/super.c
 --- linux-2.6.12-rc6.orig/fs/ext3/super.c      2005-06-14 16:01:16.287775299 +0200
 +++ linux-2.6.12-rc6/fs/ext3/super.c   2005-06-14 16:14:33.656906156 +0200
 @@ -590,6 +590,7 @@
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
+ static match_table_t tokens = {
 @@ -638,6 +639,9 @@
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
@@ -464,8 +464,8 @@ Index: linux-2.6.12-rc6/include/linux/ext3_fs.h
  #define EXT3_MOUNT_RESERVATION                0x10000 /* Preallocation */
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
  #define EXT3_MOUNT_NOBH                       0x40000 /* No bufferheads */
-+#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 0eff073..7e378c2 100644 (file)
@@ -38,7 +38,8 @@ sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
        cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3
        cp $(linux_headers) linux-stage/include/linux
 if USE_QUILT
-       cd linux-stage && quilt setup -d ../$(patches) ../$(series)
+       ln -s ../$(patches) linux-stage/patches
+       ln -s ../$(series) linux-stage/series
        cd linux-stage && quilt push -a -q
 else
        @echo -n "Applying ext3 patches:"
index 2bf6ae8..05fbc7e 100644 (file)
@@ -1,12 +1,20 @@
-01-31-2006  Cluster File Systems, Inc. <info@clusterfs.com>
+03-16-2006  Cluster File Systems, Inc. <info@clusterfs.com>
+       * version 1.4.6.1
+       * Support for newer kernels: 2.6.9-34.EL (RHEL 4), 2.6.5-7.252 (SLES 9)
+
+
+02-14-2006  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.4.6
        * WIRE PROTOCOL CHANGE.  This version of Lustre networking WILL NOT
         INTEROPERATE with older versions automatically.  Please read the 
         user documentation before upgrading any part of a live system.
        * WARNING: Lustre networking configuration changes are required with
-        this release.  See https://bugzilla.clusterfs.com/show_bug.cgi?id=10052         
+        this release.  See https://bugzilla.clusterfs.com/show_bug.cgi?id=10052
         for details.
        * bug fixes
+       * Support for newer kernels: 2.6.9-22.0.2.EL (RHEL 4),
+         2.6.5-7.244 (SLES 9) - same as 1.4.5.2.
+
 
 Severity   : enhancement
 Bugzilla   : 7981/8208
@@ -91,6 +99,13 @@ Details    : sending a glimpse AST to a liblustre client waits for every AST
             to time out, as liblustre clients will not respond.  Since they
             cannot cache data we refresh the OST lock LVB from disk instead.
 
+Severity   : enhancement
+Bugzilla   : 7198
+Description: doing an ls at the same time as file IO can be slow
+Details    : enqueue and other "small" requests can be blocked behind many
+            large IO requests.  Create a new OST IO portal for non-IO
+            requests so they can be processed faster.
+
 Severity   : minor
 Frequency  : rare (only HPUX clients mounting unsupported re-exported NFS vol)
 Bugzilla   : 5781
@@ -206,7 +221,7 @@ Details    : Having an LWI_INTR() wait event (interruptible, but no timeout)
             request was interrupted, and we also didn't break out of the
             event loop if there was no timeout
 
-Severity   : minor
+Severity   : major
 Frequency  : rare
 Bugzilla   : 5047
 Description: data loss during non-page-aligned writes to a single file from
@@ -261,6 +276,12 @@ Details    : llap_shrink cache now attempts to unmap pages before discarding
             extra checks that trigger if this unmapping is not done first.
 
 Severity   : minor
+Frequency  : rare
+Bugzilla   : 6034
+Description: lconf didn't resolve symlinks before checking to see whether a
+            given mountpoint was already in use
+
+Severity   : minor
 Frequency  : when migrating failover services
 Bugzilla   : 6395, 9514
 Description: When migrating a subset of services from a node (e.g. failback
@@ -374,7 +395,7 @@ Details    : a module parameter allows the number of OST service threads
 
 Severity   : major
 Frequency  : rare
-Bugzilla   : 9635
+Bugzilla   : 6146, 9635, 9895
 Description: servers crash with bad pointer in target_handle_connect()
 Details    : In rare cases when a client is reconnecting it was possible that
             the connection request was the last reference for that export.
@@ -459,6 +480,14 @@ Details    : If an OST is down, in some cases it was possible to create two
             than potential performance impact and spurious error messages.
 
 Severity   : minor
+Frequency  : rare
+Bugzilla   : 5681, 9562
+Description: Client may oops in ll_unhash_aliases
+Details    : Client dcache may become inconsistent in race condition.
+            In some cases "getcwd" can fail if the current directory is
+            modified.
+
+Severity   : minor
 Frequency  : always
 Bugzilla   : 9942
 Description: Inode refcounting problems in NFS export code
@@ -468,7 +497,7 @@ Details    : link_raw functions used to call d_instantiate without obtaining
 Severity   : minor
 Frequency  : rare
 Bugzilla   : 9942, 9903
-Description: Referencing freed requests leading to crash, memleask with NFS.
+Description: Referencing freed requests leading to crash, memleaks with NFS.
 Details    : We used to require that call to ll_revalidate_it was always
             followed by ll_lookup_it. Also with revalidate_special() it is
             possible to call ll_revalidate_it() twice for the same dentry
@@ -487,16 +516,6 @@ Details    : If a client is repeatedly creating and unlinking files it
             client node to run out of memory.  Instead flush old inodes
             from client cache that have the same inode number as a new inode.
 
-Severity   : minor
-Frequency  : echo_client brw_test command
-Bugzilla   : 9919
-Description: fix echo_client to work with OST preallocated code
-Details    : OST preallocation code (5137) didn't take echo_client IO path
-            into account: echo_client calls filter methods outside of any
-            OST thread and, hence, there is no per-thread preallocated
-            pages and buffers to use. Solution: hijack pga pages for IO. As
-            a byproduct, this avoids unnecessary data copying.
-
 Severity   : major
 Frequency  : rare, unless heavy write-truncate concurrency is continuous
 Bugzilla   : 4180, 6984, 7171, 9963, 9331
@@ -521,6 +540,72 @@ Details    : mds_cleanup() and filter_cleanup() need to drop the kernel lock
             the kernel lock is held, not whether it is this process that is
             holding it as 2.6 kernels do.
 
+Severity   : major
+Frequency  : rare
+Bugzilla   : 9635
+Description: MDS or OST may oops/LBUG if a client is connecting multiple times
+Details    : The client ptlrpc code may be trying to reconnect to a down
+            server before a previous connection attempt has timed out.
+            Increase the reconnect interval to be longer than the connection
+            timeout interval to avoid sending duplicate connections to
+            servers.
+
+Severity   : minor
+Frequency  : echo_client brw_test command
+Bugzilla   : 9919
+Description: fix echo_client to work with OST preallocated code
+Details    : OST preallocation code (5137) didn't take echo_client IO path
+            into account: echo_client calls filter methods outside of any
+            OST thread and, hence, there is no per-thread preallocated
+            pages and buffers to use. Solution: hijack pga pages for IO. As
+            a byproduct, this avoids unnecessary data copying.
+
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 3555, 5962, 6025, 6155, 6296, 9574
+Description: Client can oops in mdc_commit_close() after open replay
+Details    : It was possible for the MDS to return an open request with no
+            transaction number in mds_finish_transno() if the client was
+            evicted, but without actually returning an error.  Clients
+            would later try to replay that open and may trip an assertion.
+            Simplify the client close codepath, and always return an error
+            from the MDS in case the open is not successful.
+
+Severity   : major
+Frequency  : rare, 2.6 OSTs only
+Bugzilla   : 10076
+Description: OST may deadlock under high load on fragmented files
+Details    : If there was a heavy load and highly-fragmented OST filesystems
+            it was possible to have all the OST threads deadlock waiting on
+            allocation of biovecs, because the biovecs were not released
+            until the entire RPC IO was completed.  Instead, release biovecs
+            as soon as they are complete to ensure forward IO progress.
+
+Severity   : enhancement
+Bugzilla   : 9578
+Description: Support for specifying external journal device at mount
+Details    : If an OST or MDS device is formatted with an external journal
+            device, this device major/minor is stored in the ext3 superblock
+            and may not be valid for failover.  Allow detecting and
+            specifying the external journal at mount time.
+
+Severity   : major
+Frequency  : rare
+Bugzilla   : 10235
+Description: Mounting an MDS with pending unlinked files may cause oops
+Details    : target_finish_recovery() calls mds_postrecov() which returned
+            the number of orphans unlinked. mds_lov_connect->mds_postsetup()
+            considers this an error and immediately begins cleaning up the
+            lov, just after starting the mds_lov process
+
+Severity   : enhancement
+Bugzilla   : 9461
+Description: Implement 'lfs df' to report actual free space on per-OST basis
+Details    : Add sub-command 'df' on 'lfs' to report the disk space usage of
+            MDS/OSDs. Usage: lfs df [-i][-h]. Command Options: '-i' to report
+            usage of objects; '-h' to report in human readable format.
+
+
 ------------------------------------------------------------------------------
 
 08-26-2005  Cluster File Systems, Inc. <info@clusterfs.com>
@@ -640,14 +725,14 @@ Frequency  : rare (only unsupported configurations with a node running as an
 Bugzilla   : 6514, 5137
 Description: Mounting a Lustre file system on a node running as an OST could
             lead to deadlocks
-Details    : OSTs now allocate memory needed to write out data at
-             startup, instead of when needed, to avoid having to
-             allocate memory in possibly low memory situations.
-             Specifically, if the file system is mounted on on OST,
-             memory pressure could force it to try to write out data,
-             which it needed to allocate memory to do.  Due to the low
-             memory, it would be unable to do so and the node would
-             become unresponsive.
+Details    : OSTs now preallocate memory needed to write out data at
+            startup, instead of when needed, to avoid having to
+            allocate memory in possibly low memory situations.
+            Specifically, if the file system is mounted on an OST,
+            memory pressure could force it to try to write out data,
+            which it needed to allocate memory to do.  Due to the low
+            memory, it would be unable to do so and the node would
+            become unresponsive.
 
 Severity   : enhancement
 Bugzilla   : 7015
@@ -687,6 +772,16 @@ Details    : The config llog parsing code may overwrite the error return
              of an error.
 
 Severity   : minor
+Bugzilla   : 6422
+Frequency  : rare
+Description: MDS can fail to allocate large reply buffers
+Details    : After long uptimes the MDS can fail to allocate large reply
+            buffers (e.g. zconf client mount config records) due to memory
+            fragmentation or consumption by the buffer cache.  Preallocate
+            some large reply buffers so that these replies can be sent even
+            under memory pressure.
+
+Severity   : minor
 Bugzilla   : 6266
 Frequency  : rare (liblustre)
 Description: fsx running with liblustre complained that using truncate() to
@@ -781,7 +876,7 @@ Severity:  : enhancement
 Bugzilla   : 3262, 6359
 Description: Attempts to reconnect to servers are now more aggressive.
 Details    : This builds on the enhanced upcall-less recovery that was added
-             in 1.4.2.  When trying to reconnect to servers,  clients will
+             in 1.4.2.  When trying to reconnect to servers, clients will
              now try each server in the failover group every 10 seconds.  By
              default, clients would previously try one server every 25 seconds.
 
index fd9034d..bc74354 100644 (file)
@@ -1,7 +1,7 @@
 m4_define([LUSTRE_MAJOR],[1])
 m4_define([LUSTRE_MINOR],[4])
-m4_define([LUSTRE_PATCH],[5])
-m4_define([LUSTRE_FIX],[95])
+m4_define([LUSTRE_PATCH],[6])
+m4_define([LUSTRE_FIX],[0])
 
 dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after
 dnl # next release candidate/beta would spill this warning already.
@@ -11,8 +11,7 @@ m4_define([LUSTRE_VER_OFFSET_WARN],[288])
 dnl # User editable part ends here. -----------------------------------------
 
 m4_pattern_allow(AC_LUSTRE)
-m4_define([LUSTRE_VERSION],m4_if(LUSTRE_FIX,[0],LUSTRE_MAJOR.LUSTRE_MINOR.LUSTR
-E_PATCH,LUSTRE_MAJOR.LUSTRE_MINOR.LUSTRE_PATCH.LUSTRE_FIX))
+m4_define([LUSTRE_VERSION],m4_if(LUSTRE_FIX,[0],LUSTRE_MAJOR.LUSTRE_MINOR.LUSTRE_PATCH,LUSTRE_MAJOR.LUSTRE_MINOR.LUSTRE_PATCH.LUSTRE_FIX))
 
 [AC_LUSTRE_MAJOR]=LUSTRE_MAJOR
 [AC_LUSTRE_MINOR]=LUSTRE_MINOR
index b731c89..ee57167 100644 (file)
@@ -13,3 +13,4 @@ extN_jbd.h
 extN_xattr.h
 xattr.h
 lustre_build_version.h
+lustre_ver.h
index a853f42..2eb9780 100644 (file)
@@ -314,6 +314,15 @@ static inline int page_mapped(struct page *page)
 }
 #endif /* !HAVE_PAGE_MAPPED */
 
+static inline void file_accessed(struct file *file)
+{
+#ifdef O_NOATIME
+        if (file->f_flags & O_NOATIME)
+                return;
+#endif
+        update_atime(file->f_dentry->d_inode);
+}
+
 #endif /* end of 2.4 compat macros */
 
 #ifdef HAVE_PAGE_LIST
index 34840d3..41109f1 100644 (file)
@@ -61,8 +61,9 @@ struct fsfilt_operations {
         int     (* fs_iocontrol)(struct inode *inode, struct file *file,
                                  unsigned int cmd, unsigned long arg);
         int     (* fs_set_md)(struct inode *inode, void *handle, void *md,
-                              int size);
-        int     (* fs_get_md)(struct inode *inode, void *md, int size);
+                              int size, const char *name);
+        int     (* fs_get_md)(struct inode *inode, void *md, int size,
+                              const char *name);
         /*
          * this method is needed to make IO operation fsfilt nature depend.
          *
@@ -273,15 +274,16 @@ static inline int fsfilt_iocontrol(struct obd_device *obd, struct inode *inode,
 }
 
 static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode,
-                                void *handle, void *md, int size)
+                                void *handle, void *md, int size,
+                                const char *name)
 {
-        return obd->obd_fsops->fs_set_md(inode, handle, md, size);
+        return obd->obd_fsops->fs_set_md(inode, handle, md, size, name);
 }
 
 static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode,
-                                void *md, int size)
+                                void *md, int size, const char *name)
 {
-        return obd->obd_fsops->fs_get_md(inode, md, size);
+        return obd->obd_fsops->fs_get_md(inode, md, size, name);
 }
 
 static inline int fsfilt_send_bio(int rw, struct obd_device *obd,
index c634c79..23732f0 100644 (file)
 #define LUSTRE_OST_VERSION  0x00030000
 #define LUSTRE_DLM_VERSION  0x00040000
 #define LUSTRE_LOG_VERSION  0x00050000
-#define LUSTRE_PBD_VERSION  0x00060000
-#define LUSTRE_MGS_VERSION  0x00070000
+#define LUSTRE_MGS_VERSION  0x00060000
 
 
 struct lustre_handle {
@@ -241,7 +240,7 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
                                 OBD_CONNECT_IBITS | OBD_CONNECT_JOIN)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
-                                OBD_CONNECT_TRUNCLOCK)
+                                OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION)
 
index 68834bf..e460ae3 100644 (file)
@@ -54,7 +54,6 @@ struct obd_import {
         struct lustre_handle      imp_dlm_handle; /* client's ldlm export */
         struct ptlrpc_connection *imp_connection;
         struct ptlrpc_client     *imp_client;
-        struct list_head          imp_observers;
         struct list_head          imp_pinger_chain;
 
         /* Lists of requests that are retained for replay, waiting for a reply,
@@ -89,7 +88,7 @@ struct obd_import {
         unsigned int              imp_invalid:1, imp_replayable:1,
                                   imp_dlm_fake:1, imp_server_timeout:1,
                                   imp_initial_recov:1, imp_initial_recov_bk:1,
-                                  imp_force_verify:1, imp_pingable:1, 
+                                  imp_force_verify:1, imp_pingable:1,
                                   imp_resend_replay:1, imp_deactive:1;
         __u32                     imp_connect_op;
         struct obd_connect_data   imp_connect_data;
index ab2c862..2d74940 100644 (file)
@@ -500,7 +500,7 @@ static inline void obd_ioctl_freedata(char *buf, int len)
  * If it returns FALSE l_wait_event() continues to wait as described above with
  * signals enabled.  Otherwise it returns -ETIMEDOUT.
  *
- * LWI_INTR(intr_handler, callback_data) is shorthand for 
+ * LWI_INTR(intr_handler, callback_data) is shorthand for
  * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data)
  *
  * The second form of usage looks like this:
@@ -520,6 +520,27 @@ static inline void obd_ioctl_freedata(char *buf, int len)
  * This is the same as previous case, but condition is checked once every
  * 'interval' jiffies (if non-zero).
  *
+ * Subtle synchronization point: this macro does *not* necessarily take
+ * wait-queue spin-lock before returning, and, hence, following idiom is safe
+ * ONLY when caller provides some external locking:
+ *
+ *             Thread1                            Thread2
+ *
+ *   l_wait_event(&obj->wq, ....);                                       (1)
+ *
+ *                                    wake_up(&obj->wq):                 (2)
+ *                                         spin_lock(&q->lock);          (2.1)
+ *                                         __wake_up_common(q, ...);     (2.2)
+ *                                         spin_unlock(&q->lock);        (2.3)
+ *
+ *   OBD_FREE_PTR(obj);                                                  (3)
+ *
+ * As l_wait_event() may "short-cut" execution and return without taking
+ * wait-queue spin-lock, some additional synchronization is necessary to
+ * guarantee that step (3) can begin only after (2.3) finishes.
+ *
+ * XXX nikita: some ptlrpc daemon threads have races of that sort.
+ *
  */
 
 #define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1))
@@ -580,6 +601,10 @@ static inline sigset_t l_w_e_set_sigs(int sigs)
         return old;
 }
 
+/*
+ * wait for @condition to become true, but no longer than timeout, specified
+ * by @info.
+ */
 #define __l_wait_event(wq, condition, info, ret, excl)                         \
 do {                                                                           \
         wait_queue_t  __wait;                                                  \
@@ -723,6 +748,20 @@ do {                                                                    \
 })
 
 #ifdef __KERNEL__
+/* initialize ost_lvb according to inode */
+static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb)
+{
+        lvb->lvb_size = inode->i_size;
+        lvb->lvb_blocks = inode->i_blocks;
+        lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+        lvb->lvb_atime = LTIME_S(inode->i_atime);
+        lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+}
+#else
+/* defined in liblustre/llite_lib.h */
+#endif
+
+#ifdef __KERNEL__
 #define LIBLUSTRE_CLIENT (0)
 #else
 #define LIBLUSTRE_CLIENT (1)
index 7b50c4b..f96fbb4 100644 (file)
@@ -287,7 +287,17 @@ struct ptlrpc_request {
         spinlock_t rq_lock;
         /* client-side flags */
         unsigned int rq_intr:1, rq_replied:1, rq_err:1,
-                rq_timedout:1, rq_resend:1, rq_restart:1, rq_replay:1,
+                rq_timedout:1, rq_resend:1, rq_restart:1,
+                /*
+                 * when ->rq_replay is set, request is kept by the client even
+                 * after server commits corresponding transaction. This is
+                 * used for operations that require sequence of multiple
+                 * requests to be replayed. The only example currently is file
+                 * open/close. When last request in such a sequence is
+                 * committed, ->rq_replay is cleared on all requests in the
+                 * sequence.
+                 */
+                rq_replay:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1;
         enum rq_phase rq_phase; /* one of RQ_PHASE_* */
@@ -520,7 +530,10 @@ struct ptlrpc_service {
         struct list_head  srv_active_replies;   /* all the active replies */
         struct list_head  srv_reply_queue;      /* replies waiting for service */
 
-        wait_queue_head_t srv_waitq; /* all threads sleep on this */
+        wait_queue_head_t srv_waitq; /* all threads sleep on this. This
+                                      * wait-queue is signalled when new
+                                      * incoming request arrives and when
+                                      * difficult reply has to be handled. */
 
         struct list_head   srv_threads;
         svc_handler_t      srv_handler;
index 6fe23c8..faaa273 100644 (file)
@@ -62,10 +62,7 @@ struct lov_oinfo {                 /* per-stripe data structure */
 
         unsigned loi_kms_valid:1;
         __u64 loi_kms;             /* known minimum size */
-        __u64 loi_rss;             /* recently seen size */
-        __u64 loi_mtime;           /* recently seen mtime */
-        __u64 loi_blocks;          /* recently seen blocks */
-
+        struct ost_lvb loi_lvb;
         struct osc_async_rc     loi_ar;
 };
 
@@ -233,12 +230,8 @@ struct filter_obd {
         struct dentry       *fo_dentry_O;
         struct dentry      **fo_dentry_O_groups;
         struct dentry      **fo_dentry_O_sub;
-        spinlock_t           fo_objidlock;      /* protect fo_lastobjid
-                                                 * increment */
-        
-        spinlock_t           fo_translock;      /* protect fsd_last_rcvd
-                                                 * increment */
-        
+        spinlock_t           fo_objidlock;      /* protect fo_lastobjid */
+        spinlock_t           fo_translock;      /* protect fsd_last_transno */
         struct file         *fo_rcvd_filp;
         struct file         *fo_health_check_filp;
         struct lr_server_data *fo_fsd;
@@ -248,10 +241,6 @@ struct filter_obd {
         int                  fo_destroy_in_progress;
         struct semaphore     fo_create_lock;
 
-        struct file_operations *fo_fop;
-        struct inode_operations *fo_iop;
-        struct address_space_operations *fo_aops;
-
         struct list_head     fo_export_list;
         int                  fo_subdir_count;
 
@@ -264,11 +253,9 @@ struct filter_obd {
         struct obd_import   *fo_mdc_imp;
         struct obd_uuid      fo_mdc_uuid;
         struct lustre_handle fo_mdc_conn;
-#if 0
-        struct ptlrpc_client fo_mdc_client;
-#endif
         struct file        **fo_last_objid_files;
-        __u64               *fo_last_objids; /* last created objid for groups */
+        __u64               *fo_last_objids; /* last created objid for groups,
+                                              * protected by fo_objidlock */
 
         struct semaphore     fo_alloc_lock;
 
@@ -407,6 +394,7 @@ struct mds_obd {
         __u64                            mds_last_transno;
         __u64                            mds_mount_count;
         __u64                            mds_io_epoch;
+        unsigned long                    mds_atime_diff;
         struct semaphore                 mds_epoch_sem;
         struct ll_fid                    mds_rootfid;
         struct lr_server_data           *mds_server_data;
@@ -805,6 +793,8 @@ struct obd_ops {
         int (*o_teardown_async_page)(struct obd_export *exp,
                                      struct lov_stripe_md *lsm,
                                      struct lov_oinfo *loi, void *cookie);
+        int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
+                           struct ost_lvb *lvb, int kms_only);
         int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
                             obd_off size, int shrink);
         int (*o_punch)(struct obd_export *exp, struct obdo *oa,
index cff5c37..ec1ca7b 100644 (file)
@@ -899,10 +899,19 @@ static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
         RETURN(rc);
 }
 
-/* b1_4_bug5047 has changes to make this an obd_merge_lvb() method */
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms_only);
-__u64 lov_merge_blocks(struct lov_stripe_md *lsm);
-__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time);
+static inline int obd_merge_lvb(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                struct ost_lvb *lvb, int kms_only)
+{
+        int rc;
+        ENTRY;
+        
+        OBD_CHECK_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb);
+
+        rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only);
+        RETURN(rc);
+}
 
 static inline int obd_adjust_kms(struct obd_export *exp,
                                  struct lov_stripe_md *lsm, obd_off size,
@@ -1087,8 +1096,8 @@ static inline int obd_notify(struct obd_device *obd,
            mds_postsetup".  I know that my mds is able to handle notifies
            by this point, and it needs to get them to execute mds_postrecov. */ 
         if (!obd->obd_set_up && !obd->obd_async_recov) {
-                CERROR("obd %s not set up, notifying anyhow\n", obd->obd_name);
-                return -EAGAIN;
+                CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
+                return -EINVAL;
         }
 
         if (!OBP(obd, notify)) {
index 06bd6c3..71bd1f0 100644 (file)
@@ -39,6 +39,7 @@ extern unsigned int obd_fail_loc;
 extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_timeout;          /* seconds */
 #define PING_INTERVAL max(obd_timeout / 4, 1U)
+#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
 extern unsigned int ldlm_timeout;
 extern unsigned int obd_health_check_timeout;
 extern char obd_lustre_upcall[128];
index 35da48d..557c3ab 100644 (file)
@@ -16,6 +16,9 @@ extern int llapi_file_create(char *name, long stripe_size, int stripe_offset,
 extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum);
 extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
                       int verbose, int quiet);
+extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
+                     struct obd_statfs *stat_buf,
+                     struct obd_uuid *uuid_buf);
 extern int llapi_ping(char *obd_type, char *obd_name);
 extern int llapi_target_check(int num_types, char **obd_types, char *dir);
 extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
index 08f6158..f2b349f 100644 (file)
 #define LL_IOC_POLL_QUOTACHECK          _IOR ('f', 161, struct if_quotacheck *)
 #define LL_IOC_QUOTACTL                 _IOWR('f', 162, struct if_quotactl *)
 #define LL_IOC_JOIN                     _IOW ('f', 163, long)
+#define LL_IOC_OBD_STATFS               _IOWR('f', 164, struct obd_statfs *)
 
+#define LL_STATFS_MDC           1
+#define LL_STATFS_LOV           2
 
 #define IOC_MDC_TYPE            'i'
 #define IOC_MDC_GETSTRIPE       _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
index 1b6f6c9..4fb3337 100644 (file)
@@ -1442,4 +1442,3 @@ CONFIG_MAGIC_SYSRQ=y
 # CONFIG_IA64_DEBUG_CMPXCHG is not set
 # CONFIG_IA64_DEBUG_IRQ is not set
 CONFIG_KALLSYMS=y
-CONFIG_IEEE1394_PCILYNX=m
index c650a84..5295a33 100644 (file)
@@ -45,7 +45,7 @@ CONFIG_MTRR=y
 CONFIG_SMP=y
 CONFIG_HPET_TIMER=y
 CONFIG_GART_IOMMU=y
-# CONFIG_SWIOTLB is not set
+CONFIG_SWIOTLB=y
 CONFIG_NR_SIBLINGS_0=y
 # CONFIG_NR_SIBLINGS_2 is not set
 CONFIG_HAVE_DEC_LOCK=y
index e104787..527d397 100644 (file)
@@ -45,7 +45,7 @@ CONFIG_MTRR=y
 CONFIG_SMP=y
 CONFIG_HPET_TIMER=y
 CONFIG_GART_IOMMU=y
-# CONFIG_SWIOTLB is not set
+CONFIG_SWIOTLB=y
 CONFIG_NR_SIBLINGS_0=y
 # CONFIG_NR_SIBLINGS_2 is not set
 CONFIG_HAVE_DEC_LOCK=y
index 1718e49..9971cfa 100644 (file)
@@ -67,7 +67,6 @@ CONFIG_STOP_MACHINE=y
 #
 # Processor type and features
 #
-CONFIG_MEM_MIRROR=y
 # CONFIG_X86_PC is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
@@ -131,6 +130,8 @@ CONFIG_X86_CPUID=m
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
 # CONFIG_NOHIGHMEM is not set
 # CONFIG_HIGHMEM4G is not set
 CONFIG_HIGHMEM64G=y
@@ -610,6 +611,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -667,6 +669,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -729,7 +732,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -932,6 +938,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -1194,6 +1203,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1202,6 +1212,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1804,8 +1815,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 CONFIG_IPMI_PANIC_STRING=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1865,6 +1877,13 @@ CONFIG_SONYPI=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=m
 CONFIG_AGP_ALI=m
 CONFIG_AGP_ATI=m
@@ -1966,6 +1985,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 CONFIG_SENSORS_W83L785TS=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
index 1718e49..9971cfa 100644 (file)
@@ -67,7 +67,6 @@ CONFIG_STOP_MACHINE=y
 #
 # Processor type and features
 #
-CONFIG_MEM_MIRROR=y
 # CONFIG_X86_PC is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
@@ -131,6 +130,8 @@ CONFIG_X86_CPUID=m
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
 # CONFIG_NOHIGHMEM is not set
 # CONFIG_HIGHMEM4G is not set
 CONFIG_HIGHMEM64G=y
@@ -610,6 +611,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -667,6 +669,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -729,7 +732,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -932,6 +938,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -1194,6 +1203,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1202,6 +1212,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1804,8 +1815,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 CONFIG_IPMI_PANIC_STRING=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1865,6 +1877,13 @@ CONFIG_SONYPI=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=m
 CONFIG_AGP_ALI=m
 CONFIG_AGP_ATI=m
@@ -1966,6 +1985,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 CONFIG_SENSORS_W83L785TS=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
index 1718e49..9971cfa 100644 (file)
@@ -67,7 +67,6 @@ CONFIG_STOP_MACHINE=y
 #
 # Processor type and features
 #
-CONFIG_MEM_MIRROR=y
 # CONFIG_X86_PC is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
@@ -131,6 +130,8 @@ CONFIG_X86_CPUID=m
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
 # CONFIG_NOHIGHMEM is not set
 # CONFIG_HIGHMEM4G is not set
 CONFIG_HIGHMEM64G=y
@@ -610,6 +611,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -667,6 +669,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -729,7 +732,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -932,6 +938,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -1194,6 +1203,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1202,6 +1212,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1804,8 +1815,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 CONFIG_IPMI_PANIC_STRING=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1865,6 +1877,13 @@ CONFIG_SONYPI=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=m
 CONFIG_AGP_ALI=m
 CONFIG_AGP_ATI=m
@@ -1966,6 +1985,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 CONFIG_SENSORS_W83L785TS=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
index 2a42807..c205dc4 100644 (file)
@@ -93,6 +93,7 @@ CONFIG_IA64_SGI_SN_XPC=m
 CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=128
+CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 CONFIG_IA32_SUPPORT=y
@@ -439,6 +440,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -482,6 +484,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -520,7 +523,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -723,6 +729,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -942,6 +951,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -950,6 +960,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1489,8 +1500,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 CONFIG_IPMI_PANIC_STRING=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1525,6 +1537,13 @@ CONFIG_APPLICOM=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
 CONFIG_AGP_HP_ZX1=m
@@ -1612,6 +1631,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 # CONFIG_SENSORS_W83L785TS is not set
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
index 2a42807..c205dc4 100644 (file)
@@ -93,6 +93,7 @@ CONFIG_IA64_SGI_SN_XPC=m
 CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=128
+CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 CONFIG_IA32_SUPPORT=y
@@ -439,6 +440,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -482,6 +484,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -520,7 +523,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -723,6 +729,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -942,6 +951,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -950,6 +960,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1489,8 +1500,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 CONFIG_IPMI_PANIC_STRING=y
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1525,6 +1537,13 @@ CONFIG_APPLICOM=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
 CONFIG_AGP_HP_ZX1=m
@@ -1612,6 +1631,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 # CONFIG_SENSORS_W83L785TS is not set
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
index c455f9d..81f3823 100644 (file)
@@ -84,6 +84,13 @@ CONFIG_X86_GOOD_APIC=y
 CONFIG_MICROCODE=m
 CONFIG_X86_MSR=m
 CONFIG_X86_CPUID=m
+
+#
+# Firmware Drivers
+#
+CONFIG_EDD=m
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
 CONFIG_X86_HT=y
 CONFIG_X86_IO_APIC=y
 CONFIG_X86_LOCAL_APIC=y
@@ -496,6 +503,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -543,6 +551,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -581,7 +590,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -787,6 +799,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -999,6 +1014,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1007,6 +1023,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1494,8 +1511,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 # CONFIG_IPMI_PANIC_STRING is not set
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1546,8 +1564,16 @@ CONFIG_APPLICOM=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=m
 CONFIG_AGP_INTEL_MCH=m
 # CONFIG_DRM is not set
 
@@ -1633,6 +1659,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 CONFIG_SENSORS_W83L785TS=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
@@ -1763,6 +1790,8 @@ CONFIG_FB_VESA=y
 CONFIG_VIDEO_SELECT=y
 CONFIG_FB_HGA=m
 CONFIG_FB_RIVA=m
+CONFIG_FB_I810=m
+CONFIG_FB_I810_GTF=y
 CONFIG_FB_MATROX=m
 CONFIG_FB_MATROX_MILLENIUM=y
 CONFIG_FB_MATROX_MYSTIQUE=y
index c455f9d..81f3823 100644 (file)
@@ -84,6 +84,13 @@ CONFIG_X86_GOOD_APIC=y
 CONFIG_MICROCODE=m
 CONFIG_X86_MSR=m
 CONFIG_X86_CPUID=m
+
+#
+# Firmware Drivers
+#
+CONFIG_EDD=m
+CONFIG_DELL_RBU=m
+CONFIG_DCDBAS=m
 CONFIG_X86_HT=y
 CONFIG_X86_IO_APIC=y
 CONFIG_X86_LOCAL_APIC=y
@@ -496,6 +503,7 @@ CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
 CONFIG_MEGARAID_LEGACY=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -543,6 +551,7 @@ CONFIG_SCSI_QLA21XX=m
 CONFIG_SCSI_QLA22XX=m
 CONFIG_SCSI_QLA2300=m
 CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA24XX=m
 CONFIG_SCSI_QLA6312=m
 CONFIG_SCSI_QLA2XXX_FAILOVER=y
 CONFIG_SCSI_QLA4XXX=m
@@ -581,7 +590,10 @@ CONFIG_BLK_DEV_DM_BBR=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
@@ -787,6 +799,9 @@ CONFIG_IP_NF_CONNTRACK_MARK=y
 CONFIG_IP_NF_TARGET_CONNMARK=m
 CONFIG_IP_NF_MATCH_CONNMARK=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_MATCH_ADDRTYPE=m
+CONFIG_IP_NF_MATCH_HASHLIMIT=m
+# CONFIG_IP_NF_MATCH_IPV4OPTIONS is not set
 
 #
 # IPv6: Netfilter Configuration
@@ -999,6 +1014,7 @@ CONFIG_SK98LIN=m
 CONFIG_TIGON3=m
 CONFIG_NET_BROADCOM=m
 CONFIG_NET_BCM44=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1007,6 +1023,7 @@ CONFIG_IXGB=m
 CONFIG_IXGB_NAPI=y
 CONFIG_S2IO=m
 CONFIG_S2IO_NAPI=y
+# CONFIG_2BUFF_MODE is not set
 CONFIG_FDDI=y
 # CONFIG_DEFXX is not set
 CONFIG_SKFP=m
@@ -1494,8 +1511,9 @@ CONFIG_IPMI_HANDLER=m
 CONFIG_IPMI_PANIC_EVENT=y
 # CONFIG_IPMI_PANIC_STRING is not set
 CONFIG_IPMI_DEVICE_INTERFACE=m
-CONFIG_IPMI_KCS=m
+CONFIG_IPMI_SI=m
 CONFIG_IPMI_WATCHDOG=m
+CONFIG_IPMI_POWEROFF=m
 
 #
 # Watchdog Cards
@@ -1546,8 +1564,16 @@ CONFIG_APPLICOM=m
 #
 # Ftape, the floppy tape device driver
 #
+
+#
+# TPM devices
+#
+CONFIG_TCG_TPM=m
+CONFIG_TCG_NSC=m
+CONFIG_TCG_ATMEL=m
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=m
 CONFIG_AGP_INTEL_MCH=m
 # CONFIG_DRM is not set
 
@@ -1633,6 +1659,7 @@ CONFIG_SENSORS_VT1211=m
 CONFIG_SENSORS_W83781D=m
 CONFIG_SENSORS_W83L785TS=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_SENSORS_PCF8574=m
 
 #
 # Other I2C Chip support
@@ -1763,6 +1790,8 @@ CONFIG_FB_VESA=y
 CONFIG_VIDEO_SELECT=y
 CONFIG_FB_HGA=m
 CONFIG_FB_RIVA=m
+CONFIG_FB_I810=m
+CONFIG_FB_I810_GTF=y
 CONFIG_FB_MATROX=m
 CONFIG_FB_MATROX_MILLENIUM=y
 CONFIG_FB_MATROX_MYSTIQUE=y
index 2b18f96..8629266 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:02:11 2005
+# Sat Mar 11 23:36:27 2006
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -132,6 +132,7 @@ CONFIG_X86_CPUID=m
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
 # CONFIG_NOHIGHMEM is not set
 # CONFIG_HIGHMEM4G is not set
 CONFIG_HIGHMEM64G=y
@@ -533,6 +534,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -550,6 +553,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -563,6 +567,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -658,10 +663,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -1110,9 +1119,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1386,6 +1397,14 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_T1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1501,6 +1520,7 @@ CONFIG_SERIAL_8250_RSA=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_CRASH=m
@@ -2113,6 +2133,38 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -2206,15 +2258,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
@@ -2308,7 +2365,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_HIGHMEM=y
-# CONFIG_DEBUG_INFO is not set
+CONFIG_DEBUG_INFO=y
 # CONFIG_FRAME_POINTER is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
index 25a3848..aad9e59 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:01:23 2005
+# Sat Mar 11 23:44:45 2006
 #
 CONFIG_X86=y
 CONFIG_MMU=y
@@ -14,6 +14,7 @@ CONFIG_GENERIC_IOMAP=y
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_CLEAN_COMPILE=y
+CONFIG_BROKEN_ON_SMP=y
 
 #
 # General setup
@@ -56,22 +57,19 @@ CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SIG=y
 # CONFIG_MODULE_SIG_FORCE is not set
 CONFIG_KMOD=y
-CONFIG_STOP_MACHINE=y
 
 #
 # Processor type and features
 #
-CONFIG_MEM_MIRROR=y
-# CONFIG_X86_PC is not set
+CONFIG_X86_PC=y
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
 # CONFIG_X86_SUMMIT is not set
 # CONFIG_X86_BIGSMP is not set
 # CONFIG_X86_VISWS is not set
-CONFIG_X86_GENERICARCH=y
+# CONFIG_X86_GENERICARCH is not set
 # CONFIG_X86_ES7000 is not set
-CONFIG_X86_CYCLONE_TIMER=y
 # CONFIG_M386 is not set
 # CONFIG_M486 is not set
 # CONFIG_M586 is not set
@@ -111,17 +109,13 @@ CONFIG_X86_USE_PPRO_CHECKSUM=y
 # CONFIG_X86_HIGH_ENTRY is not set
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=32
-CONFIG_SCHED_SMT=y
+# CONFIG_SMP is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_X86_LOCAL_APIC=y
-CONFIG_X86_IO_APIC=y
+# CONFIG_X86_UP_APIC is not set
 CONFIG_X86_TSC=y
 CONFIG_X86_MCE=y
 # CONFIG_X86_MCE_NONFATAL is not set
-CONFIG_X86_MCE_P4THERMAL=y
 CONFIG_TOSHIBA=m
 CONFIG_I8K=m
 CONFIG_MICROCODE=m
@@ -132,18 +126,15 @@ CONFIG_X86_CPUID=m
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
 # CONFIG_NOHIGHMEM is not set
-# CONFIG_HIGHMEM4G is not set
-CONFIG_HIGHMEM64G=y
+CONFIG_HIGHMEM4G=y
+# CONFIG_HIGHMEM64G is not set
 CONFIG_HIGHMEM=y
-CONFIG_X86_PAE=y
-# CONFIG_NUMA is not set
 CONFIG_HIGHPTE=y
 # CONFIG_MATH_EMULATION is not set
 CONFIG_MTRR=y
 # CONFIG_EFI is not set
-# CONFIG_IRQBALANCE is not set
-CONFIG_HAVE_DEC_LOCK=y
 CONFIG_REGPARM=y
 CONFIG_IOPROC=y
 CONFIG_PTRACK=y
@@ -237,7 +228,6 @@ CONFIG_PCI_GOANY=y
 CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
-CONFIG_PCI_MSI=y
 CONFIG_PCI_LEGACY_PROC=y
 # CONFIG_PCI_NAMES is not set
 CONFIG_ISA=y
@@ -265,7 +255,6 @@ CONFIG_HOTPLUG_PCI=y
 # CONFIG_HOTPLUG_PCI_FAKE is not set
 CONFIG_HOTPLUG_PCI_COMPAQ=m
 # CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
-CONFIG_HOTPLUG_PCI_IBM=m
 CONFIG_HOTPLUG_PCI_ACPI=m
 CONFIG_HOTPLUG_PCI_ACPI_IBM=m
 # CONFIG_HOTPLUG_PCI_CPCI is not set
@@ -533,6 +522,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -550,6 +541,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -563,6 +555,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -658,10 +651,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -673,13 +670,14 @@ CONFIG_IEEE1394=m
 #
 # CONFIG_IEEE1394_VERBOSEDEBUG is not set
 # CONFIG_IEEE1394_OUI_DB is not set
-# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
+CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
 
 #
 # Device Drivers
 #
 CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_OHCI1394=y
+CONFIG_IEEE1394_OHCI1394=m
 
 #
 # Protocol Drivers
@@ -1040,6 +1038,7 @@ CONFIG_NET_VENDOR_SMC=y
 # CONFIG_ULTRA is not set
 CONFIG_SMC9194=m
 CONFIG_NET_VENDOR_RACAL=y
+# CONFIG_NI5010 is not set
 # CONFIG_NI52 is not set
 # CONFIG_NI65 is not set
 
@@ -1056,6 +1055,7 @@ CONFIG_DE4X5=m
 CONFIG_WINBOND_840=m
 CONFIG_DM9102=m
 CONFIG_PCMCIA_XIRCOM=m
+# CONFIG_PCMCIA_XIRTULIP is not set
 # CONFIG_AT1700 is not set
 # CONFIG_DEPCA is not set
 CONFIG_HP100=m
@@ -1109,9 +1109,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1286,6 +1288,7 @@ CONFIG_ISDN_TTY_FAX=y
 #
 # ISDN feature submodules
 #
+CONFIG_ISDN_DRV_LOOP=m
 
 #
 # ISDN4Linux hardware drivers
@@ -1365,6 +1368,8 @@ CONFIG_ISDN_DRV_PCBIT=m
 CONFIG_ISDN_DRV_SC=m
 CONFIG_ISDN_DRV_ACT2000=m
 CONFIG_ISDN_DRV_TPAM=m
+CONFIG_HYSDN=m
+CONFIG_HYSDN_CAPI=y
 
 #
 # CAPI subsystem
@@ -1385,6 +1390,14 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_T1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1473,12 +1486,25 @@ CONFIG_VT=y
 CONFIG_VT_CONSOLE=y
 CONFIG_HW_CONSOLE=y
 CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_COMPUTONE is not set
 # CONFIG_ROCKETPORT is not set
 # CONFIG_CYCLADES is not set
+# CONFIG_DIGIEPCA is not set
+# CONFIG_DIGI is not set
+# CONFIG_ESPSERIAL is not set
+# CONFIG_MOXA_INTELLIO is not set
+# CONFIG_MOXA_SMARTIO is not set
+# CONFIG_ISI is not set
 CONFIG_SYNCLINK=m
 CONFIG_SYNCLINKMP=m
 CONFIG_N_HDLC=m
+# CONFIG_RISCOM8 is not set
+# CONFIG_SPECIALIX is not set
+# CONFIG_SX is not set
+# CONFIG_RIO is not set
 CONFIG_STALDRV=y
+# CONFIG_STALLION is not set
+# CONFIG_ISTALLION is not set
 
 #
 # Serial drivers
@@ -1500,6 +1526,7 @@ CONFIG_SERIAL_8250_RSA=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_CRASH=m
@@ -1575,6 +1602,7 @@ CONFIG_SONYPI=m
 #
 # Ftape, the floppy tape device driver
 #
+# CONFIG_FTAPE is not set
 CONFIG_AGP=y
 CONFIG_AGP_ALI=y
 CONFIG_AGP_ATI=y
@@ -1628,6 +1656,7 @@ CONFIG_I2C_ALI1563=m
 CONFIG_I2C_ALI15X3=m
 CONFIG_I2C_AMD756=m
 CONFIG_I2C_AMD8111=m
+# CONFIG_I2C_ELEKTOR is not set
 CONFIG_I2C_I801=m
 CONFIG_I2C_I810=m
 CONFIG_I2C_ISA=m
@@ -1717,6 +1746,7 @@ CONFIG_VIDEO_DEV=m
 # CONFIG_TUNER_3036 is not set
 # CONFIG_VIDEO_STRADIS is not set
 # CONFIG_VIDEO_ZORAN is not set
+# CONFIG_VIDEO_MEYE is not set
 # CONFIG_VIDEO_SAA7134 is not set
 # CONFIG_VIDEO_MXB is not set
 # CONFIG_VIDEO_DPC is not set
@@ -2052,6 +2082,7 @@ CONFIG_USB_USS720=m
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_WHITEHEAT=m
 CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
 CONFIG_USB_SERIAL_EMPEG=m
 CONFIG_USB_SERIAL_FTDI_SIO=m
@@ -2112,6 +2143,38 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -2205,15 +2268,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
@@ -2307,7 +2375,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_HIGHMEM=y
-# CONFIG_DEBUG_INFO is not set
+CONFIG_DEBUG_INFO=y
 # CONFIG_FRAME_POINTER is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_DEBUG_STACKOVERFLOW=y
@@ -2316,8 +2384,6 @@ CONFIG_DEBUG_STACK_USAGE=y
 # CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_4KSTACKS is not set
 # CONFIG_SCHEDSTATS is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
-CONFIG_X86_MPPARSE=y
 
 #
 # Security options
@@ -2374,8 +2440,5 @@ CONFIG_CRC32=y
 CONFIG_LIBCRC32C=m
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_DEFLATE=m
-CONFIG_X86_SMP=y
-CONFIG_X86_HT=y
 CONFIG_X86_BIOS_REBOOT=y
-CONFIG_X86_TRAMPOLINE=y
 CONFIG_PC=y
index 30280c2..173c099 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:05:00 2005
+# Wed Mar 15 17:33:05 2006
 #
 
 #
@@ -84,6 +84,7 @@ CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=64
 # CONFIG_HOTPLUG_CPU is not set
+CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 # CONFIG_IA32_SUPPORT is not set
@@ -98,6 +99,7 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -307,6 +309,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -321,6 +325,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -332,6 +337,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -408,10 +414,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -419,6 +429,32 @@ CONFIG_FUSION_LAN=m
 CONFIG_IEEE1394=m
 
 #
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
+CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
+
+#
+# Device Drivers
+#
+CONFIG_IEEE1394_PCILYNX=m
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+CONFIG_IEEE1394_SBP2_PHYS_DMA=y
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+
+#
 # I2O device support
 #
 # CONFIG_I2O is not set
@@ -804,9 +840,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1044,6 +1082,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1128,6 +1172,8 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_N_HDLC=m
 CONFIG_STALDRV=y
 CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
@@ -1149,6 +1195,7 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1216,6 +1263,7 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
+CONFIG_HANGCHECK_TIMER=m
 # CONFIG_MMTIMER is not set
 
 #
@@ -1672,6 +1720,38 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1757,15 +1837,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
@@ -1929,7 +2014,3 @@ CONFIG_CRYPTO_CRC32C=m
 CONFIG_CRYPTO_SIGNATURE=y
 CONFIG_CRYPTO_SIGNATURE_DSA=y
 CONFIG_CRYPTO_MPILIB=y
-CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_OHCI1394=y
-CONFIG_IEEE1394_SBP2=m
-CONFIG_IEEE1394_SBP2_PHYS_DMA=y
index ab56ce0..87e1d04 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:04:10 2005
+# Wed Mar 15 17:35:26 2006
 #
 
 #
@@ -84,6 +84,7 @@ CONFIG_FORCE_MAX_ZONEORDER=18
 CONFIG_SMP=y
 CONFIG_NR_CPUS=64
 # CONFIG_HOTPLUG_CPU is not set
+CONFIG_SCHED_SMT=y
 # CONFIG_PREEMPT is not set
 CONFIG_HAVE_DEC_LOCK=y
 # CONFIG_IA32_SUPPORT is not set
@@ -98,6 +99,7 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -307,6 +309,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -321,6 +325,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -332,6 +337,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -408,10 +414,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -419,6 +429,32 @@ CONFIG_FUSION_LAN=m
 CONFIG_IEEE1394=m
 
 #
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
+CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
+
+#
+# Device Drivers
+#
+CONFIG_IEEE1394_PCILYNX=m
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+CONFIG_IEEE1394_SBP2_PHYS_DMA=y
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+
+#
 # I2O device support
 #
 # CONFIG_I2O is not set
@@ -804,9 +840,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1044,6 +1082,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1128,6 +1172,8 @@ CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_N_HDLC=m
 CONFIG_STALDRV=y
 CONFIG_SGI_SNSC=y
+CONFIG_SGI_TIOCX=y
+CONFIG_SGI_MBCS=m
 
 #
 # Serial drivers
@@ -1149,6 +1195,7 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1216,6 +1263,7 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
+CONFIG_HANGCHECK_TIMER=m
 # CONFIG_MMTIMER is not set
 
 #
@@ -1672,6 +1720,38 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1757,15 +1837,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
@@ -1929,7 +2014,3 @@ CONFIG_CRYPTO_CRC32C=m
 CONFIG_CRYPTO_SIGNATURE=y
 CONFIG_CRYPTO_SIGNATURE_DSA=y
 CONFIG_CRYPTO_MPILIB=y
-CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_OHCI1394=y
-CONFIG_IEEE1394_SBP2=m
-CONFIG_IEEE1394_SBP2_PHYS_DMA=y
index f621ca1..a210500 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:06:20 2005
+# Wed Mar 15 17:39:44 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -438,6 +438,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -452,6 +454,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -463,6 +466,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -539,10 +543,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -965,9 +973,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1213,6 +1223,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1318,6 +1334,7 @@ CONFIG_SERIAL_8250_RSA=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_CRASH=m
@@ -1865,9 +1882,42 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
 
 #
 # File systems
@@ -1963,15 +2013,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
index 8a1b02f..ebe65e8 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
 # Linux kernel version: 2.6.9-prep.qp2.2.5.11.3qsnet
-# Thu Oct 27 17:05:31 2005
+# Wed Mar 15 17:38:17 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -438,6 +438,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -452,6 +454,7 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -463,6 +466,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -539,10 +543,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -561,7 +569,7 @@ CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
 # Device Drivers
 #
 CONFIG_IEEE1394_PCILYNX=m
-CONFIG_IEEE1394_OHCI1394=y
+CONFIG_IEEE1394_OHCI1394=m
 
 #
 # Protocol Drivers
@@ -965,9 +973,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1213,6 +1223,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1318,6 +1334,7 @@ CONFIG_SERIAL_8250_RSA=y
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_CRASH=m
@@ -1865,9 +1882,42 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+CONFIG_EDAC=m
+
+#
+# Reporting subsystems
+#
+# CONFIG_EDAC_DEBUG is not set
+CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC_AMD76X=m
+CONFIG_EDAC_E7XXX=m
+CONFIG_EDAC_E752X=m
+CONFIG_EDAC_I82875P=m
+CONFIG_EDAC_I82860=m
+CONFIG_EDAC_R82600=m
+CONFIG_EDAC_POLL=y
+
+#
 # Firmware Drivers
 #
 CONFIG_EDD=m
+CONFIG_DELL_RBU=m
 
 #
 # File systems
@@ -1963,15 +2013,20 @@ CONFIG_VXFS_FS=m
 #
 CONFIG_NFS_FS=m
 CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_NFS_DIRECTIO=y
 CONFIG_NFSD=m
+CONFIG_NFSD_V2_ACL=y
 CONFIG_NFSD_V3=y
+CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_ACL_SUPPORT=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
index 5d92aa6..01b7fa8 100644 (file)
@@ -37,7 +37,7 @@ CONFIG_KERNEL_HALF_GIGS=1
 # CONFIG_HIGHMEM is not set
 CONFIG_PROC_MM=y
 CONFIG_KERNEL_STACK_ORDER=4
-# CONFIG_UML_REAL_TIME_CLOCK is not set
+CONFIG_UML_REAL_TIME_CLOCK=y
 
 #
 # Loadable module support
diff --git a/lustre/kernel_patches/patches/2.4.19-ext3.patch b/lustre/kernel_patches/patches/2.4.19-ext3.patch
deleted file mode 100644 (file)
index a167c6a..0000000
+++ /dev/null
@@ -1,7892 +0,0 @@
-diff -rup --new-file linux.mcp2/fs/ext3/Makefile linux_tmp/fs/ext3/Makefile
---- linux.mcp2/fs/ext3/Makefile        1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/Makefile 2001-12-21 09:41:55.000000000 -0800
-@@ -0,0 +1,16 @@
-+#
-+# Makefile for the linux ext2-filesystem routines.
-+#
-+# Note! Dependencies are done automagically by 'make dep', which also
-+# removes any old dependencies. DON'T put your own dependencies here
-+# unless it's something special (ie not a .c file).
-+#
-+# Note 2! The CFLAGS definitions are now in the main makefile...
-+
-+O_TARGET := ext3.o
-+
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+              ioctl.o namei.o super.o symlink.o
-+obj-m    := $(O_TARGET)
-+
-+include $(TOPDIR)/Rules.make
-diff -rup --new-file linux.mcp2/fs/ext3/balloc.c linux_tmp/fs/ext3/balloc.c
---- linux.mcp2/fs/ext3/balloc.c        1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/balloc.c 2002-08-02 17:39:45.000000000 -0700
-@@ -0,0 +1,999 @@
-+/*
-+ *  linux/fs/ext3/balloc.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/locks.h>
-+#include <linux/quotaops.h>
-+
-+/*
-+ * balloc.c contains the blocks allocation and deallocation routines
-+ */
-+
-+/*
-+ * The free blocks are managed by bitmaps.  A file system contains several
-+ * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
-+ * block for inodes, N blocks for the inode table and data blocks.
-+ *
-+ * The file system contains group descriptors which are located after the
-+ * super block.  Each descriptor contains the number of the bitmap block and
-+ * the free blocks count in the block.  The descriptors are loaded in memory
-+ * when a file system is mounted (see ext3_read_super).
-+ */
-+
-+
-+#define in_range(b, first, len)       ((b) >= (first) && (b) <= (first) + (len) - 1)
-+
-+struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
-+                                           unsigned int block_group,
-+                                           struct buffer_head ** bh)
-+{
-+      unsigned long group_desc;
-+      unsigned long desc;
-+      struct ext3_group_desc * gdp;
-+
-+      if (block_group >= sb->u.ext3_sb.s_groups_count) {
-+              ext3_error (sb, "ext3_get_group_desc",
-+                          "block_group >= groups_count - "
-+                          "block_group = %d, groups_count = %lu",
-+                          block_group, sb->u.ext3_sb.s_groups_count);
-+
-+              return NULL;
-+      }
-+      
-+      group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
-+      desc = block_group % EXT3_DESC_PER_BLOCK(sb);
-+      if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
-+              ext3_error (sb, "ext3_get_group_desc",
-+                          "Group descriptor not loaded - "
-+                          "block_group = %d, group_desc = %lu, desc = %lu",
-+                           block_group, group_desc, desc);
-+              return NULL;
-+      }
-+      
-+      gdp = (struct ext3_group_desc *) 
-+            sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
-+      if (bh)
-+              *bh = sb->u.ext3_sb.s_group_desc[group_desc];
-+      return gdp + desc;
-+}
-+
-+/*
-+ * Read the bitmap for a given block_group, reading into the specified 
-+ * slot in the superblock's bitmap cache.
-+ *
-+ * Return >=0 on success or a -ve error code.
-+ */
-+
-+static int read_block_bitmap (struct super_block * sb,
-+                             unsigned int block_group,
-+                             unsigned long bitmap_nr)
-+{
-+      struct ext3_group_desc * gdp;
-+      struct buffer_head * bh = NULL;
-+      int retval = -EIO;
-+      
-+      gdp = ext3_get_group_desc (sb, block_group, NULL);
-+      if (!gdp)
-+              goto error_out;
-+      retval = 0;
-+      bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
-+      if (!bh) {
-+              ext3_error (sb, "read_block_bitmap",
-+                          "Cannot read block bitmap - "
-+                          "block_group = %d, block_bitmap = %lu",
-+                          block_group, (unsigned long) gdp->bg_block_bitmap);
-+              retval = -EIO;
-+      }
-+      /*
-+       * On IO error, just leave a zero in the superblock's block pointer for
-+       * this group.  The IO will be retried next time.
-+       */
-+error_out:
-+      sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
-+      sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
-+      return retval;
-+}
-+
-+/*
-+ * load_block_bitmap loads the block bitmap for a blocks group
-+ *
-+ * It maintains a cache for the last bitmaps loaded.  This cache is managed
-+ * with a LRU algorithm.
-+ *
-+ * Notes:
-+ * 1/ There is one cache per mounted file system.
-+ * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
-+ *    this function reads the bitmap without maintaining a LRU cache.
-+ * 
-+ * Return the slot used to store the bitmap, or a -ve error code.
-+ */
-+static int __load_block_bitmap (struct super_block * sb,
-+                              unsigned int block_group)
-+{
-+      int i, j, retval = 0;
-+      unsigned long block_bitmap_number;
-+      struct buffer_head * block_bitmap;
-+
-+      if (block_group >= sb->u.ext3_sb.s_groups_count)
-+              ext3_panic (sb, "load_block_bitmap",
-+                          "block_group >= groups_count - "
-+                          "block_group = %d, groups_count = %lu",
-+                          block_group, sb->u.ext3_sb.s_groups_count);
-+
-+      if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
-+              if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
-+                      if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
-+                          block_group)
-+                              return block_group;
-+                      ext3_error (sb, "__load_block_bitmap",
-+                                  "block_group != block_bitmap_number");
-+              }
-+              retval = read_block_bitmap (sb, block_group, block_group);
-+              if (retval < 0)
-+                      return retval;
-+              return block_group;
-+      }
-+
-+      for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
-+                  sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
-+              ;
-+      if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
-+          sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
-+              block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
-+              block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
-+              for (j = i; j > 0; j--) {
-+                      sb->u.ext3_sb.s_block_bitmap_number[j] =
-+                              sb->u.ext3_sb.s_block_bitmap_number[j - 1];
-+                      sb->u.ext3_sb.s_block_bitmap[j] =
-+                              sb->u.ext3_sb.s_block_bitmap[j - 1];
-+              }
-+              sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
-+              sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
-+
-+              /*
-+               * There's still one special case here --- if block_bitmap == 0
-+               * then our last attempt to read the bitmap failed and we have
-+               * just ended up caching that failure.  Try again to read it.
-+               */
-+              if (!block_bitmap)
-+                      retval = read_block_bitmap (sb, block_group, 0);
-+      } else {
-+              if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
-+                      sb->u.ext3_sb.s_loaded_block_bitmaps++;
-+              else
-+                      brelse (sb->u.ext3_sb.s_block_bitmap
-+                                      [EXT3_MAX_GROUP_LOADED - 1]);
-+              for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
-+                                      j > 0;  j--) {
-+                      sb->u.ext3_sb.s_block_bitmap_number[j] =
-+                              sb->u.ext3_sb.s_block_bitmap_number[j - 1];
-+                      sb->u.ext3_sb.s_block_bitmap[j] =
-+                              sb->u.ext3_sb.s_block_bitmap[j - 1];
-+              }
-+              retval = read_block_bitmap (sb, block_group, 0);
-+      }
-+      return retval;
-+}
-+
-+/*
-+ * Load the block bitmap for a given block group.  First of all do a couple
-+ * of fast lookups for common cases and then pass the request onto the guts
-+ * of the bitmap loader.
-+ *
-+ * Return the slot number of the group in the superblock bitmap cache's on
-+ * success, or a -ve error code.
-+ *
-+ * There is still one inconsistency here --- if the number of groups in this
-+ * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of 
-+ * differentiating between a group for which we have never performed a bitmap
-+ * IO request, and a group for which the last bitmap read request failed.
-+ */
-+static inline int load_block_bitmap (struct super_block * sb,
-+                                   unsigned int block_group)
-+{
-+      int slot;
-+      
-+      /*
-+       * Do the lookup for the slot.  First of all, check if we're asking
-+       * for the same slot as last time, and did we succeed that last time?
-+       */
-+      if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
-+          sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
-+          sb->u.ext3_sb.s_block_bitmap[0]) {
-+              return 0;
-+      }
-+      /*
-+       * Or can we do a fast lookup based on a loaded group on a filesystem
-+       * small enough to be mapped directly into the superblock?
-+       */
-+      else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && 
-+               sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
-+                      && sb->u.ext3_sb.s_block_bitmap[block_group]) {
-+              slot = block_group;
-+      }
-+      /*
-+       * If not, then do a full lookup for this block group.
-+       */
-+      else {
-+              slot = __load_block_bitmap (sb, block_group);
-+      }
-+
-+      /*
-+       * <0 means we just got an error
-+       */
-+      if (slot < 0)
-+              return slot;
-+      
-+      /*
-+       * If it's a valid slot, we may still have cached a previous IO error,
-+       * in which case the bh in the superblock cache will be zero.
-+       */
-+      if (!sb->u.ext3_sb.s_block_bitmap[slot])
-+              return -EIO;
-+      
-+      /*
-+       * Must have been read in OK to get this far.
-+       */
-+      return slot;
-+}
-+
-+/* Free given blocks, update quota and i_blocks field */
-+void ext3_free_blocks (handle_t *handle, struct inode * inode,
-+                      unsigned long block, unsigned long count)
-+{
-+      struct buffer_head *bitmap_bh;
-+      struct buffer_head *gd_bh;
-+      unsigned long block_group;
-+      unsigned long bit;
-+      unsigned long i;
-+      int bitmap_nr;
-+      unsigned long overflow;
-+      struct super_block * sb;
-+      struct ext3_group_desc * gdp;
-+      struct ext3_super_block * es;
-+      int err = 0, ret;
-+      int dquot_freed_blocks = 0;
-+
-+      sb = inode->i_sb;
-+      if (!sb) {
-+              printk ("ext3_free_blocks: nonexistent device");
-+              return;
-+      }
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+      if (block < le32_to_cpu(es->s_first_data_block) || 
-+          (block + count) > le32_to_cpu(es->s_blocks_count)) {
-+              ext3_error (sb, "ext3_free_blocks",
-+                          "Freeing blocks not in datazone - "
-+                          "block = %lu, count = %lu", block, count);
-+              goto error_return;
-+      }
-+
-+      ext3_debug ("freeing block %lu\n", block);
-+
-+do_more:
-+      overflow = 0;
-+      block_group = (block - le32_to_cpu(es->s_first_data_block)) /
-+                    EXT3_BLOCKS_PER_GROUP(sb);
-+      bit = (block - le32_to_cpu(es->s_first_data_block)) %
-+                    EXT3_BLOCKS_PER_GROUP(sb);
-+      /*
-+       * Check to see if we are freeing blocks across a group
-+       * boundary.
-+       */
-+      if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
-+              overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
-+              count -= overflow;
-+      }
-+      bitmap_nr = load_block_bitmap (sb, block_group);
-+      if (bitmap_nr < 0)
-+              goto error_return;
-+      
-+      bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
-+      gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
-+      if (!gdp)
-+              goto error_return;
-+
-+      if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
-+          in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
-+          in_range (block, le32_to_cpu(gdp->bg_inode_table),
-+                    sb->u.ext3_sb.s_itb_per_group) ||
-+          in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
-+                    sb->u.ext3_sb.s_itb_per_group))
-+              ext3_error (sb, "ext3_free_blocks",
-+                          "Freeing blocks in system zones - "
-+                          "Block = %lu, count = %lu",
-+                          block, count);
-+
-+      /*
-+       * We are about to start releasing blocks in the bitmap,
-+       * so we need undo access.
-+       */
-+      /* @@@ check errors */
-+      BUFFER_TRACE(bitmap_bh, "getting undo access");
-+      err = ext3_journal_get_undo_access(handle, bitmap_bh);
-+      if (err)
-+              goto error_return;
-+      
-+      /*
-+       * We are about to modify some metadata.  Call the journal APIs
-+       * to unshare ->b_data if a currently-committing transaction is
-+       * using it
-+       */
-+      BUFFER_TRACE(gd_bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, gd_bh);     
-+      if (err)
-+              goto error_return;
-+
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-+      if (err)
-+              goto error_return;
-+
-+      for (i = 0; i < count; i++) {
-+              /*
-+               * An HJ special.  This is expensive...
-+               */
-+#ifdef CONFIG_JBD_DEBUG
-+              {
-+                      struct buffer_head *debug_bh;
-+                      debug_bh = sb_get_hash_table(sb, block + i);
-+                      if (debug_bh) {
-+                              BUFFER_TRACE(debug_bh, "Deleted!");
-+                              if (!bh2jh(bitmap_bh)->b_committed_data)
-+                                      BUFFER_TRACE(debug_bh,
-+                                              "No commited data in bitmap");
-+                              BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
-+                              __brelse(debug_bh);
-+                      }
-+              }
-+#endif
-+              BUFFER_TRACE(bitmap_bh, "clear bit");
-+              if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
-+                      ext3_error (sb, __FUNCTION__,
-+                                    "bit already cleared for block %lu", 
-+                                    block + i);
-+                      BUFFER_TRACE(bitmap_bh, "bit already cleared");
-+              } else {
-+                      dquot_freed_blocks++;
-+                      gdp->bg_free_blocks_count =
-+                        cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
-+                      es->s_free_blocks_count =
-+                        cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
-+              }
-+              /* @@@ This prevents newly-allocated data from being
-+               * freed and then reallocated within the same
-+               * transaction. 
-+               * 
-+               * Ideally we would want to allow that to happen, but to
-+               * do so requires making journal_forget() capable of
-+               * revoking the queued write of a data block, which
-+               * implies blocking on the journal lock.  *forget()
-+               * cannot block due to truncate races.
-+               *
-+               * Eventually we can fix this by making journal_forget()
-+               * return a status indicating whether or not it was able
-+               * to revoke the buffer.  On successful revoke, it is
-+               * safe not to set the allocation bit in the committed
-+               * bitmap, because we know that there is no outstanding
-+               * activity on the buffer any more and so it is safe to
-+               * reallocate it.  
-+               */
-+              BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
-+              J_ASSERT_BH(bitmap_bh,
-+                              bh2jh(bitmap_bh)->b_committed_data != NULL);
-+              ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
-+      }
-+
-+      /* We dirtied the bitmap block */
-+      BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-+      err = ext3_journal_dirty_metadata(handle, bitmap_bh);
-+
-+      /* And the group descriptor block */
-+      BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-+      ret = ext3_journal_dirty_metadata(handle, gd_bh);
-+      if (!err) err = ret;
-+
-+      /* And the superblock */
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
-+      ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-+      if (!err) err = ret;
-+
-+      if (overflow && !err) {
-+              block += count;
-+              count = overflow;
-+              goto do_more;
-+      }
-+      sb->s_dirt = 1;
-+error_return:
-+      ext3_std_error(sb, err);
-+      unlock_super(sb);
-+      if (dquot_freed_blocks)
-+              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
-+      return;
-+}
-+
-+/* For ext3 allocations, we must not reuse any blocks which are
-+ * allocated in the bitmap buffer's "last committed data" copy.  This
-+ * prevents deletes from freeing up the page for reuse until we have
-+ * committed the delete transaction.
-+ *
-+ * If we didn't do this, then deleting something and reallocating it as
-+ * data would allow the old block to be overwritten before the
-+ * transaction committed (because we force data to disk before commit).
-+ * This would lead to corruption if we crashed between overwriting the
-+ * data and committing the delete. 
-+ *
-+ * @@@ We may want to make this allocation behaviour conditional on
-+ * data-writes at some point, and disable it for metadata allocations or
-+ * sync-data inodes.
-+ */
-+static int ext3_test_allocatable(int nr, struct buffer_head *bh)
-+{
-+      if (ext3_test_bit(nr, bh->b_data))
-+              return 0;
-+      if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
-+              return 1;
-+      return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
-+}
-+
-+/*
-+ * Find an allocatable block in a bitmap.  We honour both the bitmap and
-+ * its last-committed copy (if that exists), and perform the "most
-+ * appropriate allocation" algorithm of looking for a free block near
-+ * the initial goal; then for a free byte somewhere in the bitmap; then
-+ * for any free bit in the bitmap.
-+ */
-+static int find_next_usable_block(int start,
-+                      struct buffer_head *bh, int maxblocks)
-+{
-+      int here, next;
-+      char *p, *r;
-+      
-+      if (start > 0) {
-+              /*
-+               * The goal was occupied; search forward for a free 
-+               * block within the next XX blocks.
-+               *
-+               * end_goal is more or less random, but it has to be
-+               * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
-+               * next 64-bit boundary is simple..
-+               */
-+              int end_goal = (start + 63) & ~63;
-+              here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
-+              if (here < end_goal && ext3_test_allocatable(here, bh))
-+                      return here;
-+              
-+              ext3_debug ("Bit not found near goal\n");
-+              
-+      }
-+      
-+      here = start;
-+      if (here < 0)
-+              here = 0;
-+      
-+      /*
-+       * There has been no free block found in the near vicinity of
-+       * the goal: do a search forward through the block groups,
-+       * searching in each group first for an entire free byte in the
-+       * bitmap and then for any free bit.
-+       * 
-+       * Search first in the remainder of the current group 
-+       */
-+      p = ((char *) bh->b_data) + (here >> 3);
-+      r = memscan(p, 0, (maxblocks - here + 7) >> 3);
-+      next = (r - ((char *) bh->b_data)) << 3;
-+      
-+      if (next < maxblocks && ext3_test_allocatable(next, bh))
-+              return next;
-+      
-+      /* The bitmap search --- search forward alternately
-+       * through the actual bitmap and the last-committed copy
-+       * until we find a bit free in both. */
-+
-+      while (here < maxblocks) {
-+              next  = ext3_find_next_zero_bit ((unsigned long *) bh->b_data, 
-+                                               maxblocks, here);
-+              if (next >= maxblocks)
-+                      return -1;
-+              if (ext3_test_allocatable(next, bh))
-+                      return next;
-+
-+              J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
-+              here = ext3_find_next_zero_bit
-+                      ((unsigned long *) bh2jh(bh)->b_committed_data, 
-+                       maxblocks, next);
-+      }
-+      return -1;
-+}
-+
-+/*
-+ * ext3_new_block uses a goal block to assist allocation.  If the goal is
-+ * free, or there is a free block within 32 blocks of the goal, that block
-+ * is allocated.  Otherwise a forward search is made for a free block; within 
-+ * each block group the search first looks for an entire free byte in the block
-+ * bitmap, and then for any free bit if that fails.
-+ * This function also updates quota and i_blocks field.
-+ */
-+int ext3_new_block (handle_t *handle, struct inode * inode,
-+              unsigned long goal, u32 * prealloc_count,
-+              u32 * prealloc_block, int * errp)
-+{
-+      struct buffer_head * bh, *bhtmp;
-+      struct buffer_head * bh2;
-+#if 0
-+      char * p, * r;
-+#endif
-+      int i, j, k, tmp, alloctmp;
-+      int bitmap_nr;
-+      int fatal = 0, err;
-+      int performed_allocation = 0;
-+      struct super_block * sb;
-+      struct ext3_group_desc * gdp;
-+      struct ext3_super_block * es;
-+#ifdef EXT3FS_DEBUG
-+      static int goal_hits = 0, goal_attempts = 0;
-+#endif
-+      *errp = -ENOSPC;
-+      sb = inode->i_sb;
-+      if (!sb) {
-+              printk ("ext3_new_block: nonexistent device");
-+              return 0;
-+      }
-+
-+      /*
-+       * Check quota for allocation of this block.
-+       */
-+      if (DQUOT_ALLOC_BLOCK(inode, 1)) {
-+              *errp = -EDQUOT;
-+              return 0;
-+      }
-+
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+      if (le32_to_cpu(es->s_free_blocks_count) <=
-+                      le32_to_cpu(es->s_r_blocks_count) &&
-+          ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
-+           (sb->u.ext3_sb.s_resgid == 0 ||
-+            !in_group_p (sb->u.ext3_sb.s_resgid)) && 
-+           !capable(CAP_SYS_RESOURCE)))
-+              goto out;
-+
-+      ext3_debug ("goal=%lu.\n", goal);
-+
-+      /*
-+       * First, test whether the goal block is free.
-+       */
-+      if (goal < le32_to_cpu(es->s_first_data_block) ||
-+          goal >= le32_to_cpu(es->s_blocks_count))
-+              goal = le32_to_cpu(es->s_first_data_block);
-+      i = (goal - le32_to_cpu(es->s_first_data_block)) /
-+                      EXT3_BLOCKS_PER_GROUP(sb);
-+      gdp = ext3_get_group_desc (sb, i, &bh2);
-+      if (!gdp)
-+              goto io_error;
-+
-+      if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
-+              j = ((goal - le32_to_cpu(es->s_first_data_block)) %
-+                              EXT3_BLOCKS_PER_GROUP(sb));
-+#ifdef EXT3FS_DEBUG
-+              if (j)
-+                      goal_attempts++;
-+#endif
-+              bitmap_nr = load_block_bitmap (sb, i);
-+              if (bitmap_nr < 0)
-+                      goto io_error;
-+              
-+              bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
-+
-+              ext3_debug ("goal is at %d:%d.\n", i, j);
-+
-+              if (ext3_test_allocatable(j, bh)) {
-+#ifdef EXT3FS_DEBUG
-+                      goal_hits++;
-+                      ext3_debug ("goal bit allocated.\n");
-+#endif
-+                      goto got_block;
-+              }
-+
-+              j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
-+              if (j >= 0)
-+                      goto search_back;
-+      }
-+
-+      ext3_debug ("Bit not found in block group %d.\n", i);
-+
-+      /*
-+       * Now search the rest of the groups.  We assume that 
-+       * i and gdp correctly point to the last group visited.
-+       */
-+      for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
-+              i++;
-+              if (i >= sb->u.ext3_sb.s_groups_count)
-+                      i = 0;
-+              gdp = ext3_get_group_desc (sb, i, &bh2);
-+              if (!gdp) {
-+                      *errp = -EIO;
-+                      goto out;
-+              }
-+              if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
-+                      bitmap_nr = load_block_bitmap (sb, i);
-+                      if (bitmap_nr < 0)
-+                              goto io_error;
-+      
-+                      bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
-+                      j = find_next_usable_block(-1, bh, 
-+                                                 EXT3_BLOCKS_PER_GROUP(sb));
-+                      if (j >= 0) 
-+                              goto search_back;
-+              }
-+      }
-+
-+      /* No space left on the device */
-+      goto out;
-+
-+search_back:
-+      /* 
-+       * We have succeeded in finding a free byte in the block
-+       * bitmap.  Now search backwards up to 7 bits to find the
-+       * start of this group of free blocks.
-+       */
-+      for (   k = 0;
-+              k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
-+              k++, j--)
-+              ;
-+      
-+got_block:
-+
-+      ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
-+
-+      /* Make sure we use undo access for the bitmap, because it is
-+           critical that we do the frozen_data COW on bitmap buffers in
-+           all cases even if the buffer is in BJ_Forget state in the
-+           committing transaction.  */
-+      BUFFER_TRACE(bh, "get undo access for marking new block");
-+      fatal = ext3_journal_get_undo_access(handle, bh);
-+      if (fatal) goto out;
-+      
-+      BUFFER_TRACE(bh2, "get_write_access");
-+      fatal = ext3_journal_get_write_access(handle, bh2);
-+      if (fatal) goto out;
-+
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
-+      fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-+      if (fatal) goto out;
-+
-+      tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
-+                              + le32_to_cpu(es->s_first_data_block);
-+
-+      if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
-+          tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
-+          in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
-+                    sb->u.ext3_sb.s_itb_per_group))
-+              ext3_error (sb, "ext3_new_block",
-+                          "Allocating block in system zone - "
-+                          "block = %u", tmp);
-+
-+      /* The superblock lock should guard against anybody else beating
-+       * us to this point! */
-+      J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
-+      BUFFER_TRACE(bh, "setting bitmap bit");
-+      ext3_set_bit(j, bh->b_data);
-+      performed_allocation = 1;
-+
-+#ifdef CONFIG_JBD_DEBUG
-+      {
-+              struct buffer_head *debug_bh;
-+
-+              /* Record bitmap buffer state in the newly allocated block */
-+              debug_bh = sb_get_hash_table(sb, tmp);
-+              if (debug_bh) {
-+                      BUFFER_TRACE(debug_bh, "state when allocated");
-+                      BUFFER_TRACE2(debug_bh, bh, "bitmap state");
-+                      brelse(debug_bh);
-+              }
-+      }
-+#endif
-+      if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
-+              J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
-+      bhtmp = bh;
-+      alloctmp = j;
-+
-+      ext3_debug ("found bit %d\n", j);
-+
-+      /*
-+       * Do block preallocation now if required.
-+       */
-+#ifdef EXT3_PREALLOCATE
-+      /*
-+       * akpm: this is not enabled for ext3.  Need to use
-+       * ext3_test_allocatable()
-+       */
-+      /* Writer: ->i_prealloc* */
-+      if (prealloc_count && !*prealloc_count) {
-+              int     prealloc_goal;
-+              unsigned long next_block = tmp + 1;
-+
-+              prealloc_goal = es->s_prealloc_blocks ?
-+                      es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
-+
-+              *prealloc_block = next_block;
-+              /* Writer: end */
-+              for (k = 1;
-+                   k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
-+                   k++, next_block++) {
-+                      if (DQUOT_PREALLOC_BLOCK(inode, 1))
-+                              break;
-+                      /* Writer: ->i_prealloc* */
-+                      if (*prealloc_block + *prealloc_count != next_block ||
-+                          ext3_set_bit (j + k, bh->b_data)) {
-+                              /* Writer: end */
-+                              DQUOT_FREE_BLOCK(inode, 1);
-+                              break;
-+                      }
-+                      (*prealloc_count)++;
-+                      /* Writer: end */
-+              }       
-+              /*
-+               * As soon as we go for per-group spinlocks we'll need these
-+               * done inside the loop above.
-+               */
-+              gdp->bg_free_blocks_count =
-+                      cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
-+                             (k - 1));
-+              es->s_free_blocks_count =
-+                      cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
-+                             (k - 1));
-+              ext3_debug ("Preallocated a further %lu bits.\n",
-+                             (k - 1));
-+      }
-+#endif
-+
-+      j = tmp;
-+
-+      BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
-+      err = ext3_journal_dirty_metadata(handle, bh);
-+      if (!fatal) fatal = err;
-+      
-+      if (j >= le32_to_cpu(es->s_blocks_count)) {
-+              ext3_error (sb, "ext3_new_block",
-+                          "block(%d) >= blocks count(%d) - "
-+                          "block_group = %d, es == %p ",j,
-+                      le32_to_cpu(es->s_blocks_count), i, es);
-+              goto out;
-+      }
-+
-+      /*
-+       * It is up to the caller to add the new buffer to a journal
-+       * list of some description.  We don't know in advance whether
-+       * the caller wants to use it as metadata or data.
-+       */
-+
-+      ext3_debug ("allocating block %d. "
-+                  "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
-+
-+      gdp->bg_free_blocks_count =
-+                      cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
-+      es->s_free_blocks_count =
-+                      cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
-+
-+      BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
-+      err = ext3_journal_dirty_metadata(handle, bh2);
-+      if (!fatal) fatal = err;
-+      
-+      BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
-+      err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-+      if (!fatal) fatal = err;
-+
-+      sb->s_dirt = 1;
-+      if (fatal)
-+              goto out;
-+
-+      unlock_super (sb);
-+      *errp = 0;
-+      return j;
-+      
-+io_error:
-+      *errp = -EIO;
-+out:
-+      if (fatal) {
-+              *errp = fatal;
-+              ext3_std_error(sb, fatal);
-+      }
-+      unlock_super (sb);
-+      /*
-+       * Undo the block allocation
-+       */
-+      if (!performed_allocation)
-+              DQUOT_FREE_BLOCK(inode, 1);
-+      return 0;
-+      
-+}
-+
-+unsigned long ext3_count_free_blocks (struct super_block * sb)
-+{
-+#ifdef EXT3FS_DEBUG
-+      struct ext3_super_block * es;
-+      unsigned long desc_count, bitmap_count, x;
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      int i;
-+      
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+      desc_count = 0;
-+      bitmap_count = 0;
-+      gdp = NULL;
-+      for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
-+              gdp = ext3_get_group_desc (sb, i, NULL);
-+              if (!gdp)
-+                      continue;
-+              desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
-+              bitmap_nr = load_block_bitmap (sb, i);
-+              if (bitmap_nr < 0)
-+                      continue;
-+              
-+              x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
-+                                   sb->s_blocksize);
-+              printk ("group %d: stored = %d, counted = %lu\n",
-+                      i, le16_to_cpu(gdp->bg_free_blocks_count), x);
-+              bitmap_count += x;
-+      }
-+      printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
-+             le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
-+      unlock_super (sb);
-+      return bitmap_count;
-+#else
-+      return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
-+#endif
-+}
-+
-+static inline int block_in_use (unsigned long block,
-+                              struct super_block * sb,
-+                              unsigned char * map)
-+{
-+      return ext3_test_bit ((block -
-+              le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
-+                       EXT3_BLOCKS_PER_GROUP(sb), map);
-+}
-+
-+static inline int test_root(int a, int b)
-+{
-+      if (a == 0)
-+              return 1;
-+      while (1) {
-+              if (a == 1)
-+                      return 1;
-+              if (a % b)
-+                      return 0;
-+              a = a / b;
-+      }
-+}
-+
-+int ext3_group_sparse(int group)
-+{
-+      return (test_root(group, 3) || test_root(group, 5) ||
-+              test_root(group, 7));
-+}
-+
-+/**
-+ *    ext3_bg_has_super - number of blocks used by the superblock in group
-+ *    @sb: superblock for filesystem
-+ *    @group: group number to check
-+ *
-+ *    Return the number of blocks used by the superblock (primary or backup)
-+ *    in this group.  Currently this will be only 0 or 1.
-+ */
-+int ext3_bg_has_super(struct super_block *sb, int group)
-+{
-+      if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-+          !ext3_group_sparse(group))
-+              return 0;
-+      return 1;
-+}
-+
-+/**
-+ *    ext3_bg_num_gdb - number of blocks used by the group table in group
-+ *    @sb: superblock for filesystem
-+ *    @group: group number to check
-+ *
-+ *    Return the number of blocks used by the group descriptor table
-+ *    (primary or backup) in this group.  In the future there may be a
-+ *    different number of descriptor blocks in each group.
-+ */
-+unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
-+{
-+      if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
-+          !ext3_group_sparse(group))
-+              return 0;
-+      return EXT3_SB(sb)->s_gdb_count;
-+}
-+
-+#ifdef CONFIG_EXT3_CHECK
-+/* Called at mount-time, super-block is locked */
-+void ext3_check_blocks_bitmap (struct super_block * sb)
-+{
-+      struct buffer_head * bh;
-+      struct ext3_super_block * es;
-+      unsigned long desc_count, bitmap_count, x, j;
-+      unsigned long desc_blocks;
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      int i;
-+
-+      es = sb->u.ext3_sb.s_es;
-+      desc_count = 0;
-+      bitmap_count = 0;
-+      gdp = NULL;
-+      for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
-+              gdp = ext3_get_group_desc (sb, i, NULL);
-+              if (!gdp)
-+                      continue;
-+              desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
-+              bitmap_nr = load_block_bitmap (sb, i);
-+              if (bitmap_nr < 0)
-+                      continue;
-+
-+              bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
-+
-+              if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
-+                      ext3_error(sb, __FUNCTION__,
-+                                 "Superblock in group %d is marked free", i);
-+
-+              desc_blocks = ext3_bg_num_gdb(sb, i);
-+              for (j = 0; j < desc_blocks; j++)
-+                      if (!ext3_test_bit(j + 1, bh->b_data))
-+                              ext3_error(sb, __FUNCTION__,
-+                                         "Descriptor block #%ld in group "
-+                                         "%d is marked free", j, i);
-+
-+              if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
-+                                              sb, bh->b_data))
-+                      ext3_error (sb, "ext3_check_blocks_bitmap",
-+                                  "Block bitmap for group %d is marked free",
-+                                  i);
-+
-+              if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
-+                                              sb, bh->b_data))
-+                      ext3_error (sb, "ext3_check_blocks_bitmap",
-+                                  "Inode bitmap for group %d is marked free",
-+                                  i);
-+
-+              for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
-+                      if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
-+                                                      sb, bh->b_data))
-+                              ext3_error (sb, "ext3_check_blocks_bitmap",
-+                                          "Block #%d of the inode table in "
-+                                          "group %d is marked free", j, i);
-+
-+              x = ext3_count_free (bh, sb->s_blocksize);
-+              if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
-+                      ext3_error (sb, "ext3_check_blocks_bitmap",
-+                                  "Wrong free blocks count for group %d, "
-+                                  "stored = %d, counted = %lu", i,
-+                                  le16_to_cpu(gdp->bg_free_blocks_count), x);
-+              bitmap_count += x;
-+      }
-+      if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
-+              ext3_error (sb, "ext3_check_blocks_bitmap",
-+                      "Wrong free blocks count in super block, "
-+                      "stored = %lu, counted = %lu",
-+                      (unsigned long)le32_to_cpu(es->s_free_blocks_count),
-+                      bitmap_count);
-+}
-+#endif
-diff -rup --new-file linux.mcp2/fs/ext3/bitmap.c linux_tmp/fs/ext3/bitmap.c
---- linux.mcp2/fs/ext3/bitmap.c        1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/bitmap.c 2001-11-09 14:25:04.000000000 -0800
-@@ -0,0 +1,26 @@
-+/*
-+ *  linux/fs/ext3/bitmap.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ */
-+
-+#include <linux/fs.h>
-+
-+
-+static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-+
-+unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
-+{
-+      unsigned int i;
-+      unsigned long sum = 0;
-+      
-+      if (!map) 
-+              return (0);
-+      for (i = 0; i < numchars; i++)
-+              sum += nibblemap[map->b_data[i] & 0xf] +
-+                      nibblemap[(map->b_data[i] >> 4) & 0xf];
-+      return (sum);
-+}
-diff -rup --new-file linux.mcp2/fs/ext3/dir.c linux_tmp/fs/ext3/dir.c
---- linux.mcp2/fs/ext3/dir.c   1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/dir.c    2001-11-09 14:25:04.000000000 -0800
-@@ -0,0 +1,190 @@
-+/*
-+ *  linux/fs/ext3/dir.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/dir.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  ext3 directory handling functions
-+ *
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+
-+static unsigned char ext3_filetype_table[] = {
-+      DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-+};
-+
-+static int ext3_readdir(struct file *, void *, filldir_t);
-+
-+struct file_operations ext3_dir_operations = {
-+      read:           generic_read_dir,
-+      readdir:        ext3_readdir,           /* BKL held */
-+      ioctl:          ext3_ioctl,             /* BKL held */
-+      fsync:          ext3_sync_file,         /* BKL held */
-+};
-+
-+int ext3_check_dir_entry (const char * function, struct inode * dir,
-+                        struct ext3_dir_entry_2 * de,
-+                        struct buffer_head * bh,
-+                        unsigned long offset)
-+{
-+      const char * error_msg = NULL;
-+      const int rlen = le16_to_cpu(de->rec_len);
-+
-+      if (rlen < EXT3_DIR_REC_LEN(1))
-+              error_msg = "rec_len is smaller than minimal";
-+      else if (rlen % 4 != 0)
-+              error_msg = "rec_len % 4 != 0";
-+      else if (rlen < EXT3_DIR_REC_LEN(de->name_len))
-+              error_msg = "rec_len is too small for name_len";
-+      else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
-+              error_msg = "directory entry across blocks";
-+      else if (le32_to_cpu(de->inode) >
-+                      le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
-+              error_msg = "inode out of bounds";
-+
-+      if (error_msg != NULL)
-+              ext3_error (dir->i_sb, function,
-+                      "bad entry in directory #%lu: %s - "
-+                      "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
-+                      dir->i_ino, error_msg, offset,
-+                      (unsigned long) le32_to_cpu(de->inode),
-+                      rlen, de->name_len);
-+      return error_msg == NULL ? 1 : 0;
-+}
-+
-+static int ext3_readdir(struct file * filp,
-+                       void * dirent, filldir_t filldir)
-+{
-+      int error = 0;
-+      unsigned long offset, blk;
-+      int i, num, stored;
-+      struct buffer_head * bh, * tmp, * bha[16];
-+      struct ext3_dir_entry_2 * de;
-+      struct super_block * sb;
-+      int err;
-+      struct inode *inode = filp->f_dentry->d_inode;
-+
-+      sb = inode->i_sb;
-+
-+      stored = 0;
-+      bh = NULL;
-+      offset = filp->f_pos & (sb->s_blocksize - 1);
-+
-+      while (!error && !stored && filp->f_pos < inode->i_size) {
-+              blk = (filp->f_pos) >> EXT3_BLOCK_SIZE_BITS(sb);
-+              bh = ext3_bread (0, inode, blk, 0, &err);
-+              if (!bh) {
-+                      ext3_error (sb, "ext3_readdir",
-+                              "directory #%lu contains a hole at offset %lu",
-+                              inode->i_ino, (unsigned long)filp->f_pos);
-+                      filp->f_pos += sb->s_blocksize - offset;
-+                      continue;
-+              }
-+
-+              /*
-+               * Do the readahead
-+               */
-+              if (!offset) {
-+                      for (i = 16 >> (EXT3_BLOCK_SIZE_BITS(sb) - 9), num = 0;
-+                           i > 0; i--) {
-+                              tmp = ext3_getblk (NULL, inode, ++blk, 0, &err);
-+                              if (tmp && !buffer_uptodate(tmp) &&
-+                                              !buffer_locked(tmp))
-+                                      bha[num++] = tmp;
-+                              else
-+                                      brelse (tmp);
-+                      }
-+                      if (num) {
-+                              ll_rw_block (READA, num, bha);
-+                              for (i = 0; i < num; i++)
-+                                      brelse (bha[i]);
-+                      }
-+              }
-+              
-+revalidate:
-+              /* If the dir block has changed since the last call to
-+               * readdir(2), then we might be pointing to an invalid
-+               * dirent right now.  Scan from the start of the block
-+               * to make sure. */
-+              if (filp->f_version != inode->i_version) {
-+                      for (i = 0; i < sb->s_blocksize && i < offset; ) {
-+                              de = (struct ext3_dir_entry_2 *) 
-+                                      (bh->b_data + i);
-+                              /* It's too expensive to do a full
-+                               * dirent test each time round this
-+                               * loop, but we do have to test at
-+                               * least that it is non-zero.  A
-+                               * failure will be detected in the
-+                               * dirent test below. */
-+                              if (le16_to_cpu(de->rec_len) <
-+                                              EXT3_DIR_REC_LEN(1))
-+                                      break;
-+                              i += le16_to_cpu(de->rec_len);
-+                      }
-+                      offset = i;
-+                      filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
-+                              | offset;
-+                      filp->f_version = inode->i_version;
-+              }
-+              
-+              while (!error && filp->f_pos < inode->i_size 
-+                     && offset < sb->s_blocksize) {
-+                      de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
-+                      if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
-+                                                 bh, offset)) {
-+                              /* On error, skip the f_pos to the
-+                                   next block. */
-+                              filp->f_pos = (filp->f_pos |
-+                                              (sb->s_blocksize - 1)) + 1;
-+                              brelse (bh);
-+                              return stored;
-+                      }
-+                      offset += le16_to_cpu(de->rec_len);
-+                      if (le32_to_cpu(de->inode)) {
-+                              /* We might block in the next section
-+                               * if the data destination is
-+                               * currently swapped out.  So, use a
-+                               * version stamp to detect whether or
-+                               * not the directory has been modified
-+                               * during the copy operation.
-+                               */
-+                              unsigned long version = filp->f_version;
-+                              unsigned char d_type = DT_UNKNOWN;
-+
-+                              if (EXT3_HAS_INCOMPAT_FEATURE(sb,
-+                                              EXT3_FEATURE_INCOMPAT_FILETYPE)
-+                                              && de->file_type < EXT3_FT_MAX)
-+                                      d_type =
-+                                        ext3_filetype_table[de->file_type];
-+                              error = filldir(dirent, de->name,
-+                                              de->name_len,
-+                                              filp->f_pos,
-+                                              le32_to_cpu(de->inode),
-+                                              d_type);
-+                              if (error)
-+                                      break;
-+                              if (version != filp->f_version)
-+                                      goto revalidate;
-+                              stored ++;
-+                      }
-+                      filp->f_pos += le16_to_cpu(de->rec_len);
-+              }
-+              offset = 0;
-+              brelse (bh);
-+      }
-+      UPDATE_ATIME(inode);
-+      return 0;
-+}
-diff -rup --new-file linux.mcp2/fs/ext3/file.c linux_tmp/fs/ext3/file.c
---- linux.mcp2/fs/ext3/file.c  1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/file.c   2001-11-15 13:37:55.000000000 -0800
-@@ -0,0 +1,94 @@
-+/*
-+ *  linux/fs/ext3/file.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/file.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  ext3 fs regular file handling primitives
-+ *
-+ *  64-bit file support on 64-bit platforms by Jakub Jelinek
-+ *    (jj@sunsite.ms.mff.cuni.cz)
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/locks.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/smp_lock.h>
-+
-+/*
-+ * Called when an inode is released. Note that this is different
-+ * from ext3_file_open: open gets called at every open, but release
-+ * gets called only when /all/ the files are closed.
-+ */
-+static int ext3_release_file (struct inode * inode, struct file * filp)
-+{
-+      if (filp->f_mode & FMODE_WRITE)
-+              ext3_discard_prealloc (inode);
-+      return 0;
-+}
-+
-+/*
-+ * Called when an inode is about to be opened.
-+ * We use this to disallow opening RW large files on 32bit systems if
-+ * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
-+ * on this flag in sys_open.
-+ */
-+static int ext3_open_file (struct inode * inode, struct file * filp)
-+{
-+      if (!(filp->f_flags & O_LARGEFILE) &&
-+          inode->i_size > 0x7FFFFFFFLL)
-+              return -EFBIG;
-+      return 0;
-+}
-+
-+/*
-+ * ext3_file_write().
-+ *
-+ * Most things are done in ext3_prepare_write() and ext3_commit_write().
-+ */
-+
-+static ssize_t
-+ext3_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
-+{
-+      struct inode *inode = file->f_dentry->d_inode;
-+
-+      /*
-+       * Nasty: if the file is subject to synchronous writes then we need
-+       * to force generic_osync_inode() to call ext3_write_inode().
-+       * We do that by marking the inode dirty.  This adds much more
-+       * computational expense than we need, but we're going to sync
-+       * anyway.
-+       */
-+      if (IS_SYNC(inode) || (file->f_flags & O_SYNC))
-+              mark_inode_dirty(inode);
-+
-+      return generic_file_write(file, buf, count, ppos);
-+}
-+
-+struct file_operations ext3_file_operations = {
-+      llseek:         generic_file_llseek,    /* BKL held */
-+      read:           generic_file_read,      /* BKL not held.  Don't need */
-+      write:          ext3_file_write,        /* BKL not held.  Don't need */
-+      ioctl:          ext3_ioctl,             /* BKL held */
-+      mmap:           generic_file_mmap,
-+      open:           ext3_open_file,         /* BKL not held.  Don't need */
-+      release:        ext3_release_file,      /* BKL not held.  Don't need */
-+      fsync:          ext3_sync_file,         /* BKL held */
-+};
-+
-+struct inode_operations ext3_file_inode_operations = {
-+      truncate:       ext3_truncate,          /* BKL held */
-+      setattr:        ext3_setattr,           /* BKL held */
-+};
-+
-diff -rup --new-file linux.mcp2/fs/ext3/fsync.c linux_tmp/fs/ext3/fsync.c
---- linux.mcp2/fs/ext3/fsync.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/fsync.c  2001-11-20 21:34:13.000000000 -0800
-@@ -0,0 +1,70 @@
-+/*
-+ *  linux/fs/ext3/fsync.c
-+ *
-+ *  Copyright (C) 1993  Stephen Tweedie (sct@redhat.com)
-+ *  from
-+ *  Copyright (C) 1992  Remy Card (card@masi.ibp.fr)
-+ *                      Laboratoire MASI - Institut Blaise Pascal
-+ *                      Universite Pierre et Marie Curie (Paris VI)
-+ *  from
-+ *  linux/fs/minix/truncate.c   Copyright (C) 1991, 1992  Linus Torvalds
-+ * 
-+ *  ext3fs fsync primitive
-+ *
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ * 
-+ *  Removed unnecessary code duplication for little endian machines
-+ *  and excessive __inline__s. 
-+ *        Andi Kleen, 1997
-+ *
-+ * Major simplications and cleanup - we only need to do the metadata, because
-+ * we can depend on generic_block_fdatasync() to sync the data blocks.
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/jbd.h>
-+#include <linux/smp_lock.h>
-+
-+/*
-+ * akpm: A new design for ext3_sync_file().
-+ *
-+ * This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
-+ * There cannot be a transaction open by this task. (AKPM: quotas?)
-+ * Another task could have dirtied this inode.  Its data can be in any
-+ * state in the journalling system.
-+ *
-+ * What we do is just kick off a commit and wait on it.  This will snapshot the
-+ * inode to disk.
-+ *
-+ * Note that there is a serious optimisation we can make here: if the current
-+ * inode is not part of j_running_transaction or j_committing_transaction
-+ * then we have nothing to do.  That would require implementation of t_ilist,
-+ * which isn't too hard.
-+ */
-+
-+int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
-+{
-+      struct inode *inode = dentry->d_inode;
-+      int ret;
-+
-+      J_ASSERT(ext3_journal_current_handle() == 0);
-+
-+      /*
-+       * fsync_inode_buffers() just walks i_dirty_buffers and waits
-+       * on them.  It's a no-op for full data journalling because
-+       * i_dirty_buffers will be ampty.
-+       * Really, we only need to start I/O on the dirty buffers -
-+       * we'll end up waiting on them in commit.
-+       */
-+      ret = fsync_inode_buffers(inode);
-+      ret |= fsync_inode_data_buffers(inode);
-+
-+      ext3_force_commit(inode->i_sb);
-+
-+      return ret;
-+}
-diff -rup --new-file linux.mcp2/fs/ext3/ialloc.c linux_tmp/fs/ext3/ialloc.c
---- linux.mcp2/fs/ext3/ialloc.c        1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/ialloc.c 2002-02-25 11:38:08.000000000 -0800
-@@ -0,0 +1,663 @@
-+/*
-+ *  linux/fs/ext3/ialloc.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  BSD ufs-inspired inode and directory allocation by
-+ *  Stephen Tweedie (sct@redhat.com), 1993
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/stat.h>
-+#include <linux/string.h>
-+#include <linux/locks.h>
-+#include <linux/quotaops.h>
-+
-+#include <asm/bitops.h>
-+#include <asm/byteorder.h>
-+
-+/*
-+ * ialloc.c contains the inodes allocation and deallocation routines
-+ */
-+
-+/*
-+ * The free inodes are managed by bitmaps.  A file system contains several
-+ * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
-+ * block for inodes, N blocks for the inode table and data blocks.
-+ *
-+ * The file system contains group descriptors which are located after the
-+ * super block.  Each descriptor contains the number of the bitmap block and
-+ * the free blocks count in the block.  The descriptors are loaded in memory
-+ * when a file system is mounted (see ext3_read_super).
-+ */
-+
-+
-+/*
-+ * Read the inode allocation bitmap for a given block_group, reading
-+ * into the specified slot in the superblock's bitmap cache.
-+ *
-+ * Return >=0 on success or a -ve error code.
-+ */
-+static int read_inode_bitmap (struct super_block * sb,
-+                             unsigned long block_group,
-+                             unsigned int bitmap_nr)
-+{
-+      struct ext3_group_desc * gdp;
-+      struct buffer_head * bh = NULL;
-+      int retval = 0;
-+
-+      gdp = ext3_get_group_desc (sb, block_group, NULL);
-+      if (!gdp) {
-+              retval = -EIO;
-+              goto error_out;
-+      }
-+      bh = sb_bread(sb, le32_to_cpu(gdp->bg_inode_bitmap));
-+      if (!bh) {
-+              ext3_error (sb, "read_inode_bitmap",
-+                          "Cannot read inode bitmap - "
-+                          "block_group = %lu, inode_bitmap = %lu",
-+                          block_group, (unsigned long) gdp->bg_inode_bitmap);
-+              retval = -EIO;
-+      }
-+      /*
-+       * On IO error, just leave a zero in the superblock's block pointer for
-+       * this group.  The IO will be retried next time.
-+       */
-+error_out:
-+      sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
-+      sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh;
-+      return retval;
-+}
-+
-+/*
-+ * load_inode_bitmap loads the inode bitmap for a blocks group
-+ *
-+ * It maintains a cache for the last bitmaps loaded.  This cache is managed
-+ * with a LRU algorithm.
-+ *
-+ * Notes:
-+ * 1/ There is one cache per mounted file system.
-+ * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
-+ *    this function reads the bitmap without maintaining a LRU cache.
-+ *
-+ * Return the slot used to store the bitmap, or a -ve error code.
-+ */
-+static int load_inode_bitmap (struct super_block * sb,
-+                            unsigned int block_group)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      unsigned long inode_bitmap_number;
-+      struct buffer_head * inode_bitmap;
-+      int i, j, retval = 0;
-+
-+      if (block_group >= sbi->s_groups_count)
-+              ext3_panic (sb, "load_inode_bitmap",
-+                          "block_group >= groups_count - "
-+                          "block_group = %d, groups_count = %lu",
-+                          block_group, sbi->s_groups_count);
-+      if (sbi->s_loaded_inode_bitmaps > 0 &&
-+          sbi->s_inode_bitmap_number[0] == block_group &&
-+          sbi->s_inode_bitmap[0] != NULL)
-+              return 0;
-+      if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) {
-+              if (sbi->s_inode_bitmap[block_group]) {
-+                      if (sbi->s_inode_bitmap_number[block_group] !=
-+                                              block_group)
-+                              ext3_panic(sb, "load_inode_bitmap",
-+                                      "block_group != inode_bitmap_number");
-+                      return block_group;
-+              }
-+              retval = read_inode_bitmap(sb, block_group, block_group);
-+              if (retval < 0)
-+                      return retval;
-+              return block_group;
-+      }
-+
-+      for (i = 0; i < sbi->s_loaded_inode_bitmaps &&
-+                  sbi->s_inode_bitmap_number[i] != block_group; i++)
-+              /* do nothing */;
-+      if (i < sbi->s_loaded_inode_bitmaps &&
-+          sbi->s_inode_bitmap_number[i] == block_group) {
-+              inode_bitmap_number = sbi->s_inode_bitmap_number[i];
-+              inode_bitmap = sbi->s_inode_bitmap[i];
-+              for (j = i; j > 0; j--) {
-+                      sbi->s_inode_bitmap_number[j] =
-+                              sbi->s_inode_bitmap_number[j - 1];
-+                      sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1];
-+              }
-+              sbi->s_inode_bitmap_number[0] = inode_bitmap_number;
-+              sbi->s_inode_bitmap[0] = inode_bitmap;
-+
-+              /*
-+               * There's still one special case here --- if inode_bitmap == 0
-+               * then our last attempt to read the bitmap failed and we have
-+               * just ended up caching that failure.  Try again to read it.
-+               */
-+              if (!inode_bitmap)
-+                      retval = read_inode_bitmap (sb, block_group, 0);
-+      } else {
-+              if (sbi->s_loaded_inode_bitmaps < EXT3_MAX_GROUP_LOADED)
-+                      sbi->s_loaded_inode_bitmaps++;
-+              else
-+                      brelse(sbi->s_inode_bitmap[EXT3_MAX_GROUP_LOADED - 1]);
-+              for (j = sbi->s_loaded_inode_bitmaps - 1; j > 0; j--) {
-+                      sbi->s_inode_bitmap_number[j] =
-+                              sbi->s_inode_bitmap_number[j - 1];
-+                      sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1];
-+              }
-+              retval = read_inode_bitmap (sb, block_group, 0);
-+      }
-+      return retval;
-+}
-+
-+/* ext3_free_inode: clear the inode's bitmap bit and bump the free-inode counts.
-+ * NOTE! When we get the inode, we're the only people
-+ * that have access to it, and as such there are no
-+ * race conditions we have to worry about. The inode
-+ * is not on the hash-lists, and it cannot be reached
-+ * through the filesystem because the directory entry
-+ * has been deleted earlier.
-+ *
-+ * HOWEVER: we must make sure that we get no aliases,
-+ * which means that we have to call "clear_inode()"
-+ * _before_ we mark the inode not in use in the inode
-+ * bitmaps. Otherwise a newly created file might use
-+ * the same inode number (not actually the same pointer
-+ * though), and then we'd have two inodes sharing the
-+ * same inode number and space on the harddisk.
-+ */
-+void ext3_free_inode (handle_t *handle, struct inode * inode)
-+{
-+      struct super_block * sb = inode->i_sb;
-+      int is_directory;
-+      unsigned long ino;
-+      struct buffer_head * bh;      /* inode bitmap buffer of the inode's group */
-+      struct buffer_head * bh2;      /* buffer filled in by ext3_get_group_desc() */
-+      unsigned long block_group;
-+      unsigned long bit;
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      struct ext3_super_block * es;
-+      int fatal = 0, err;      /* fatal keeps the first error; handed to ext3_std_error() at the end */
-+
-+      if (!inode->i_dev) {      /* sanity checks: each failure only logs and returns */
-+              printk ("ext3_free_inode: inode has no device\n");
-+              return;
-+      }
-+      if (atomic_read(&inode->i_count) > 1) {
-+              printk ("ext3_free_inode: inode has count=%d\n",
-+                                      atomic_read(&inode->i_count));
-+              return;
-+      }
-+      if (inode->i_nlink) {
-+              printk ("ext3_free_inode: inode has nlink=%d\n",
-+                      inode->i_nlink);
-+              return;
-+      }
-+      if (!sb) {
-+              printk("ext3_free_inode: inode on nonexistent device\n");
-+              return;
-+      }
-+
-+      ino = inode->i_ino;      /* saved now; inode is not dereferenced after clear_inode() below */
-+      ext3_debug ("freeing inode %lu\n", ino);
-+
-+      /*
-+       * Note: we must free any quota before locking the superblock,
-+       * as writing the quota to disk may need the lock as well.
-+       */
-+      DQUOT_INIT(inode);
-+      DQUOT_FREE_INODE(inode);
-+      DQUOT_DROP(inode);
-+
-+      is_directory = S_ISDIR(inode->i_mode);      /* saved for the used-dirs counter update below */
-+
-+      /* Do this BEFORE marking the inode not in use or returning an error */
-+      clear_inode (inode);
-+
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+      if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-+              ext3_error (sb, "ext3_free_inode",
-+                          "reserved or nonexistent inode %lu", ino);
-+              goto error_return;
-+      }
-+      block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);      /* (ino - 1): inode 1 maps to group 0, bit 0 */
-+      bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
-+      bitmap_nr = load_inode_bitmap (sb, block_group);
-+      if (bitmap_nr < 0)
-+              goto error_return;
-+
-+      bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
-+
-+      BUFFER_TRACE(bh, "get_write_access");
-+      fatal = ext3_journal_get_write_access(handle, bh);
-+      if (fatal)
-+              goto error_return;
-+
-+      /* Ok, now we can actually update the inode bitmaps.. */
-+      if (!ext3_clear_bit (bit, bh->b_data))
-+              ext3_error (sb, "ext3_free_inode",
-+                            "bit already cleared for inode %lu", ino);
-+      else {
-+              gdp = ext3_get_group_desc (sb, block_group, &bh2);      /* NOTE(review): bh2 is used below before gdp is NULL-checked - confirm bh2 is always set */
-+
-+              BUFFER_TRACE(bh2, "get_write_access");
-+              fatal = ext3_journal_get_write_access(handle, bh2);
-+              if (fatal) goto error_return;
-+
-+              BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access");
-+              fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-+              if (fatal) goto error_return;
-+
-+              if (gdp) {      /* per-group counters: one more free inode, one fewer directory if applicable */
-+                      gdp->bg_free_inodes_count = cpu_to_le16(
-+                              le16_to_cpu(gdp->bg_free_inodes_count) + 1);
-+                      if (is_directory)
-+                              gdp->bg_used_dirs_count = cpu_to_le16(
-+                                le16_to_cpu(gdp->bg_used_dirs_count) - 1);
-+              }
-+              BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
-+              err = ext3_journal_dirty_metadata(handle, bh2);
-+              if (!fatal) fatal = err;
-+              es->s_free_inodes_count =
-+                      cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
-+              BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
-+                                      "call ext3_journal_dirty_metadata");
-+              err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-+              if (!fatal) fatal = err;
-+      }
-+      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+      err = ext3_journal_dirty_metadata(handle, bh);      /* the bitmap buffer is dirtied on both branches above */
-+      if (!fatal)
-+              fatal = err;
-+      sb->s_dirt = 1;
-+error_return:      /* also reached by falling through on success; the super lock is always dropped here */
-+      ext3_std_error(sb, fatal);
-+      unlock_super(sb);
-+}
-+
-+/*
-+ * There are two policies for allocating an inode.  If the new inode is
-+ * a directory, every block group is scanned and, of the groups that have
-+ * at least one free inode and at least the filesystem-average number of
-+ * free inodes, the one with the most free blocks is chosen.
-+ *
-+ * For other inodes, try the parent directory's block group first, then
-+ * probe forward with doubling strides, then fall back to a linear scan.
-+ * Returns the new inode, or an ERR_PTR() value on failure.
-+ */
-+struct inode * ext3_new_inode (handle_t *handle,
-+                              const struct inode * dir, int mode)
-+{
-+      struct super_block * sb;
-+      struct buffer_head * bh;      /* inode bitmap buffer of the chosen group */
-+      struct buffer_head * bh2;      /* buffer filled in by ext3_get_group_desc() for the chosen group */
-+      int i, j, avefreei;      /* i: chosen group index; j: loop counter, later bit index, later inode number */
-+      struct inode * inode;
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      struct ext3_group_desc * tmp;
-+      struct ext3_super_block * es;
-+      int err = 0;
-+
-+      /* Cannot create files in a deleted directory */
-+      if (!dir || !dir->i_nlink)
-+              return ERR_PTR(-EPERM);
-+
-+      sb = dir->i_sb;
-+      inode = new_inode(sb);
-+      if (!inode)
-+              return ERR_PTR(-ENOMEM);
-+      init_rwsem(&inode->u.ext3_i.truncate_sem);
-+
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+repeat:      /* the group search restarts from scratch each time we come back here */
-+      gdp = NULL;
-+      i = 0;
-+
-+      if (S_ISDIR(mode)) {
-+              avefreei = le32_to_cpu(es->s_free_inodes_count) /
-+                      sb->u.ext3_sb.s_groups_count;
-+              if (!gdp) {      /* always true here: gdp was reset to NULL just above */
-+                      for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
-+                              struct buffer_head *temp_buffer;
-+                              tmp = ext3_get_group_desc (sb, j, &temp_buffer);
-+                              if (tmp &&
-+                                  le16_to_cpu(tmp->bg_free_inodes_count) &&
-+                                  le16_to_cpu(tmp->bg_free_inodes_count) >=
-+                                                      avefreei) {
-+                                      if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
-+                                              le16_to_cpu(gdp->bg_free_blocks_count))) {
-+                                              i = j;      /* best candidate so far: most free blocks */
-+                                              gdp = tmp;
-+                                              bh2 = temp_buffer;
-+                                      }
-+                              }
-+                      }
-+              }
-+      } else {
-+              /*
-+               * Try to place the inode in its parent directory's block group
-+               */
-+              i = dir->u.ext3_i.i_block_group;
-+              tmp = ext3_get_group_desc (sb, i, &bh2);
-+              if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
-+                      gdp = tmp;
-+              else
-+              {
-+                      /*
-+                       * Use a quadratic hash to find a group with a
-+                       * free inode (the stride j doubles on every probe)
-+                       */
-+                      for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
-+                              i += j;
-+                              if (i >= sb->u.ext3_sb.s_groups_count)
-+                                      i -= sb->u.ext3_sb.s_groups_count;
-+                              tmp = ext3_get_group_desc (sb, i, &bh2);
-+                              if (tmp &&
-+                                  le16_to_cpu(tmp->bg_free_inodes_count)) {
-+                                      gdp = tmp;
-+                                      break;
-+                              }
-+                      }
-+              }
-+              if (!gdp) {
-+                      /*
-+                       * That failed: try linear search for a free inode
-+                       */
-+                      i = dir->u.ext3_i.i_block_group + 1;
-+                      for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
-+                              if (++i >= sb->u.ext3_sb.s_groups_count)
-+                                      i = 0;
-+                              tmp = ext3_get_group_desc (sb, i, &bh2);
-+                              if (tmp &&
-+                                  le16_to_cpu(tmp->bg_free_inodes_count)) {
-+                                      gdp = tmp;
-+                                      break;
-+                              }
-+                      }
-+              }
-+      }
-+
-+      err = -ENOSPC;      /* reported if no group with a free inode was found */
-+      if (!gdp)
-+              goto fail;
-+
-+      err = -EIO;      /* reported if the group's inode bitmap cannot be loaded */
-+      bitmap_nr = load_inode_bitmap (sb, i);
-+      if (bitmap_nr < 0)
-+              goto fail;
-+
-+      bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
-+
-+      if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
-+                                    EXT3_INODES_PER_GROUP(sb))) <
-+          EXT3_INODES_PER_GROUP(sb)) {
-+              BUFFER_TRACE(bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, bh);
-+              if (err) goto fail;
-+              
-+              if (ext3_set_bit (j, bh->b_data)) {
-+                      ext3_error (sb, "ext3_new_inode",
-+                                    "bit already set for inode %d", j);
-+                      goto repeat;
-+              }
-+              BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+              err = ext3_journal_dirty_metadata(handle, bh);
-+              if (err) goto fail;
-+      } else {      /* bitmap has no zero bit although the descriptor was selected as having free inodes */
-+              if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
-+                      ext3_error (sb, "ext3_new_inode",
-+                                  "Free inodes count corrupted in group %d",
-+                                  i);
-+                      /* Is it really ENOSPC? */
-+                      err = -ENOSPC;
-+                      if (sb->s_flags & MS_RDONLY)
-+                              goto fail;
-+
-+                      BUFFER_TRACE(bh2, "get_write_access");
-+                      err = ext3_journal_get_write_access(handle, bh2);
-+                      if (err) goto fail;
-+                      gdp->bg_free_inodes_count = 0;      /* zero the stored count so this group is skipped on the retry */
-+                      BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
-+                      err = ext3_journal_dirty_metadata(handle, bh2);
-+                      if (err) goto fail;
-+              }
-+              goto repeat;
-+      }
-+      j += i * EXT3_INODES_PER_GROUP(sb) + 1;      /* bit index within group i -> inode number (stored in i_ino below) */
-+      if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
-+              ext3_error (sb, "ext3_new_inode",
-+                          "reserved inode or inode > inodes count - "
-+                          "block_group = %d,inode=%d", i, j);
-+              err = -EIO;
-+              goto fail;
-+      }
-+
-+      BUFFER_TRACE(bh2, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, bh2);
-+      if (err) goto fail;
-+      gdp->bg_free_inodes_count =
-+              cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
-+      if (S_ISDIR(mode))
-+              gdp->bg_used_dirs_count =
-+                      cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
-+      BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
-+      err = ext3_journal_dirty_metadata(handle, bh2);
-+      if (err) goto fail;
-+      
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-+      if (err) goto fail;
-+      es->s_free_inodes_count =
-+              cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata");
-+      err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-+      sb->s_dirt = 1;
-+      if (err) goto fail;
-+
-+      inode->i_uid = current->fsuid;
-+      if (test_opt (sb, GRPID))
-+              inode->i_gid = dir->i_gid;
-+      else if (dir->i_mode & S_ISGID) {      /* setgid parent: inherit its group; new directories inherit the bit too */
-+              inode->i_gid = dir->i_gid;
-+              if (S_ISDIR(mode))
-+                      mode |= S_ISGID;
-+      } else
-+              inode->i_gid = current->fsgid;
-+      inode->i_mode = mode;
-+
-+      inode->i_ino = j;
-+      /* This is the optimal IO size (for stat), not the fs block size */
-+      inode->i_blksize = PAGE_SIZE;
-+      inode->i_blocks = 0;
-+      inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-+      inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL;      /* parent's flags minus EXT3_INDEX_FL */
-+      if (S_ISLNK(mode))
-+              inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
-+#ifdef EXT3_FRAGMENTS
-+      inode->u.ext3_i.i_faddr = 0;
-+      inode->u.ext3_i.i_frag_no = 0;
-+      inode->u.ext3_i.i_frag_size = 0;
-+#endif
-+      inode->u.ext3_i.i_file_acl = 0;
-+      inode->u.ext3_i.i_dir_acl = 0;
-+      inode->u.ext3_i.i_dtime = 0;
-+      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
-+#ifdef EXT3_PREALLOCATE
-+      inode->u.ext3_i.i_prealloc_count = 0;
-+#endif
-+      inode->u.ext3_i.i_block_group = i;
-+      
-+      if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL)
-+              inode->i_flags |= S_SYNC;
-+      if (IS_SYNC(inode))
-+              handle->h_sync = 1;
-+      insert_inode_hash(inode);
-+      inode->i_generation = sb->u.ext3_sb.s_next_generation++;
-+
-+      inode->u.ext3_i.i_state = EXT3_STATE_NEW;
-+      err = ext3_mark_inode_dirty(handle, inode);
-+      if (err) goto fail;
-+      
-+      unlock_super (sb);      /* quota is charged only after the super lock is released */
-+      if(DQUOT_ALLOC_INODE(inode)) {
-+              DQUOT_DROP(inode);
-+              inode->i_flags |= S_NOQUOTA;
-+              inode->i_nlink = 0;
-+              iput(inode);
-+              return ERR_PTR(-EDQUOT);
-+      }
-+      ext3_debug ("allocating inode %lu\n", inode->i_ino);
-+      return inode;
-+
-+fail:      /* NOTE(review): a bitmap bit set above is not cleared on this path - confirm this is intended */
-+      unlock_super(sb);
-+      iput(inode);
-+      ext3_std_error(sb, err);
-+      return ERR_PTR(err);
-+}
-+
-+/* Verify that we are loading a valid orphan from disk; returns the inode, or NULL if it is rejected */
-+struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino)
-+{
-+      ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count);
-+      unsigned long block_group;
-+      int bit;
-+      int bitmap_nr;
-+      struct buffer_head *bh;
-+      struct inode *inode = NULL;
-+      
-+      /* Error cases - e2fsck has already cleaned up for us */
-+      if (ino > max_ino) {      /* NOTE(review): no lower bound is checked; ino == 0 would make (ino - 1) below wrap if ino_t is unsigned */
-+              ext3_warning(sb, __FUNCTION__,
-+                           "bad orphan ino %ld!  e2fsck was run?\n", ino);
-+              return NULL;
-+      }
-+
-+      block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
-+      bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb);
-+      if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 ||
-+          !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) {
-+              ext3_warning(sb, __FUNCTION__,
-+                           "inode bitmap error for orphan %ld\n", ino);
-+              return NULL;
-+      }
-+
-+      /* Having the inode bit set should be a 100% indicator that this
-+       * is a valid orphan (no e2fsck run on fs).  Orphans also include
-+       * inodes that were being truncated, so we can't check i_nlink==0.
-+       */
-+      if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) ||
-+          is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) {
-+              ext3_warning(sb, __FUNCTION__,
-+                           "bad orphan inode %ld!  e2fsck was run?\n", ino);
-+              printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n",
-+                     bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data));
-+              printk(KERN_NOTICE "inode=%p\n", inode);      /* inode is still NULL here if the bit test failed or iget() returned NULL */
-+              if (inode) {
-+                      printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
-+                             is_bad_inode(inode));
-+                      printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%d\n",
-+                             NEXT_ORPHAN(inode));
-+                      printk(KERN_NOTICE "max_ino=%ld\n", max_ino);      /* NOTE(review): %ld used for ino_t values throughout - confirm signedness */
-+              }
-+              /* Avoid freeing blocks if we got a bad deleted inode */
-+              if (inode && inode->i_nlink == 0)
-+                      inode->i_blocks = 0;
-+              iput(inode);      /* NOTE(review): inode may be NULL here - relies on iput() accepting NULL; confirm */
-+              return NULL;
-+      }
-+
-+      return inode;
-+}
-+
-+unsigned long ext3_count_free_inodes (struct super_block * sb)      /* returns the filesystem's free-inode count */
-+{
-+#ifdef EXT3FS_DEBUG
-+      struct ext3_super_block * es;
-+      unsigned long desc_count, bitmap_count, x;      /* totals from group descriptors / from bitmaps; x is one group's ext3_count_free() result */
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      int i;
-+
-+      lock_super (sb);
-+      es = sb->u.ext3_sb.s_es;
-+      desc_count = 0;
-+      bitmap_count = 0;
-+      gdp = NULL;
-+      for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
-+              gdp = ext3_get_group_desc (sb, i, NULL);
-+              if (!gdp)
-+                      continue;      /* groups without a descriptor contribute to neither total */
-+              desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
-+              bitmap_nr = load_inode_bitmap (sb, i);
-+              if (bitmap_nr < 0)
-+                      continue;      /* bitmap unavailable: counted in desc_count only */
-+
-+              x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
-+                                   EXT3_INODES_PER_GROUP(sb) / 8);
-+              printk ("group %d: stored = %d, counted = %lu\n",
-+                      i, le16_to_cpu(gdp->bg_free_inodes_count), x);
-+              bitmap_count += x;
-+      }
-+      printk("ext3_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
-+              le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);      /* NOTE(review): %lu without the (unsigned long) cast that ext3_check_inodes_bitmap uses */
-+      unlock_super (sb);
-+      return desc_count;      /* debug builds return the descriptor total, not the superblock field */
-+#else
-+      return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count);      /* non-debug: trust the superblock counter */
-+#endif
-+}
-+
-+#ifdef CONFIG_EXT3_CHECK
-+/* Called at mount-time, super-block is locked.  Reports count mismatches via ext3_error(). */
-+void ext3_check_inodes_bitmap (struct super_block * sb)
-+{
-+      struct ext3_super_block * es;
-+      unsigned long desc_count, bitmap_count, x;      /* desc_count is accumulated but never compared or reported in this function */
-+      int bitmap_nr;
-+      struct ext3_group_desc * gdp;
-+      int i;
-+
-+      es = sb->u.ext3_sb.s_es;
-+      desc_count = 0;
-+      bitmap_count = 0;
-+      gdp = NULL;
-+      for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
-+              gdp = ext3_get_group_desc (sb, i, NULL);
-+              if (!gdp)
-+                      continue;
-+              desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
-+              bitmap_nr = load_inode_bitmap (sb, i);
-+              if (bitmap_nr < 0)
-+                      continue;
-+
-+              x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
-+                                   EXT3_INODES_PER_GROUP(sb) / 8);
-+              if (le16_to_cpu(gdp->bg_free_inodes_count) != x)      /* per-group check: descriptor value vs. bitmap count */
-+                      ext3_error (sb, "ext3_check_inodes_bitmap",
-+                                  "Wrong free inodes count in group %d, "
-+                                  "stored = %d, counted = %lu", i,
-+                                  le16_to_cpu(gdp->bg_free_inodes_count), x);
-+              bitmap_count += x;
-+      }
-+      if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)      /* global check: superblock value vs. sum of bitmap counts */
-+              ext3_error (sb, "ext3_check_inodes_bitmap",
-+                          "Wrong free inodes count in super block, "
-+                          "stored = %lu, counted = %lu",
-+                          (unsigned long)le32_to_cpu(es->s_free_inodes_count),
-+                          bitmap_count);
-+}
-+#endif
-diff -rup --new-file linux.mcp2/fs/ext3/inode.c linux_tmp/fs/ext3/inode.c
---- linux.mcp2/fs/ext3/inode.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/inode.c  2002-08-02 17:39:45.000000000 -0700
-@@ -0,0 +1,2699 @@
-+/*
-+ *  linux/fs/ext3/inode.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/inode.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  Goal-directed block allocation by Stephen Tweedie
-+ *    (sct@redhat.com), 1993, 1998
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ *  64-bit file support on 64-bit platforms by Jakub Jelinek
-+ *    (jj@sunsite.ms.mff.cuni.cz)
-+ *
-+ *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/sched.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/jbd.h>
-+#include <linux/locks.h>
-+#include <linux/smp_lock.h>
-+#include <linux/highuid.h>
-+#include <linux/quotaops.h>
-+#include <linux/module.h>
-+
-+/*
-+ * SEARCH_FROM_ZERO forces each block allocation to search from the start
-+ * of the filesystem.  This is to force rapid reallocation of recently-freed
-+ * blocks.  The file fragmentation is horrendous.
-+ */
-+#undef SEARCH_FROM_ZERO
-+
-+/* The ext3 forget function must perform a revoke if we are freeing data
-+ * which has been journaled.  Metadata (eg. indirect blocks) must be
-+ * revoked in all cases.
-+ *
-+ * "bh" may be NULL: a metadata block may have been freed from memory
-+ * but there may still be a record of it in the journal, and that record
-+ * still needs to be revoked.  Returns 0 or the ext3_journal_revoke() error.
-+ */
-+
-+static int ext3_forget(handle_t *handle, int is_metadata,
-+                     struct inode *inode, struct buffer_head *bh,
-+                     int blocknr)
-+{
-+      int err;
-+
-+      BUFFER_TRACE(bh, "enter");
-+
-+      jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
-+                "data mode %lx\n",
-+                bh, is_metadata, inode->i_mode,
-+                test_opt(inode->i_sb, DATA_FLAGS));
-+      
-+      /* Never use the revoke function if we are doing full data
-+       * journaling: there is no need to, and a V1 superblock won't
-+       * support it.  Otherwise, only skip the revoke on un-journaled
-+       * data blocks. */
-+
-+      if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
-+          (!is_metadata && !ext3_should_journal_data(inode))) {
-+              if (bh) {      /* with no buffer there is nothing to forget; still returns 0 */
-+                      BUFFER_TRACE(bh, "call journal_forget");
-+                      ext3_journal_forget(handle, bh);
-+              }
-+              return 0;
-+      }
-+
-+      /*
-+       * data!=journal && (is_metadata || should_journal_data(inode))
-+       */
-+      BUFFER_TRACE(bh, "call ext3_journal_revoke");
-+      err = ext3_journal_revoke(handle, blocknr, bh);
-+      if (err)
-+              ext3_abort(inode->i_sb, __FUNCTION__,
-+                         "error %d when attempting revoke", err);
-+      BUFFER_TRACE(bh, "exit");
-+      return err;
-+}
-+
-+/* 
-+ * Truncate transactions can be complex and absolutely huge.  So we need to
-+ * be able to restart the transaction at a convenient checkpoint to make
-+ * sure we don't overflow the journal.
-+ *
-+ * start_transaction gets us a new handle for a truncate transaction,
-+ * and extend_transaction tries to extend the existing one a bit.  If
-+ * extend fails, we need to propagate the failure up and restart the
-+ * transaction in the top-level truncate loop. --sct 
-+ */
-+
-+static handle_t *start_transaction(struct inode *inode) 
-+{
-+      long needed;
-+      handle_t *result;
-+      
-+      needed = inode->i_blocks;      /* credits scale with i_blocks, capped at EXT3_MAX_TRANS_DATA */
-+      if (needed > EXT3_MAX_TRANS_DATA) 
-+              needed = EXT3_MAX_TRANS_DATA;
-+      
-+      result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
-+      if (!IS_ERR(result))
-+              return result;
-+      
-+      ext3_std_error(inode->i_sb, PTR_ERR(result));      /* error is reported here; the ERR_PTR value is still returned to the caller */
-+      return result;
-+}
-+
-+/*
-+ * Try to extend this transaction for the purposes of truncation.
-+ *
-+ * Returns 0 if we managed to create more room.  If we can't create more
-+ * room, and the transaction must be restarted we return 1.
-+ */
-+static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
-+{
-+      long needed;
-+      
-+      if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)      /* enough credits left: nothing to do */
-+              return 0;
-+      needed = inode->i_blocks;      /* same cap as start_transaction() */
-+      if (needed > EXT3_MAX_TRANS_DATA) 
-+              needed = EXT3_MAX_TRANS_DATA;
-+      if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed))      /* a zero return is treated as success */
-+              return 0;
-+      return 1;
-+}
-+
-+/*
-+ * Restart the transaction associated with *handle.  This does a commit,
-+ * so before we call here everything must be consistently dirtied against
-+ * this transaction.  Returns the result of ext3_journal_restart().
-+ */
-+static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
-+{
-+      long needed = inode->i_blocks;      /* capped at EXT3_MAX_TRANS_DATA, as in start_transaction() */
-+      if (needed > EXT3_MAX_TRANS_DATA) 
-+              needed = EXT3_MAX_TRANS_DATA;
-+      jbd_debug(2, "restarting handle %p\n", handle);
-+      return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed);
-+}
-+
-+/*
-+ * Called at each iput(); only drops the inode's preallocated blocks.
-+ */
-+void ext3_put_inode (struct inode * inode)
-+{
-+      ext3_discard_prealloc (inode);      /* compiles to an empty body unless EXT3_PREALLOCATE is defined */
-+}
-+
-+/*
-+ * Called at the last iput() if i_nlink is zero.
-+ */
-+void ext3_delete_inode (struct inode * inode)
-+{
-+      handle_t *handle;
-+      
-+      if (is_bad_inode(inode) ||
-+          inode->i_ino == EXT3_ACL_IDX_INO ||
-+          inode->i_ino == EXT3_ACL_DATA_INO)
-+              goto no_delete;      /* these inodes are only cleared in core, never freed on disk */
-+
-+      lock_kernel();
-+      handle = start_transaction(inode);
-+      if (IS_ERR(handle)) {
-+              /* If we're going to skip the normal cleanup, we still
-+               * need to make sure that the in-core orphan linked list
-+               * is properly cleaned up. */
-+              ext3_orphan_del(NULL, inode);
-+
-+              ext3_std_error(inode->i_sb, PTR_ERR(handle));      /* NOTE(review): start_transaction() already calls ext3_std_error() on failure */
-+              unlock_kernel();
-+              goto no_delete;
-+      }
-+      
-+      if (IS_SYNC(inode))
-+              handle->h_sync = 1;
-+      inode->i_size = 0;
-+      if (inode->i_blocks)
-+              ext3_truncate(inode);
-+      /*
-+       * Kill off the orphan record which ext3_truncate created.
-+       * AKPM: I think this can be inside the above `if'.
-+       * Note that ext3_orphan_del() has to be able to cope with the
-+       * deletion of a non-existent orphan - this is because we don't
-+       * know if ext3_truncate() actually created an orphan record.
-+       * (Well, we could do this if we need to, but heck - it works)
-+       */
-+      ext3_orphan_del(handle, inode);
-+      inode->u.ext3_i.i_dtime = CURRENT_TIME;
-+
-+      /* 
-+       * One subtle ordering requirement: if anything has gone wrong
-+       * (transaction abort, IO errors, whatever), then we can still
-+       * do these next steps (the fs will already have been marked as
-+       * having errors), but we can't free the inode if the mark_dirty
-+       * fails.  
-+       */
-+      if (ext3_mark_inode_dirty(handle, inode))
-+              /* If that failed, just do the required in-core inode clear. */
-+              clear_inode(inode);
-+      else
-+              ext3_free_inode(handle, inode);
-+      ext3_journal_stop(handle, inode);
-+      unlock_kernel();
-+      return;
-+no_delete:
-+      clear_inode(inode);     /* We must guarantee clearing of inode... */
-+}
-+
-+void ext3_discard_prealloc (struct inode * inode)      /* free the inode's preallocated block run; empty body without EXT3_PREALLOCATE */
-+{
-+#ifdef EXT3_PREALLOCATE
-+      lock_kernel();
-+      /* Writer: ->i_prealloc* */
-+      if (inode->u.ext3_i.i_prealloc_count) {
-+              unsigned short total = inode->u.ext3_i.i_prealloc_count;      /* snapshot, then zero the fields before freeing */
-+              unsigned long block = inode->u.ext3_i.i_prealloc_block;
-+              inode->u.ext3_i.i_prealloc_count = 0;
-+              inode->u.ext3_i.i_prealloc_block = 0;
-+              /* Writer: end */
-+              ext3_free_blocks (inode, block, total);
-+      }
-+      unlock_kernel();
-+#endif
-+}
-+
-+static int ext3_alloc_block (handle_t *handle,
-+                      struct inode * inode, unsigned long goal, int *err)      /* wrapper around ext3_new_block(); err is passed straight through */
-+{
-+#ifdef EXT3FS_DEBUG
-+      static unsigned long alloc_hits = 0, alloc_attempts = 0;
-+#endif
-+      unsigned long result;      /* NOTE(review): returned through an int return type - confirm truncation is acceptable */
-+
-+#ifdef EXT3_PREALLOCATE
-+      /* Writer: ->i_prealloc* */
-+      if (inode->u.ext3_i.i_prealloc_count &&
-+          (goal == inode->u.ext3_i.i_prealloc_block ||
-+           goal + 1 == inode->u.ext3_i.i_prealloc_block))
-+      {
-+              result = inode->u.ext3_i.i_prealloc_block++;      /* hit: hand out the next preallocated block */
-+              inode->u.ext3_i.i_prealloc_count--;
-+              /* Writer: end */
-+              ext3_debug ("preallocation hit (%lu/%lu).\n",
-+                          ++alloc_hits, ++alloc_attempts);
-+      } else {
-+              ext3_discard_prealloc (inode);
-+              ext3_debug ("preallocation miss (%lu/%lu).\n",
-+                          alloc_hits, ++alloc_attempts);
-+              if (S_ISREG(inode->i_mode))      /* NOTE(review): both calls below omit handle, unlike the #else branch - confirm */
-+                      result = ext3_new_block (inode, goal, 
-+                               &inode->u.ext3_i.i_prealloc_count,
-+                               &inode->u.ext3_i.i_prealloc_block, err);
-+              else
-+                      result = ext3_new_block (inode, goal, 0, 0, err);
-+              /*
-+               * AKPM: this is somewhat sticky.  I'm not surprised it was
-+               * disabled in 2.2's ext3.  Need to integrate b_committed_data
-+               * guarding with preallocation, if indeed preallocation is
-+               * effective.
-+               */
-+      }
-+#else
-+      result = ext3_new_block (handle, inode, goal, 0, 0, err);
-+#endif
-+      return result;
-+}
-+
-+
-+typedef struct {      /* one step of a block-pointer chain; filled in by add_chain() */
-+      u32     *p;      /* address the block number was read from */
-+      u32     key;      /* value of *p at the time it was recorded */
-+      struct buffer_head *bh;      /* buffer passed to add_chain() alongside p (NULL for the first step in ext3_get_branch) */
-+} Indirect;
-+
-+static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v)      /* record pointer v, its current value, and bh in *p */
-+{
-+      p->key = *(p->p = v);      /* store v in p->p, then snapshot *v into p->key */
-+      p->bh = bh;
-+}
-+
-+static inline int verify_chain(Indirect *from, Indirect *to)      /* nonzero iff every entry in [from, to] still has key == *p */
-+{
-+      while (from <= to && from->key == *from->p)      /* stop at the first entry whose stored key no longer matches */
-+              from++;
-+      return (from > to);
-+}
-+
-+/**
-+ *    ext3_block_to_path - parse the block number into array of offsets
-+ *    @inode: inode in question (we are only interested in its superblock)
-+ *    @i_block: block number to be parsed
-+ *    @offsets: array to store the offsets in
-+ *
-+ *    To store the locations of file's data ext3 uses a data structure common
-+ *    for UNIX filesystems - tree of pointers anchored in the inode, with
-+ *    data blocks at leaves and indirect blocks in intermediate nodes.
-+ *    This function translates the block number into path in that tree -
-+ *    return value is the path length and @offsets[n] is the offset of
-+ *    pointer to (n+1)th node in the nth one. If @i_block is out of range
-+ *    (negative or too large) warning is printed and zero returned.
-+ *
-+ *    Note: function doesn't find node addresses, so no IO is needed. All
-+ *    we need to know is the capacity of indirect blocks (taken from the
-+ *    inode->i_sb).
-+ */
-+
-+/*
-+ * Portability note: the last comparison (check that we fit into triple
-+ * indirect block) is spelled differently, because otherwise on an
-+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
-+ * if our filesystem had 8Kb blocks. We might use long long, but that would
-+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
-+ * i_block would have to be negative in the very beginning, so we would not
-+ * get there at all.
-+ */
-+
-+static int ext3_block_to_path(struct inode *inode, long i_block, int offsets[4])
-+{
-+      int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-+      int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb);
-+      const long direct_blocks = EXT3_NDIR_BLOCKS,
-+              indirect_blocks = ptrs,
-+              double_blocks = (1 << (ptrs_bits * 2));      /* 2^(2 * ptrs_bits) blocks in the double-indirect range */
-+      int n = 0;      /* path length so far; stays 0 on the two warning paths */
-+
-+      if (i_block < 0) {
-+              ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0");
-+      } else if (i_block < direct_blocks) {
-+              offsets[n++] = i_block;
-+      } else if ( (i_block -= direct_blocks) < indirect_blocks) {      /* each test rebases i_block to the start of its range */
-+              offsets[n++] = EXT3_IND_BLOCK;
-+              offsets[n++] = i_block;
-+      } else if ((i_block -= indirect_blocks) < double_blocks) {
-+              offsets[n++] = EXT3_DIND_BLOCK;
-+              offsets[n++] = i_block >> ptrs_bits;
-+              offsets[n++] = i_block & (ptrs - 1);
-+      } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {      /* the comparison spelled differently, per the portability note above */
-+              offsets[n++] = EXT3_TIND_BLOCK;
-+              offsets[n++] = i_block >> (ptrs_bits * 2);
-+              offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
-+              offsets[n++] = i_block & (ptrs - 1);
-+      } else {
-+              ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big");
-+      }
-+      return n;
-+}
-+
-+/**
-+ *    ext3_get_branch - read the chain of indirect blocks leading to data
-+ *    @inode: inode in question
-+ *    @depth: depth of the chain (1 - direct pointer, etc.)
-+ *    @offsets: offsets of pointers in inode/indirect blocks
-+ *    @chain: place to store the result
-+ *    @err: here we store the error value
-+ *
-+ *    Function fills the array of triples <key, p, bh> and returns %NULL
-+ *    if everything went OK or the pointer to the last filled triple
-+ *    (incomplete one) otherwise. Upon the return chain[i].key contains
-+ *    the number of (i+1)-th block in the chain (as it is stored in memory,
-+ *    i.e. little-endian 32-bit), chain[i].p contains the address of that
-+ *    number (it points into struct inode for i==0 and into the bh->b_data
-+ *    for i>0) and chain[i].bh points to the buffer_head of i-th indirect
-+ *    block for i>0 and NULL for i==0. In other words, it holds the block
-+ *    numbers of the chain, addresses they were taken from (and where we can
-+ *    verify that chain did not change) and buffer_heads hosting these
-+ *    numbers.
-+ *
-+ *    Function stops when it stumbles upon zero pointer (absent block)
-+ *            (pointer to last triple returned, *@err == 0)
-+ *    or when it gets an IO error reading an indirect block
-+ *            (ditto, *@err == -EIO)
-+ *    or when it notices that chain had been changed while it was reading
-+ *            (ditto, *@err == -EAGAIN)
-+ *    or when it reads all @depth-1 indirect blocks successfully and finds
-+ *    the whole chain, all way to the data (returns %NULL, *err == 0).
-+ *
-+ *    Buffer heads stored in chain[1..] are not released here; that is
-+ *    left to the caller.  A buffer that was read but never stored in
-+ *    @chain (the -EAGAIN case) is released before returning.
-+ */
-+static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets,
-+                               Indirect chain[4], int *err)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      Indirect *p = chain;
-+      struct buffer_head *bh;
-+
-+      *err = 0;
-+      /* i_data is not going away, no lock needed */
-+      add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets);
-+      if (!p->key)
-+              goto no_block;
-+      /* One iteration per indirect level, i.e. @depth - 1 reads at most */
-+      while (--depth) {
-+              bh = sb_bread(sb, le32_to_cpu(p->key));
-+              if (!bh)
-+                      goto failure;
-+              /* Reader: pointers */
-+              if (!verify_chain(chain, p))
-+                      goto changed;
-+              add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
-+              /* Reader: end */
-+              if (!p->key)
-+                      goto no_block;
-+      }
-+      return NULL;
-+
-+changed:
-+      /*
-+       * bh was read but never entered into chain[], so the caller, which
-+       * only releases chain[1..p], would never drop it.  Release it here.
-+       */
-+      brelse(bh);
-+      *err = -EAGAIN;
-+      goto no_block;
-+failure:
-+      *err = -EIO;
-+no_block:
-+      return p;
-+}
-+
-+/**
-+ *    ext3_find_near - pick an allocation hint close to related blocks
-+ *    @inode: owner
-+ *    @ind: descriptor of the slot (in the inode or in an indirect block)
-+ *          that will point at the new block
-+ *
-+ *    This function returns the preferred place for block allocation.
-+ *    It is the fallback for when the sequential-allocation heuristic
-+ *    yields nothing.  In order of preference the hint is:
-+ *      + the nearest non-zero pointer stored before @ind->p in the same
-+ *        array (a block "to the left" of our position);
-+ *      + the block number of the indirect block hosting @ind->p;
-+ *      + the start of the inode's own block group, offset by the
-+ *        superblock's s_first_data_block.
-+ *    Caller must make sure that @ind is valid and will stay that way.
-+ */
-+
-+static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
-+{
-+      struct buffer_head *host = ind->bh;
-+      u32 *first = host ? (u32*) host->b_data : inode->u.ext3_i.i_data;
-+      u32 *slot = ind->p;
-+
-+      /* Scan backwards from the slot just before ours */
-+      while (--slot >= first) {
-+              if (*slot)
-+                      return le32_to_cpu(*slot);
-+      }
-+
-+      /* Nothing to the left: stay close to the hosting indirect block */
-+      if (host)
-+              return host->b_blocknr;
-+
-+      /*
-+       * The pointer lives in the inode itself, so aim for the inode's
-+       * own block group.
-+       */
-+      return (inode->u.ext3_i.i_block_group *
-+              EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
-+             le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block);
-+}
-+
-+/**
-+ *    ext3_find_goal - find a prefered place for allocation.
-+ *    @inode: owner
-+ *    @block:  block we want
-+ *    @chain:  chain of indirect blocks
-+ *    @partial: pointer to the last triple within a chain
-+ *    @goal:  place to store the result.
-+ *
-+ *    Normally this function find the prefered place for block allocation,
-+ *    stores it in *@goal and returns zero. If the branch had been changed
-+ *    under us we return -EAGAIN.
-+ */
-+
-+static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-+                        Indirect *partial, unsigned long *goal)
-+{
-+      /* Writer: ->i_next_alloc* */
-+      if (block == inode->u.ext3_i.i_next_alloc_block + 1) {
-+              inode->u.ext3_i.i_next_alloc_block++;
-+              inode->u.ext3_i.i_next_alloc_goal++;
-+      }
-+#ifdef SEARCH_FROM_ZERO
-+      inode->u.ext3_i.i_next_alloc_block = 0;
-+      inode->u.ext3_i.i_next_alloc_goal = 0;
-+#endif
-+      /* Writer: end */
-+      /* Reader: pointers, ->i_next_alloc* */
-+      if (verify_chain(chain, partial)) {
-+              /*
-+               * try the heuristic for sequential allocation,
-+               * failing that at least try to get decent locality.
-+               */
-+              if (block == inode->u.ext3_i.i_next_alloc_block)
-+                      *goal = inode->u.ext3_i.i_next_alloc_goal;
-+              if (!*goal)
-+                      *goal = ext3_find_near(inode, partial);
-+#ifdef SEARCH_FROM_ZERO
-+              *goal = 0;
-+#endif
-+              return 0;
-+      }
-+      /* Reader: end */
-+      return -EAGAIN;
-+}
-+
-+/**
-+ *    ext3_alloc_branch - allocate and set up a chain of blocks.
-+ *    @handle: journal handle the allocations and buffer updates run under
-+ *    @inode: owner
-+ *    @num: depth of the chain (number of blocks to allocate)
-+ *    @goal: allocation hint for the first block; each later block is
-+ *           requested near the block allocated just before it
-+ *    @offsets: offsets (in the blocks) to store the pointers to next.
-+ *    @branch: place to store the chain in.
-+ *
-+ *    This function allocates @num blocks, zeroes out all but the last one,
-+ *    links them into chain and (if we are synchronous) writes them to disk.
-+ *    In other words, it prepares a branch that can be spliced onto the
-+ *    inode. It stores the information about that chain in the branch[], in
-+ *    the same format as ext3_get_branch() would do. We are calling it after
-+ *    we had read the existing part of chain and partial points to the last
-+ *    triple of that (one with zero ->key). Upon the exit we have the same
-+ *    picture as after the successful ext3_get_block(), except that in one
-+ *    place chain is disconnected - *branch->p is still zero (we did not
-+ *    set the last link), but branch->key contains the number that should
-+ *    be placed into *branch->p to fill that gap.
-+ *
-+ *    If allocation fails we free all blocks we've allocated (and forget
-+ *    their buffer_heads) and return the error value from the failed
-+ *    ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain
-+ *    as described above and return 0.
-+ */
-+
-+static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
-+                           int num,
-+                           unsigned long goal,
-+                           int *offsets,
-+                           Indirect *branch)
-+{
-+      int blocksize = inode->i_sb->s_blocksize;
-+      /* n: loop position; keys: how many branch[].key entries hold a block */
-+      int n = 0, keys = 0;
-+      int err = 0;
-+      int i;
-+      int parent = ext3_alloc_block(handle, inode, goal, &err);
-+
-+      branch[0].key = cpu_to_le32(parent);
-+      if (parent) {
-+              /*
-+               * branch[0] now owns a block.  Count it right away so the
-+               * cleanup below frees it even if the very next allocation
-+               * fails before the loop has had a chance to update keys.
-+               */
-+              keys = 1;
-+              for (n = 1; n < num; n++) {
-+                      struct buffer_head *bh;
-+                      /* Allocate the next block */
-+                      int nr = ext3_alloc_block(handle, inode, parent, &err);
-+                      if (!nr)
-+                              break;
-+                      branch[n].key = cpu_to_le32(nr);
-+                      keys = n+1;
-+
-+                      /*
-+                       * Get buffer_head for parent block, zero it out
-+                       * and set the pointer to new one, then send
-+                       * parent to disk.
-+                       */
-+                      bh = sb_getblk(inode->i_sb, parent);
-+                      branch[n].bh = bh;
-+                      lock_buffer(bh);
-+                      BUFFER_TRACE(bh, "call get_create_access");
-+                      err = ext3_journal_get_create_access(handle, bh);
-+                      if (err) {
-+                              /*
-+                               * NOTE(review): bh is released here, yet
-+                               * branch[n].bh still refers to it and the
-+                               * cleanup loop below passes it to
-+                               * ext3_journal_forget() - confirm this is
-+                               * not a double release.
-+                               */
-+                              unlock_buffer(bh);
-+                              brelse(bh);
-+                              break;
-+                      }
-+
-+                      memset(bh->b_data, 0, blocksize);
-+                      branch[n].p = (u32*) bh->b_data + offsets[n];
-+                      *branch[n].p = branch[n].key;
-+                      BUFFER_TRACE(bh, "marking uptodate");
-+                      mark_buffer_uptodate(bh, 1);
-+                      unlock_buffer(bh);
-+
-+                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+                      err = ext3_journal_dirty_metadata(handle, bh);
-+                      if (err)
-+                              break;
-+
-+                      parent = nr;
-+              }
-+      }
-+      /* The loop ran to completion only if every block was set up */
-+      if (n == num)
-+              return 0;
-+
-+      /* Allocation failed, free what we already allocated */
-+      for (i = 1; i < keys; i++) {
-+              BUFFER_TRACE(branch[i].bh, "call journal_forget");
-+              ext3_journal_forget(handle, branch[i].bh);
-+      }
-+      for (i = 0; i < keys; i++)
-+              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+      return err;
-+}
-+
-+/**
-+ *    ext3_splice_branch - splice the allocated branch onto inode.
-+ *    @handle: journal handle used for the buffer accesses, the inode
-+ *            update and any block frees done here
-+ *    @inode: owner
-+ *    @block: (logical) number of block we are adding
-+ *    @chain: chain of indirect blocks (with a missing link - see
-+ *            ext3_alloc_branch)
-+ *    @where: location of missing link
-+ *    @num:   number of blocks we are adding
-+ *
-+ *    This function verifies that chain (up to the missing link) had not
-+ *    changed, fills the missing link and does all housekeeping needed in
-+ *    inode (->i_blocks, etc.). In case of success we end up with the full
-+ *    chain to new block and return 0. Otherwise (== chain had been changed)
-+ *    we free the new blocks (forgetting their buffer_heads, indeed) and
-+ *    return -EAGAIN.
-+ *
-+ *    A journal error from get_write_access or dirty_metadata is returned
-+ *    as is; in that case the new buffers are forgotten but the new blocks
-+ *    are deliberately not freed.
-+ */
-+
-+static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
-+                            Indirect chain[4], Indirect *where, int num)
-+{
-+      int i;
-+      int err = 0;
-+
-+      /*
-+       * If we're splicing into a [td]indirect block (as opposed to the
-+       * inode) then we need to get write access to the [td]indirect block
-+       * before the splice.
-+       */
-+      if (where->bh) {
-+              BUFFER_TRACE(where->bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, where->bh);
-+              if (err)
-+                      goto err_out;
-+      }
-+      /* Verify that place we are splicing to is still there and vacant */
-+
-+      /* Writer: pointers, ->i_next_alloc* */
-+      if (!verify_chain(chain, where-1) || *where->p)
-+              /* Writer: end */
-+              goto changed;
-+
-+      /* That's it */
-+
-+      *where->p = where->key; /* the single store that connects the new branch */
-+      inode->u.ext3_i.i_next_alloc_block = block;
-+      inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);      /* last block of the new branch */
-+#ifdef SEARCH_FROM_ZERO
-+      inode->u.ext3_i.i_next_alloc_block = 0;
-+      inode->u.ext3_i.i_next_alloc_goal = 0;
-+#endif
-+      /* Writer: end */
-+
-+      /* We are done with atomic stuff, now do the rest of housekeeping */
-+
-+      inode->i_ctime = CURRENT_TIME;
-+      ext3_mark_inode_dirty(handle, inode);   /* return value is not checked here */
-+
-+      /* had we spliced it onto indirect block? */
-+      if (where->bh) {
-+              /*
-+               * akpm: If we spliced it onto an indirect block, we haven't
-+               * altered the inode.  Note however that if it is being spliced
-+               * onto an indirect block at the very end of the file (the
-+               * file is growing) then we *will* alter the inode to reflect
-+               * the new i_size.  But that is not done here - it is done in
-+               * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode.
-+               */
-+              jbd_debug(5, "splicing indirect only\n");
-+              BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
-+              err = ext3_journal_dirty_metadata(handle, where->bh);
-+              if (err) 
-+                      goto err_out;
-+      } else {
-+              /*
-+               * OK, we spliced it into the inode itself on a direct block.
-+               * Inode was dirtied above.
-+               */
-+              jbd_debug(5, "splicing direct\n");
-+      }
-+      return err;
-+
-+changed:
-+      /*
-+       * AKPM: if where[i].bh isn't part of the current updating
-+       * transaction then we explode nastily.  Test this code path.
-+       */
-+      jbd_debug(1, "the chain changed: try again\n");
-+      err = -EAGAIN;
-+      
-+err_out:
-+      for (i = 1; i < num; i++) {     /* where[0].bh belongs to the existing chain, so start at 1 */
-+              BUFFER_TRACE(where[i].bh, "call journal_forget");
-+              ext3_journal_forget(handle, where[i].bh);
-+      }
-+      /* For the normal collision cleanup case, we free up the blocks.
-+       * On genuine filesystem errors we don't even think about doing
-+       * that. */
-+      if (err == -EAGAIN)
-+              for (i = 0; i < num; i++)
-+                      ext3_free_blocks(handle, inode, 
-+                                       le32_to_cpu(where[i].key), 1);
-+      return err;
-+}
-+
-+/*
-+ * Allocation strategy is simple: if we have to allocate something, we will
-+ * have to go the whole way to leaf. So let's do it before attaching anything
-+ * to tree, set linkage between the newborn blocks, write them if sync is
-+ * required, recheck the path, free and repeat if check fails, otherwise
-+ * set the last missing link (that will protect us from any truncate-generated
-+ * removals - all blocks on the path are immune now) and possibly force the
-+ * write on the parent block.
-+ * That has a nice additional property: no special recovery from the failed
-+ * allocations is needed - we simply release blocks and do not touch anything
-+ * reachable from inode.
-+ *
-+ * akpm: `handle' can be NULL if create == 0.
-+ *
-+ * The BKL may not be held on entry here.  Be sure to take it early.
-+ *
-+ * Summary of the visible behaviour:
-+ *  - iblock is translated with ext3_block_to_path(); a rejected block
-+ *    number (depth 0) returns -EIO without taking the BKL.
-+ *  - If the whole chain exists, bh_result gets b_dev, b_blocknr and
-+ *    BH_Mapped set, BH_New cleared, and 0 is returned.
-+ *  - If the chain is incomplete and create == 0, 0 is returned and
-+ *    bh_result is left unmapped; a failed indirect-block read returns -EIO.
-+ *  - With create != 0 the missing branch is allocated and spliced in under
-+ *    truncate_sem; on success bh_result is mapped with BH_New set.
-+ *  - -EAGAIN from any step releases the buffers read so far and restarts
-+ *    the lookup from `reread'.
-+ */
-+
-+static int ext3_get_block_handle(handle_t *handle, struct inode *inode, 
-+                               long iblock,
-+                               struct buffer_head *bh_result, int create)
-+{
-+      int err = -EIO; /* value returned when ext3_block_to_path() rejects iblock */
-+      int offsets[4];
-+      Indirect chain[4];
-+      Indirect *partial;
-+      unsigned long goal;
-+      int left;       /* number of chain entries still missing, from partial to the leaf */
-+      int depth = ext3_block_to_path(inode, iblock, offsets);
-+      loff_t new_size;
-+
-+      J_ASSERT(handle != NULL || create == 0);
-+
-+      if (depth == 0)
-+              goto out;
-+
-+      lock_kernel();
-+reread:
-+      partial = ext3_get_branch(inode, depth, offsets, chain, &err);
-+
-+      /* Simplest case - block found, no allocation needed */
-+      if (!partial) {
-+              bh_result->b_state &= ~(1UL << BH_New);
-+got_it:        /* also entered by goto after a successful allocation */
-+              bh_result->b_dev = inode->i_dev;
-+              bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key);
-+              bh_result->b_state |= (1UL << BH_Mapped);
-+              /* Clean up and exit */
-+              partial = chain+depth-1; /* the whole chain */
-+              goto cleanup;
-+      }
-+
-+      /* Next simple case - plain lookup or failed read of indirect block */
-+      if (!create || err == -EIO) {
-+cleanup:       /* shared exit: release chain[1..partial].bh, drop the BKL, return err */
-+              while (partial > chain) {
-+                      BUFFER_TRACE(partial->bh, "call brelse");
-+                      brelse(partial->bh);
-+                      partial--;
-+              }
-+              BUFFER_TRACE(bh_result, "returned");
-+              unlock_kernel();
-+out:
-+              return err;
-+      }
-+
-+      /*
-+       * Indirect block might be removed by truncate while we were
-+       * reading it. Handling of that case (forget what we've got and
-+       * reread) is taken out of the main path.
-+       */
-+      if (err == -EAGAIN)
-+              goto changed;
-+
-+      if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0)
-+              goto changed;
-+
-+      left = (chain + depth) - partial;
-+
-+      /*
-+       * Block out ext3_truncate while we alter the tree
-+       */
-+      down_read(&inode->u.ext3_i.truncate_sem);
-+      err = ext3_alloc_branch(handle, inode, left, goal,
-+                                      offsets+(partial-chain), partial);
-+
-+      /* The ext3_splice_branch call will free and forget any buffers
-+       * on the new chain if there is a failure, but that risks using
-+       * up transaction credits, especially for bitmaps where the
-+       * credits cannot be returned.  Can we handle this somehow?  We
-+       * may need to return -EAGAIN upwards in the worst case.  --sct */
-+      if (!err)
-+              err = ext3_splice_branch(handle, inode, iblock, chain,
-+                                       partial, left);
-+      up_read(&inode->u.ext3_i.truncate_sem);
-+      if (err == -EAGAIN)
-+              goto changed;
-+      if (err)
-+              goto cleanup;
-+
-+      new_size = inode->i_size;
-+      /*
-+       * This is not racy against ext3_truncate's modification of i_disksize
-+       * because VM/VFS ensures that the file cannot be extended while
-+       * truncate is in progress.  It is racy between multiple parallel
-+       * instances of get_block, but we have the BKL.
-+       */
-+      if (new_size > inode->u.ext3_i.i_disksize)
-+              inode->u.ext3_i.i_disksize = new_size;
-+
-+      bh_result->b_state |= (1UL << BH_New);
-+      goto got_it;
-+
-+changed:
-+      while (partial > chain) {       /* drop every buffer read so far, then retry with the BKL still held */
-+              jbd_debug(1, "buffer chain changed, retrying\n");
-+              BUFFER_TRACE(partial->bh, "brelsing");
-+              brelse(partial->bh);
-+              partial--;
-+      }
-+      goto reread;
-+}
-+
-+/*
-+ * get_block callback: pick up the journal handle of the running
-+ * transaction when an allocation may be needed and defer the real work to
-+ * ext3_get_block_handle().  The handle is only looked up (and asserted to
-+ * exist) when @create is set; plain lookups pass a null handle.
-+ *
-+ * The BKL is not held on entry here.
-+ */
-+static int ext3_get_block(struct inode *inode, long iblock,
-+                      struct buffer_head *bh_result, int create)
-+{
-+      handle_t *handle = NULL;
-+
-+      if (create != 0) {
-+              handle = ext3_journal_current_handle();
-+              J_ASSERT(handle != 0);
-+      }
-+      return ext3_get_block_handle(handle, inode, iblock, bh_result, create);
-+}
-+
-+/*
-+ * `handle' can be NULL if create is zero
-+ *
-+ * Map logical block `block' of `inode' through ext3_get_block_handle() and
-+ * return the buffer_head that sb_getblk() gives for the resulting disk
-+ * block.  The result of the mapping call is stored in *errp.  Returns NULL
-+ * when the mapping failed or the block is not mapped.  When the block was
-+ * newly allocated the buffer is zero-filled, marked up to date and
-+ * journalled as metadata; a journal error in that step is stored in *errp
-+ * and NULL is returned.  The buffer contents are not read from disk here.
-+ */
-+struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode,
-+                              long block, int create, int * errp)
-+{
-+      struct buffer_head dummy;       /* on-stack stand-in that only receives the mapping result */
-+      int fatal = 0, err;
-+      
-+      J_ASSERT(handle != NULL || create == 0);
-+
-+      dummy.b_state = 0;
-+      dummy.b_blocknr = -1000;        /* placeholder, overwritten when the block gets mapped */
-+      buffer_trace_init(&dummy.b_history);
-+      *errp = ext3_get_block_handle(handle, inode, block, &dummy, create);
-+      if (!*errp && buffer_mapped(&dummy)) {
-+              struct buffer_head *bh;
-+              bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
-+              if (buffer_new(&dummy)) {
-+                      J_ASSERT(create != 0);
-+                      J_ASSERT(handle != 0);
-+
-+                      /* Now that we do not always journal data, we
-+                         should keep in mind whether this should
-+                         always journal the new buffer as metadata.
-+                         For now, regular file writes use
-+                         ext3_get_block instead, so it's not a
-+                         problem. */
-+                      lock_kernel();
-+                      lock_buffer(bh);
-+                      BUFFER_TRACE(bh, "call get_create_access");
-+                      fatal = ext3_journal_get_create_access(handle, bh);
-+                      if (!fatal) {
-+                              memset(bh->b_data, 0,
-+                                     inode->i_sb->s_blocksize);
-+                              mark_buffer_uptodate(bh, 1);
-+                      }
-+                      unlock_buffer(bh);
-+                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+                      err = ext3_journal_dirty_metadata(handle, bh);  /* called even when get_create_access failed */
-+                      if (!fatal) fatal = err;        /* keep the first error */
-+                      unlock_kernel();
-+              } else {
-+                      BUFFER_TRACE(bh, "not a new buffer");
-+              }
-+              if (fatal) {
-+                      *errp = fatal;
-+                      brelse(bh);
-+                      bh = NULL;
-+              }
-+              return bh;
-+      }
-+      return NULL;
-+}
-+
-+struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode,
-+                             int block, int create, int *err)
-+{
-+      struct buffer_head * bh;
-+      int prev_blocks;
-+
-+      prev_blocks = inode->i_blocks;
-+
-+      bh = ext3_getblk (handle, inode, block, create, err);
-+      if (!bh)
-+              return bh;
-+#ifdef EXT3_PREALLOCATE
-+      /*
-+       * If the inode has grown, and this is a directory, then use a few
-+       * more of the preallocated blocks to keep directory fragmentation
-+       * down.  The preallocated blocks are guaranteed to be contiguous.
-+       */
-+      if (create &&
-+          S_ISDIR(inode->i_mode) &&
-+          inode->i_blocks > prev_blocks &&
-+          EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-+                                  EXT3_FEATURE_COMPAT_DIR_PREALLOC)) {
-+              int i;
-+              struct buffer_head *tmp_bh;
-+
-+              for (i = 1;
-+                   inode->u.ext3_i.i_prealloc_count &&
-+                   i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
-+                   i++) {
-+                      /*
-+                       * ext3_getblk will zero out the contents of the
-+                       * directory for us
-+                       */
-+                      tmp_bh = ext3_getblk(handle, inode,
-+                                              block+i, create, err);
-+                      if (!tmp_bh) {
-+                              brelse (bh);
-+                              return 0;
-+                      }
-+                      brelse (tmp_bh);
-+              }
-+      }
-+#endif
-+      if (buffer_uptodate(bh))
-+              return bh;
-+      ll_rw_block (READ, 1, &bh);
-+      wait_on_buffer (bh);
-+      if (buffer_uptodate(bh))
-+              return bh;
-+      brelse (bh);
-+      *err = -EIO;
-+      return NULL;
-+}
-+
-+/*
-+ * Walk the circular b_this_page list starting at @head and call @fn on
-+ * every buffer that overlaps the byte range [@from, @to) of the page.
-+ * Buffers outside the range are not passed to @fn; if @partial is non-NULL
-+ * it is set to 1 when any such buffer is not up to date.  The walk stops
-+ * at the first non-zero return from @fn, which is also the return value;
-+ * otherwise 0 is returned.
-+ */
-+static int walk_page_buffers( handle_t *handle,
-+                              struct buffer_head *head,
-+                              unsigned from,
-+                              unsigned to,
-+                              int *partial,
-+                              int (*fn)(      handle_t *handle,
-+                                              struct buffer_head *bh))
-+{
-+      struct buffer_head *cur = head;
-+      unsigned size = head->b_size;
-+      unsigned start = 0;
-+      int ret = 0;
-+
-+      /* start == 0 lets the first pass through even though cur == head */
-+      while (ret == 0 && (cur != head || start == 0)) {
-+              unsigned end = start + size;
-+
-+              if (end <= from || start >= to) {
-+                      if (partial && !buffer_uptodate(cur))
-+                              *partial = 1;
-+              } else {
-+                      ret = (*fn)(handle, cur);
-+              }
-+              start = end;
-+              cur = cur->b_this_page;
-+      }
-+      return ret;
-+}
-+
-+/*
-+ * To preserve ordering, it is essential that the hole instantiation and
-+ * the data write be encapsulated in a single transaction.  We cannot
-+ * close off a transaction and start a new one between the ext3_get_block()
-+ * and the commit_write().  So doing the journal_start at the start of
-+ * prepare_write() is the right place.
-+ *
-+ * Also, this function can nest inside ext3_writepage() ->
-+ * block_write_full_page(). In that case, we *know* that ext3_writepage()
-+ * has generated enough buffer credits to do the whole page.  So we won't
-+ * block on the journal in that case, which is good, because the caller may
-+ * be PF_MEMALLOC.
-+ *
-+ * By accident, ext3 can be reentered when a transaction is open via
-+ * quota file writes.  If we were to commit the transaction while thus
-+ * reentered, there can be a deadlock - we would be holding a quota
-+ * lock, and the commit would never complete if another thread had a
-+ * transaction open and was blocking on the quota lock - a ranking
-+ * violation.
-+ *
-+ * So what we do is to rely on the fact that journal_stop/journal_start
-+ * will _not_ run commit under these circumstances because handle->h_ref
-+ * is elevated.  We'll still have enough credits for the tiny quotafile
-+ * write.  
-+ */
-+
-+/*
-+ * walk_page_buffers() callback: obtain journal write access to one buffer.
-+ */
-+static int do_journal_get_write_access(handle_t *handle,
-+                                     struct buffer_head *bh)
-+{
-+      int rc = ext3_journal_get_write_access(handle, bh);
-+
-+      return rc;
-+}
-+
-+static int ext3_prepare_write(struct file *file, struct page *page,
-+                            unsigned from, unsigned to)
-+{
-+      struct inode *inode = page->mapping->host;
-+      int ret, needed_blocks = ext3_writepage_trans_blocks(inode);    /* credits requested for the handle */
-+      handle_t *handle;
-+
-+      lock_kernel();
-+      handle = ext3_journal_start(inode, needed_blocks);
-+      if (IS_ERR(handle)) {
-+              ret = PTR_ERR(handle);
-+              goto out;       /* no handle was obtained, so nothing to stop */
-+      }
-+      unlock_kernel();        /* the BKL is dropped around block_prepare_write() */
-+      ret = block_prepare_write(page, from, to, ext3_get_block);
-+      lock_kernel();
-+      if (ret != 0)
-+              goto prepare_write_failed;
-+
-+      if (ext3_should_journal_data(inode)) {
-+              ret = walk_page_buffers(handle, page->buffers,
-+                              from, to, NULL, do_journal_get_write_access);
-+              if (ret) {
-+                      /*
-+                       * We're going to fail this prepare_write(),
-+                       * so commit_write() will not be called.
-+                       * We need to undo block_prepare_write()'s kmap().
-+                       * AKPM: Do we need to clear PageUptodate?  I don't
-+                       * think so.
-+                       */
-+                      kunmap(page);
-+              }
-+      }
-+prepare_write_failed:
-+      if (ret)        /* on success the handle stays open; ext3_commit_write() stops it */
-+              ext3_journal_stop(handle, inode);
-+out:
-+      unlock_kernel();
-+      return ret;
-+}
-+
-+/*
-+ * walk_page_buffers() callback used by ext3_commit_write() in ordered
-+ * mode: pass the buffer to ext3_journal_dirty_data() with a last argument
-+ * of 0.
-+ */
-+static int journal_dirty_sync_data(handle_t *handle, struct buffer_head *bh)
-+{
-+      int rc = ext3_journal_dirty_data(handle, bh, 0);
-+
-+      return rc;
-+}
-+
-+/*
-+ * For ext3_writepage().  Passes the buffer to ext3_journal_dirty_data()
-+ * with a last argument of 1, then drops one buffer reference with
-+ * __brelse() to account for the bget() which ext3_writepage() performs.
-+ * The reference is dropped whether or not the journal call succeeded.
-+ */
-+static int journal_dirty_async_data(handle_t *handle, struct buffer_head *bh)
-+{
-+      int rc;
-+
-+      rc = ext3_journal_dirty_data(handle, bh, 1);
-+      __brelse(bh);
-+      return rc;
-+}
-+
-+/*
-+ * For commit_write() in data=journal mode: flag the buffer as up to date
-+ * and then journal it as dirty metadata.
-+ */
-+static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
-+{
-+      int rc;
-+
-+      set_bit(BH_Uptodate, &bh->b_state);
-+      rc = ext3_journal_dirty_metadata(handle, bh);
-+      return rc;
-+}
-+
-+/*
-+ * We need to pick up the new inode size which generic_commit_write gave us
-+ * `file' can be NULL - eg, when called from block_symlink().
-+ *
-+ * ext3 inode->i_dirty_buffers policy:  If we're journalling data we
-+ * definitely don't want them to appear on the inode at all - instead
-+ * we need to manage them at the JBD layer and we need to intercept
-+ * the relevant sync operations and translate them into journal operations.
-+ *
-+ * If we're not journalling data then we can just leave the buffers
-+ * on ->i_dirty_buffers.  If someone writes them out for us then thanks.
-+ * Otherwise we'll do it in commit, if we're using ordered data.
-+ *
-+ * The handle is taken from ext3_journal_current_handle() and is always
-+ * stopped before returning.  The return value is the first non-zero
-+ * result among the buffer walk / generic_commit_write(), the inode
-+ * dirtying and ext3_journal_stop(), or 0 if all of them succeeded.
-+ */
-+
-+static int ext3_commit_write(struct file *file, struct page *page,
-+                           unsigned from, unsigned to)
-+{
-+      handle_t *handle = ext3_journal_current_handle();
-+      struct inode *inode = page->mapping->host;
-+      int ret = 0, ret2;
-+
-+      lock_kernel();
-+      if (ext3_should_journal_data(inode)) {
-+              /*
-+               * Here we duplicate the generic_commit_write() functionality
-+               */
-+              int partial = 0;        /* set by the walk if a buffer outside [from, to) is not up to date */
-+              loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;    /* file offset just past the written range */
-+
-+              ret = walk_page_buffers(handle, page->buffers,
-+                      from, to, &partial, commit_write_fn);
-+              if (!partial)
-+                      SetPageUptodate(page);
-+              kunmap(page);
-+              if (pos > inode->i_size)
-+                      inode->i_size = pos;
-+              EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;     /* tested and cleared by ext3_bmap() */
-+      } else {
-+              if (ext3_should_order_data(inode)) {
-+                      ret = walk_page_buffers(handle, page->buffers,
-+                              from, to, NULL, journal_dirty_sync_data);
-+              }
-+              /* Be careful here if generic_commit_write becomes a
-+               * required invocation after block_prepare_write. */
-+              if (ret == 0) {
-+                      ret = generic_commit_write(file, page, from, to);
-+              } else {
-+                      /*
-+                       * block_prepare_write() was called, but we're not
-+                       * going to call generic_commit_write().  So we
-+                       * need to perform generic_commit_write()'s kunmap
-+                       * by hand.
-+                       */
-+                      kunmap(page);
-+              }
-+      }
-+      if (inode->i_size > inode->u.ext3_i.i_disksize) {
-+              inode->u.ext3_i.i_disksize = inode->i_size;
-+              ret2 = ext3_mark_inode_dirty(handle, inode);
-+              if (!ret) 
-+                      ret = ret2;
-+      }
-+      ret2 = ext3_journal_stop(handle, inode);        /* runs even when an earlier step failed */
-+      unlock_kernel();
-+      if (!ret)
-+              ret = ret2;
-+      return ret;
-+}
-+
-+/* 
-+ * bmap() is special.  It gets used by applications such as lilo and by
-+ * the swapper to find the on-disk block of a specific piece of data.
-+ *
-+ * Naturally, this is dangerous if the block concerned is still in the
-+ * journal.  If somebody makes a swapfile on an ext3 data-journaling
-+ * filesystem and enables swap, then they may get a nasty shock when the
-+ * data getting swapped to that swapfile suddenly gets overwritten by
-+ * the original zero's written out previously to the journal and
-+ * awaiting writeback in the kernel's buffer cache. 
-+ *
-+ * So, if we see any bmap calls here on a modified, data-journaled file,
-+ * take extra steps to flush any blocks which might be in the cache. 
-+ *
-+ * Returns whatever generic_block_bmap() returns for `block', or 0 if the
-+ * journal flush failed.
-+ */
-+static int ext3_bmap(struct address_space *mapping, long block)
-+{
-+      struct inode *inode = mapping->host;
-+      journal_t *journal;
-+      int err;
-+      
-+      if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
-+              /* 
-+               * This is a REALLY heavyweight approach, but the use of
-+               * bmap on dirty files is expected to be extremely rare:
-+               * only if we run lilo or swapon on a freshly made file
-+               * do we expect this to happen. 
-+               *
-+               * (bmap requires CAP_SYS_RAWIO so this does not
-+               * represent an unprivileged user DOS attack --- we'd be
-+               * in trouble if mortal users could trigger this path at
-+               * will.) 
-+               *
-+               * NB. EXT3_STATE_JDATA is not set on files other than
-+               * regular files.  If somebody wants to bmap a directory
-+               * or symlink and gets confused because the buffer
-+               * hasn't yet been flushed to disk, they deserve
-+               * everything they get.
-+               */
-+              
-+              EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA;    /* cleared before the flush, and not restored if it fails */
-+              journal = EXT3_JOURNAL(inode);
-+              journal_lock_updates(journal);
-+              err = journal_flush(journal);
-+              journal_unlock_updates(journal);
-+              
-+              if (err)
-+                      return 0;       /* the flush error code itself is not passed on */
-+      }
-+      
-+      return generic_block_bmap(mapping,block,ext3_get_block);
-+}
-+
-+/*
-+ * Buffer callback handed to walk_page_buffers() by ext3_writepage():
-+ * take one extra reference on @bh.  Always returns 0.
-+ */
-+static int bget_one(handle_t *handle, struct buffer_head *bh)
-+{
-+      (void)handle;   /* not needed here; part of the callback signature */
-+
-+      atomic_inc(&bh->b_count);
-+
-+      return 0;
-+}
-+
-+/*
-+ * ext3_writepage() - write one locked page back to disk inside a journal
-+ * transaction.  Returns 0, or the first error seen from starting the
-+ * handle, block_write_full_page(), the buffer walk or ext3_journal_stop().
-+ *
-+ * Note that we always start a transaction even if we're not journalling
-+ * data.  This is to preserve ordering: any hole instantiation within
-+ * __block_write_full_page -> ext3_get_block() should be journalled
-+ * along with the data so we don't crash and then get metadata which
-+ * refers to old data.
-+ *
-+ * In all journalling modes block_write_full_page() will start the I/O.
-+ *
-+ * Problem:
-+ *
-+ *    ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
-+ *            ext3_writepage()
-+ *
-+ * Similar for:
-+ *
-+ *    ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
-+ *
-+ * Same applies to ext3_get_block().  We will deadlock on various things like
-+ * lock_journal and i_truncate_sem.
-+ *
-+ * Setting PF_MEMALLOC here doesn't work - too many internal memory
-+ * allocations fail.
-+ *
-+ * 16May01: If we're reentered then journal_current_handle() will be
-+ *        non-zero. We simply *return*.
-+ *
-+ * 1 July 2001: @@@ FIXME:
-+ *   In journalled data mode, a data buffer may be metadata against the
-+ *   current transaction.  But the same file is part of a shared mapping
-+ *   and someone does a writepage() on it.
-+ *
-+ *   We will move the buffer onto the async_data list, but *after* it has
-+ *   been dirtied. So there's a small window where we have dirty data on
-+ *   BJ_Metadata.
-+ *
-+ *   Note that this only applies to the last partial page in the file.  The
-+ *   bit which block_write_full_page() uses prepare/commit for.  (That's
-+ *   broken code anyway: it's wrong for msync()).
-+ *
-+ *   It's a rare case: affects the final partial page, for journalled data
-+ *   where the file is subject to both write() and writepage() in the same
-+ *   transaction.  To fix it we'll need a custom block_write_full_page().
-+ *   We'll probably need that anyway for journalling writepage() output.
-+ *
-+ * We don't honour synchronous mounts for writepage().  That would be
-+ * disastrous.  Any write() or metadata operation will sync the fs for
-+ * us.
-+ */
-+static int ext3_writepage(struct page *page)
-+{
-+      struct inode *inode = page->mapping->host;
-+      struct buffer_head *page_buffers;
-+      handle_t *handle = NULL;
-+      int ret = 0, err;
-+      int needed;
-+      int order_data;
-+
-+      J_ASSERT(PageLocked(page));
-+      
-+      /*
-+       * We give up here if we're reentered, because it might be
-+       * for a different filesystem.  One *could* look for a
-+       * nested transaction opportunity.
-+       *
-+       * Giving up means jumping to out_fail: the page is redirtied and
-+       * unlocked and ret (still 0 at this point) is returned.
-+       */
-+      lock_kernel();
-+      if (ext3_journal_current_handle())
-+              goto out_fail;
-+
-+      needed = ext3_writepage_trans_blocks(inode);
-+      if (current->flags & PF_MEMALLOC) /* presumably a non-blocking start is wanted here -- TODO confirm */
-+              handle = ext3_journal_try_start(inode, needed);
-+      else
-+              handle = ext3_journal_start(inode, needed);
-+                              
-+      if (IS_ERR(handle)) {
-+              ret = PTR_ERR(handle);
-+              goto out_fail;
-+      }
-+
-+      order_data = ext3_should_order_data(inode) ||
-+                      ext3_should_journal_data(inode);
-+
-+      unlock_kernel();
-+
-+      page_buffers = NULL;    /* Purely to prevent compiler warning */
-+
-+      /* bget() all the buffers */
-+      if (order_data) {
-+              if (!page->buffers)
-+                      create_empty_buffers(page,
-+                              inode->i_dev, inode->i_sb->s_blocksize);
-+              page_buffers = page->buffers;
-+              walk_page_buffers(handle, page_buffers, 0,
-+                              PAGE_CACHE_SIZE, NULL, bget_one);
-+      }
-+
-+      ret = block_write_full_page(page, ext3_get_block);
-+
-+      /*
-+       * The page can become unlocked at any point now, and
-+       * truncate can then come in and change things.  So we
-+       * can't touch *page from now on.  But *page_buffers is
-+       * safe due to elevated refcount.
-+       */
-+
-+      handle = ext3_journal_current_handle();
-+      lock_kernel();
-+
-+      /* And attach them to the current transaction */
-+      if (order_data) {
-+              err = walk_page_buffers(handle, page_buffers,
-+                      0, PAGE_CACHE_SIZE, NULL, journal_dirty_async_data);
-+              if (!ret)       /* keep the earliest error */
-+                      ret = err;
-+      }
-+
-+      err = ext3_journal_stop(handle, inode);
-+      if (!ret)
-+              ret = err;
-+      unlock_kernel();
-+      return ret;
-+
-+out_fail:
-+      
-+      unlock_kernel();
-+      SetPageDirty(page);     /* nothing was written: leave the page dirty */
-+      UnlockPage(page);
-+      return ret;
-+}
-+
-+/*
-+ * readpage address-space operation: read @page in full through
-+ * block_read_full_page(), using ext3_get_block() for block mapping.
-+ * @file is not used.
-+ */
-+static int ext3_readpage(struct file *file, struct page *page)
-+{
-+      int status;
-+
-+      status = block_read_full_page(page, ext3_get_block);
-+
-+      return status;
-+}
-+
-+
-+/*
-+ * flushpage address-space operation: hand @page and @offset to
-+ * journal_flushpage() on the journal of the inode that owns the page.
-+ */
-+static int ext3_flushpage(struct page *page, unsigned long offset)
-+{
-+      struct inode *host = page->mapping->host;
-+
-+      return journal_flushpage(EXT3_JOURNAL(host), page, offset);
-+}
-+
-+/*
-+ * releasepage address-space operation: ask the journal of the inode that
-+ * owns @page to free the page's buffers, forwarding @wait unchanged.
-+ */
-+static int ext3_releasepage(struct page *page, int wait)
-+{
-+      struct inode *host = page->mapping->host;
-+
-+      return journal_try_to_free_buffers(EXT3_JOURNAL(host), page, wait);
-+}
-+
-+
-+/*
-+ * Address-space operations table for ext3.  The note on each entry records
-+ * whether the big kernel lock (BKL) is held on entry and whether the
-+ * handler takes it itself.
-+ */
-+struct address_space_operations ext3_aops = {
-+      .readpage       = ext3_readpage,        /* BKL not held.  Don't need */
-+      .writepage      = ext3_writepage,       /* BKL not held.  We take it */
-+      .sync_page      = block_sync_page,
-+      .prepare_write  = ext3_prepare_write,   /* BKL not held.  We take it */
-+      .commit_write   = ext3_commit_write,    /* BKL not held.  We take it */
-+      .bmap           = ext3_bmap,            /* BKL held */
-+      .flushpage      = ext3_flushpage,       /* BKL not held.  Don't need */
-+      .releasepage    = ext3_releasepage,     /* BKL not held.  Don't need */
-+};
-+
-+/*
-+ * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
-+ * up to the end of the block which corresponds to `from'.
-+ * This is required during truncate. We need to physically zero the tail end
-+ * of that block so it doesn't yield old data if the file is later grown.
-+ *
-+ * Returns 0 when the tail was zeroed or there was nothing to do (`from' on
-+ * a block boundary, or the block is a hole), -ENOMEM if the page cannot be
-+ * grabbed, -EIO if the block cannot be read, or the error returned by the
-+ * journal call that failed.
-+ */
-+static int ext3_block_truncate_page(handle_t *handle,
-+              struct address_space *mapping, loff_t from)
-+{
-+      unsigned long index = from >> PAGE_CACHE_SHIFT;
-+      unsigned offset = from & (PAGE_CACHE_SIZE-1);
-+      unsigned blocksize, iblock, length, pos;
-+      struct inode *inode = mapping->host;
-+      struct page *page;
-+      struct buffer_head *bh;
-+      int err;
-+
-+      blocksize = inode->i_sb->s_blocksize;
-+      length = offset & (blocksize - 1);      /* offset of `from' within its block */
-+
-+      /* Block boundary? Nothing to do */
-+      if (!length)
-+              return 0;
-+
-+      length = blocksize - length;            /* bytes from `from' to end of block */
-+      iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-+
-+      page = grab_cache_page(mapping, index);
-+      err = -ENOMEM;
-+      if (!page)
-+              goto out;
-+
-+      if (!page->buffers)
-+              create_empty_buffers(page, inode->i_dev, blocksize);
-+
-+      /* Find the buffer that contains "offset" */
-+      bh = page->buffers;
-+      pos = blocksize;
-+      while (offset >= pos) {
-+              bh = bh->b_this_page;
-+              iblock++;
-+              pos += blocksize;
-+      }
-+
-+      err = 0;
-+      if (!buffer_mapped(bh)) {
-+              /* Hole? Nothing to do */
-+              if (buffer_uptodate(bh))
-+                      goto unlock;
-+              ext3_get_block(inode, iblock, bh, 0); /* NOTE(review): return value is not checked */
-+              /* Still unmapped? Nothing to do */
-+              if (!buffer_mapped(bh))
-+                      goto unlock;
-+      }
-+
-+      /* Ok, it's mapped. Make sure it's up-to-date */
-+      if (Page_Uptodate(page))
-+              set_bit(BH_Uptodate, &bh->b_state);
-+
-+      if (!buffer_uptodate(bh)) {
-+              err = -EIO;
-+              ll_rw_block(READ, 1, &bh);
-+              wait_on_buffer(bh);
-+              /* Uhhuh. Read error. Complain and punt. */
-+              if (!buffer_uptodate(bh))
-+                      goto unlock;
-+      }
-+
-+      if (ext3_should_journal_data(inode)) {
-+              BUFFER_TRACE(bh, "get write access");
-+              err = ext3_journal_get_write_access(handle, bh);
-+              if (err)
-+                      goto unlock;
-+      }
-+      
-+      memset(kmap(page) + offset, 0, length);
-+      flush_dcache_page(page);
-+      kunmap(page);
-+
-+      BUFFER_TRACE(bh, "zeroed end of block");
-+
-+      err = 0;
-+      if (ext3_should_journal_data(inode)) {
-+              err = ext3_journal_dirty_metadata(handle, bh);
-+      } else {
-+              if (ext3_should_order_data(inode))
-+                      err = ext3_journal_dirty_data(handle, bh, 0);
-+              __mark_buffer_dirty(bh);
-+      }
-+
-+unlock:
-+      UnlockPage(page);
-+      page_cache_release(page);
-+out:
-+      return err;
-+}
-+
-+/*
-+ * Return 1 if every 32-bit word in the half-open range [p, q) is zero,
-+ * and 0 as soon as a non-zero word is found.  An empty range yields 1.
-+ *
-+ * Probably it should be a library function... search for first non-zero word
-+ * or memcmp with zero_page, whatever is better for particular architecture.
-+ * Linus?
-+ */
-+static inline int all_zeroes(u32 *p, u32 *q)
-+{
-+      u32 *cur;
-+
-+      for (cur = p; cur < q; cur++) {
-+              if (*cur != 0)
-+                      return 0;
-+      }
-+
-+      return 1;
-+}
-+
-+/**
-+ *    ext3_find_shared - find the indirect blocks for partial truncation.
-+ *    @inode:   inode in question
-+ *    @depth:   depth of the affected branch
-+ *    @offsets: offsets of pointers in that branch (see ext3_block_to_path)
-+ *    @chain:   place to store the pointers to partial indirect blocks
-+ *    @top:     place to store the (detached) top of branch
-+ *
-+ *    This is a helper function used by ext3_truncate().
-+ *
-+ *    When we do truncate() we may have to clean the ends of several
-+ *    indirect blocks but leave the blocks themselves alive. Block is
-+ *    partially truncated if some data below the new i_size is referred
-+ *    from it (and it is on the path to the first completely truncated
-+ *    data block, indeed).  We have to free the top of that path along
-+ *    with everything to the right of the path. Since no allocation
-+ *    past the truncation point is possible until ext3_truncate()
-+ *    finishes, we may safely do the latter, but top of branch may
-+ *    require special attention - pageout below the truncation point
-+ *    might try to populate it.
-+ *
-+ *    We atomically detach the top of branch from the tree, store the
-+ *    block number of its root in *@top, pointers to buffer_heads of
-+ *    partially truncated blocks - in @chain[].bh and pointers to
-+ *    their last elements that should not be removed - in
-+ *    @chain[].p. Return value is the pointer to last filled element
-+ *    of @chain.
-+ *
-+ *    NOTE(review): in this ext3 version the "detach" store is compiled
-+ *    out (#if 0 below), so the tree is left intact and only *@top is set.
-+ *
-+ *    The work left to caller to do the actual freeing of subtrees:
-+ *            a) free the subtree starting from *@top
-+ *            b) free the subtrees whose roots are stored in
-+ *                    (@chain[i].p+1 .. end of @chain[i].bh->b_data)
-+ *            c) free the subtrees growing from the inode past the @chain[0].
-+ *                    (no partially truncated stuff there).  */
-+
-+static Indirect *ext3_find_shared(struct inode *inode,
-+                              int depth,
-+                              int offsets[4],
-+                              Indirect chain[4],
-+                              u32 *top)
-+{
-+      Indirect *partial, *p;
-+      int k, err;
-+
-+      *top = 0;
-+      /* Make k index the deepest non-null offset + 1 */
-+      for (k = depth; k > 1 && !offsets[k-1]; k--)
-+              ;
-+      partial = ext3_get_branch(inode, k, offsets, chain, &err);
-+      /* Writer: pointers */
-+      if (!partial)
-+              partial = chain + k-1;
-+      /*
-+       * If the branch acquired continuation since we've looked at it -
-+       * fine, it should all survive and (new) top doesn't belong to us.
-+       */
-+      if (!partial->key && *partial->p)
-+              /* Writer: end */
-+              goto no_top;
-+      for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
-+              ;
-+      /*
-+       * OK, we've found the last block that must survive. The rest of our
-+       * branch should be detached before unlocking. However, if that rest
-+       * of branch is all ours and does not grow immediately from the inode
-+       * it's easier to cheat and just decrement partial->p.
-+       */
-+      if (p == chain + k - 1 && p > chain) {
-+              p->p--;
-+      } else {
-+              *top = *p->p;
-+              /* Nope, don't do this in ext3.  Must leave the tree intact */
-+#if 0
-+              *p->p = 0;
-+#endif
-+      }
-+      /* Writer: end */
-+
-+      while(partial > p)      /* drop buffers of chain entries beyond p */
-+      {
-+              brelse(partial->bh);
-+              partial--;
-+      }
-+no_top:
-+      return partial;
-+}
-+
-+/*
-+ * Zero a number of block pointers in either an inode or an indirect block
-+ * and release the corresponding blocks on disk.
-+ *
-+ * @handle:        handle of the running transaction
-+ * @inode:         inode the pointers belong to
-+ * @bh:            buffer of the indirect block holding [@first, @last), or
-+ *                 NULL when the pointers live in the inode itself
-+ * @block_to_free: first on-disk block of the run being released
-+ * @count:         number of on-disk blocks in that run
-+ * @first:         first pointer slot to clear
-+ * @last:          one past the final pointer slot to clear
-+ *
-+ * If we restart the transaction we must again get write access to the
-+ * indirect block for further modification.  When @bh is NULL there is no
-+ * indirect block, so there is nothing to re-acquire; calling
-+ * ext3_journal_get_write_access() on a NULL buffer must be avoided.
-+ *
-+ * We release `count' blocks on disk, but (last - first) may be greater
-+ * than `count' because there can be holes in there.
-+ */
-+static void
-+ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
-+              unsigned long block_to_free, unsigned long count,
-+              u32 *first, u32 *last)
-+{
-+      u32 *p;
-+
-+      if (try_to_extend_transaction(handle, inode)) {
-+              /* Flush what we have so far before the handle is restarted. */
-+              if (bh) {
-+                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+                      ext3_journal_dirty_metadata(handle, bh);
-+              }
-+              ext3_mark_inode_dirty(handle, inode);
-+              ext3_journal_test_restart(handle, inode);
-+              /* Only an indirect block needs its write access retaken. */
-+              if (bh) {
-+                      BUFFER_TRACE(bh, "get_write_access");
-+                      ext3_journal_get_write_access(handle, bh);
-+              }
-+      }
-+
-+      /*
-+       * Any buffers which are on the journal will be in memory. We find
-+       * them on the hash table so journal_revoke() will run journal_forget()
-+       * on them.  We've already detached each block from the file, so
-+       * bforget() in journal_forget() should be safe.
-+       *
-+       * AKPM: turn on bforget in journal_forget()!!!
-+       */
-+      for (p = first; p < last; p++) {
-+              u32 nr = le32_to_cpu(*p);
-+              if (nr) {
-+                      /* Named apart from the @bh parameter to avoid shadowing it. */
-+                      struct buffer_head *data_bh;
-+
-+                      *p = 0;
-+                      data_bh = sb_get_hash_table(inode->i_sb, nr);
-+                      ext3_forget(handle, 0, inode, data_bh, nr);
-+              }
-+      }
-+
-+      ext3_free_blocks(handle, inode, block_to_free, count);
-+}
-+
-+/**
-+ * ext3_free_data - free a list of data blocks
-+ * @handle:   handle for this transaction
-+ * @inode:    inode we are dealing with
-+ * @this_bh:  indirect buffer_head which contains *@first and *@last
-+ * @first:    array of block numbers
-+ * @last:     points immediately past the end of array
-+ *
-+ * We are freeing all blocks referred from that array (numbers are stored as
-+ * little-endian 32-bit) and updating @inode->i_blocks appropriately.
-+ *
-+ * We accumulate contiguous runs of blocks to free.  Conveniently, if these
-+ * blocks are contiguous then releasing them at one time will only affect one
-+ * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
-+ * actually use a lot of journal space.
-+ *
-+ * @this_bh will be %NULL if @first and @last point into the inode's direct
-+ * block pointers.
-+ *
-+ * Nothing is freed if write access to @this_bh cannot be obtained.
-+ */
-+static void ext3_free_data(handle_t *handle, struct inode *inode,
-+                         struct buffer_head *this_bh, u32 *first, u32 *last)
-+{
-+      unsigned long block_to_free = 0;    /* Starting block # of a run */
-+      unsigned long count = 0;            /* Number of blocks in the run */ 
-+      u32 *block_to_free_p = NULL;        /* Pointer into inode/ind
-+                                             corresponding to
-+                                             block_to_free */
-+      unsigned long nr;                   /* Current block # */
-+      u32 *p;                             /* Pointer into inode/ind
-+                                             for current block */
-+      int err;
-+
-+      if (this_bh) {                          /* For indirect block */
-+              BUFFER_TRACE(this_bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, this_bh);
-+              /* Important: if we can't update the indirect pointers
-+               * to the blocks, we can't free them. */
-+              if (err)
-+                      return;
-+      }
-+
-+      for (p = first; p < last; p++) {
-+              nr = le32_to_cpu(*p);
-+              if (nr) {
-+                      /* accumulate blocks to free if they're contiguous */
-+                      if (count == 0) {
-+                              block_to_free = nr;
-+                              block_to_free_p = p;
-+                              count = 1;
-+                      } else if (nr == block_to_free + count) {
-+                              count++;
-+                      } else {
-+                              /* run broken: release it, then start a new
-+                               * run at the current block */
-+                              ext3_clear_blocks(handle, inode, this_bh, 
-+                                                block_to_free,
-+                                                count, block_to_free_p, p);
-+                              block_to_free = nr;
-+                              block_to_free_p = p;
-+                              count = 1;
-+                      }
-+              }
-+      }
-+
-+      if (count > 0)  /* release the final run, if any */
-+              ext3_clear_blocks(handle, inode, this_bh, block_to_free,
-+                                count, block_to_free_p, p);
-+
-+      if (this_bh) {
-+              BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-+              ext3_journal_dirty_metadata(handle, this_bh);
-+      }
-+}
-+
-+/**
-+ *    ext3_free_branches - free an array of branches
-+ *    @handle: JBD handle for this transaction
-+ *    @inode: inode we are dealing with
-+ *    @parent_bh: the buffer_head which contains *@first and *@last
-+ *    @first: array of block numbers
-+ *    @last:  pointer immediately past the end of array
-+ *    @depth: depth of the branches to free
-+ *
-+ *    We are freeing all blocks referred from these branches (numbers are
-+ *    stored as little-endian 32-bit) and updating @inode->i_blocks
-+ *    appropriately.
-+ *
-+ *    With @depth == 0 the array holds data block numbers and the work is
-+ *    handed to ext3_free_data().  Otherwise each non-zero entry is read
-+ *    as an indirect block and freed recursively with @depth - 1, walking
-+ *    the array from @last - 1 down to @first.  Returns early, doing
-+ *    nothing more, once the handle is aborted.
-+ */
-+static void ext3_free_branches(handle_t *handle, struct inode *inode,
-+                             struct buffer_head *parent_bh,
-+                             u32 *first, u32 *last, int depth)
-+{
-+      unsigned long nr;
-+      u32 *p;
-+
-+      if (is_handle_aborted(handle))
-+              return;
-+      
-+      if (depth--) {
-+              struct buffer_head *bh;
-+              int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-+              p = last;
-+              while (--p >= first) {
-+                      nr = le32_to_cpu(*p);
-+                      if (!nr)
-+                              continue;               /* A hole */
-+
-+                      /* Go read the buffer for the next level down */
-+                      bh = sb_bread(inode->i_sb, nr);
-+
-+                      /*
-+                       * A read failure? Report error and clear slot
-+                       * (should be rare).
-+                       *
-+                       * NOTE(review): the code below only reports the
-+                       * error and moves on; *p is not cleared here.
-+                       */
-+                      if (!bh) {
-+                              ext3_error(inode->i_sb, "ext3_free_branches",
-+                                         "Read failure, inode=%ld, block=%ld",
-+                                         inode->i_ino, nr);
-+                              continue;
-+                      }
-+
-+                      /* This zaps the entire block.  Bottom up. */
-+                      BUFFER_TRACE(bh, "free child branches");
-+                      ext3_free_branches(handle, inode, bh, (u32*)bh->b_data,
-+                                         (u32*)bh->b_data + addr_per_block,
-+                                         depth);
-+
-+                      /*
-+                       * We've probably journalled the indirect block several
-+                       * times during the truncate.  But it's no longer
-+                       * needed and we now drop it from the transaction via
-+                       * journal_revoke().
-+                       *
-+                       * That's easy if it's exclusively part of this
-+                       * transaction.  But if it's part of the committing
-+                       * transaction then journal_forget() will simply
-+                       * brelse() it.  That means that if the underlying
-+                       * block is reallocated in ext3_get_block(),
-+                       * unmap_underlying_metadata() will find this block
-+                       * and will try to get rid of it.  damn, damn.
-+                       *
-+                       * If this block has already been committed to the
-+                       * journal, a revoke record will be written.  And
-+                       * revoke records must be emitted *before* clearing
-+                       * this block's bit in the bitmaps.
-+                       */
-+                      ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
-+
-+                      /*
-+                       * Everything below this pointer has been
-+                       * released.  Now let this top-of-subtree go.
-+                       *
-+                       * We want the freeing of this indirect block to be
-+                       * atomic in the journal with the updating of the
-+                       * bitmap block which owns it.  So make some room in
-+                       * the journal.
-+                       *
-+                       * We zero the parent pointer *after* freeing its
-+                       * pointee in the bitmaps, so if extend_transaction()
-+                       * for some reason fails to put the bitmap changes and
-+                       * the release into the same transaction, recovery
-+                       * will merely complain about releasing a free block,
-+                       * rather than leaking blocks.
-+                       */
-+                      if (is_handle_aborted(handle))
-+                              return;
-+                      if (try_to_extend_transaction(handle, inode)) {
-+                              ext3_mark_inode_dirty(handle, inode);
-+                              ext3_journal_test_restart(handle, inode);
-+                      }
-+
-+                      ext3_free_blocks(handle, inode, nr, 1);
-+
-+                      if (parent_bh) {
-+                              /*
-+                               * The block which we have just freed is
-+                               * pointed to by an indirect block: journal it
-+                               */
-+                              BUFFER_TRACE(parent_bh, "get_write_access");
-+                              if (!ext3_journal_get_write_access(handle,
-+                                                                 parent_bh)){
-+                                      *p = 0;
-+                                      BUFFER_TRACE(parent_bh,
-+                                      "call ext3_journal_dirty_metadata");
-+                                      ext3_journal_dirty_metadata(handle, 
-+                                                                  parent_bh);
-+                              }
-+                      }
-+              }
-+      } else {
-+              /* We have reached the bottom of the tree. */
-+              BUFFER_TRACE(parent_bh, "free data blocks");
-+              ext3_free_data(handle, inode, parent_bh, first, last);
-+      }
-+}
-+
-+/*
-+ * ext3_truncate()
-+ *
-+ * Release every block of @inode that lies beyond its current i_size.  Does
-+ * nothing for inodes that are not regular files, directories or symlinks,
-+ * or that are append-only or immutable.  No status is returned.
-+ *
-+ * We block out ext3_get_block() block instantiations across the entire
-+ * transaction, and VFS/VM ensures that ext3_truncate() cannot run
-+ * simultaneously on behalf of the same inode.
-+ *
-+ * As we work through the truncate and commit bits of it to the journal there
-+ * is one core, guiding principle: the file's tree must always be consistent on
-+ * disk.  We must be able to restart the truncate after a crash.
-+ *
-+ * The file's tree may be transiently inconsistent in memory (although it
-+ * probably isn't), but whenever we close off and commit a journal transaction,
-+ * the contents of (the filesystem + the journal) must be consistent and
-+ * restartable.  It's pretty simple, really: bottom up, right to left (although
-+ * left-to-right works OK too).
-+ *
-+ * Note that at recovery time, journal replay occurs *before* the restart of
-+ * truncate against the orphan inode list.
-+ *
-+ * The committed inode has the new, desired i_size (which is the same as
-+ * i_disksize in this case).  After a crash, ext3_orphan_cleanup() will see
-+ * that this inode's truncate did not complete and it will again call
-+ * ext3_truncate() to have another go.  So there will be instantiated blocks
-+ * to the right of the truncation point in a crashed ext3 filesystem.  But
-+ * that's fine - as long as they are linked from the inode, the post-crash
-+ * ext3_truncate() run will find them and release them.
-+ */
-+
-+void ext3_truncate(struct inode * inode)
-+{
-+      handle_t *handle;
-+      u32 *i_data = inode->u.ext3_i.i_data;
-+      int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
-+      int offsets[4];
-+      Indirect chain[4];
-+      Indirect *partial;
-+      int nr = 0;     /* NOTE(review): &nr is passed below where u32 * is expected */
-+      int n;
-+      long last_block;
-+      unsigned blocksize;
-+
-+      if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-+          S_ISLNK(inode->i_mode)))
-+              return;
-+      if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-+              return;
-+
-+      ext3_discard_prealloc(inode);
-+
-+      handle = start_transaction(inode);
-+      if (IS_ERR(handle))
-+              return;         /* AKPM: return what? */
-+
-+      blocksize = inode->i_sb->s_blocksize;
-+      last_block = (inode->i_size + blocksize-1)
-+                                      >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-+
-+      ext3_block_truncate_page(handle, inode->i_mapping, inode->i_size); /* NOTE(review): result ignored */
-+              
-+
-+      n = ext3_block_to_path(inode, last_block, offsets);
-+      if (n == 0)
-+              goto out_stop;  /* error */
-+
-+      /*
-+       * OK.  This truncate is going to happen.  We add the inode to the
-+       * orphan list, so that if this truncate spans multiple transactions,
-+       * and we crash, we will resume the truncate when the filesystem
-+       * recovers.  It also marks the inode dirty, to catch the new size.
-+       *
-+       * Implication: the file must always be in a sane, consistent
-+       * truncatable state while each transaction commits.
-+       */
-+      if (ext3_orphan_add(handle, inode))
-+              goto out_stop;
-+
-+      /*
-+       * The orphan list entry will now protect us from any crash which
-+       * occurs before the truncate completes, so it is now safe to propagate
-+       * the new, shorter inode size (held for now in i_size) into the
-+       * on-disk inode. We do this via i_disksize, which is the value which
-+       * ext3 *really* writes onto the disk inode.
-+       */
-+      inode->u.ext3_i.i_disksize = inode->i_size;
-+
-+      /*
-+       * From here we block out all ext3_get_block() callers who want to
-+       * modify the block allocation tree.
-+       */
-+      down_write(&inode->u.ext3_i.truncate_sem);
-+
-+      if (n == 1) {           /* direct blocks */
-+              ext3_free_data(handle, inode, NULL, i_data+offsets[0],
-+                             i_data + EXT3_NDIR_BLOCKS);
-+              goto do_indirects;
-+      }
-+
-+      partial = ext3_find_shared(inode, n, offsets, chain, &nr);
-+      /* Kill the top of shared branch (not detached) */
-+      if (nr) {
-+              if (partial == chain) {
-+                      /* Shared branch grows from the inode */
-+                      ext3_free_branches(handle, inode, NULL,
-+                                         &nr, &nr+1, (chain+n-1) - partial);
-+                      *partial->p = 0;
-+                      /*
-+                       * We mark the inode dirty prior to restart,
-+                       * and prior to stop.  No need for it here.
-+                       */
-+              } else {
-+                      /* Shared branch grows from an indirect block */
-+                      BUFFER_TRACE(partial->bh, "get_write_access");
-+                      ext3_free_branches(handle, inode, partial->bh,
-+                                      partial->p,
-+                                      partial->p+1, (chain+n-1) - partial);
-+              }
-+      }
-+      /* Clear the ends of indirect blocks on the shared branch */
-+      while (partial > chain) {
-+              ext3_free_branches(handle, inode, partial->bh, partial->p + 1,
-+                                 (u32*)partial->bh->b_data + addr_per_block,
-+                                 (chain+n-1) - partial);
-+              BUFFER_TRACE(partial->bh, "call brelse");
-+              brelse (partial->bh);
-+              partial--;
-+      }
-+do_indirects:
-+      /* Kill the remaining (whole) subtrees */
-+      switch (offsets[0]) {
-+              default:
-+                      nr = i_data[EXT3_IND_BLOCK];
-+                      if (nr) {
-+                              ext3_free_branches(handle, inode, NULL,
-+                                                 &nr, &nr+1, 1);
-+                              i_data[EXT3_IND_BLOCK] = 0;
-+                      }
-+                      /* fall through */
-+              case EXT3_IND_BLOCK:
-+                      nr = i_data[EXT3_DIND_BLOCK];
-+                      if (nr) {
-+                              ext3_free_branches(handle, inode, NULL,
-+                                                 &nr, &nr+1, 2);
-+                              i_data[EXT3_DIND_BLOCK] = 0;
-+                      }
-+                      /* fall through */
-+              case EXT3_DIND_BLOCK:
-+                      nr = i_data[EXT3_TIND_BLOCK];
-+                      if (nr) {
-+                              ext3_free_branches(handle, inode, NULL,
-+                                                 &nr, &nr+1, 3);
-+                              i_data[EXT3_TIND_BLOCK] = 0;
-+                      }
-+                      /* fall through */
-+              case EXT3_TIND_BLOCK:
-+                      ;
-+      }
-+      up_write(&inode->u.ext3_i.truncate_sem);
-+      inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-+      ext3_mark_inode_dirty(handle, inode);
-+
-+      /* In a multi-transaction truncate, we only make the final
-+       * transaction synchronous */
-+      if (IS_SYNC(inode))
-+              handle->h_sync = 1;
-+out_stop:
-+      /*
-+       * If this was a simple ftruncate(), and the file will remain alive
-+       * then we need to clear up the orphan record which we created above.
-+       * However, if this was a real unlink then we were called by
-+       * ext3_delete_inode(), and we allow that function to clean up the
-+       * orphan info for us.
-+       */
-+      if (inode->i_nlink)
-+              ext3_orphan_del(handle, inode);
-+
-+      ext3_journal_stop(handle, inode);
-+}
-+
-+/*
-+ * ext3_get_inode_loc: locate inode's raw on-disk copy and fill in *iloc.
-+ * Returns 0 with an extra refcount held on iloc->bh, or -EIO on failure.
-+ */
-+
-+int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
-+{
-+      struct buffer_head *bh = NULL;
-+      unsigned long block;
-+      unsigned long block_group;
-+      unsigned long group_desc;
-+      unsigned long desc;
-+      unsigned long offset;
-+      struct ext3_group_desc * gdp;
-+      /* Only the named special inodes may lie below EXT3_FIRST_INO. */
-+      if ((inode->i_ino != EXT3_ROOT_INO &&
-+              inode->i_ino != EXT3_ACL_IDX_INO &&
-+              inode->i_ino != EXT3_ACL_DATA_INO &&
-+              inode->i_ino != EXT3_JOURNAL_INO &&
-+              inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
-+              inode->i_ino > le32_to_cpu(
-+                      inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) {
-+              ext3_error (inode->i_sb, "ext3_get_inode_loc",
-+                          "bad inode number: %lu", inode->i_ino);
-+              goto bad_inode;
-+      }
-+      block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
-+      if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) {
-+              ext3_error (inode->i_sb, "ext3_get_inode_loc",
-+                          "group >= groups count");
-+              goto bad_inode;
-+      }
-+      group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
-+      desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
-+      bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc];
-+      if (!bh) {
-+              ext3_error (inode->i_sb, "ext3_get_inode_loc",
-+                          "Descriptor not loaded");
-+              goto bad_inode;
-+      }
-+
-+      gdp = (struct ext3_group_desc *) bh->b_data;
-+      /*
-+       * Figure out the offset within the block group inode table
-+       */
-+      offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
-+              EXT3_INODE_SIZE(inode->i_sb);
-+      block = le32_to_cpu(gdp[desc].bg_inode_table) +
-+              (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
-+      if (!(bh = sb_bread(inode->i_sb, block))) {
-+              ext3_error (inode->i_sb, "ext3_get_inode_loc",
-+                          "unable to read inode block - "
-+                          "inode=%lu, block=%lu", inode->i_ino, block);
-+              goto bad_inode;
-+      }
-+      offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
-+
-+      iloc->bh = bh;
-+      iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
-+      iloc->block_group = block_group;
-+      
-+      return 0;
-+      
-+ bad_inode:
-+      return -EIO;
-+}
-+/* Fill the in-core inode from its on-disk copy; mark it bad on failure. */
-+void ext3_read_inode(struct inode * inode)
-+{
-+      struct ext3_iloc iloc;
-+      struct ext3_inode *raw_inode;
-+      struct buffer_head *bh;
-+      int block;
-+      /* Locate the raw inode; iloc.bh holds a reference until brelse. */
-+      if(ext3_get_inode_loc(inode, &iloc))
-+              goto bad_inode;
-+      bh = iloc.bh;
-+      raw_inode = iloc.raw_inode;
-+      init_rwsem(&inode->u.ext3_i.truncate_sem);
-+      inode->i_mode = le16_to_cpu(raw_inode->i_mode);
-+      inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
-+      inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
-+      if(!(test_opt (inode->i_sb, NO_UID32))) {
-+              inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
-+              inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
-+      }
-+      inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
-+      inode->i_size = le32_to_cpu(raw_inode->i_size);
-+      inode->i_atime = le32_to_cpu(raw_inode->i_atime);
-+      inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
-+      inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
-+      inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
-+      /* We now have enough fields to check if the inode was active or not.
-+       * This is needed because nfsd might try to access dead inodes;
-+       * the test is the same one that e2fsck uses.
-+       * NeilBrown 1999oct15
-+       */
-+      if (inode->i_nlink == 0) {
-+              if (inode->i_mode == 0 ||
-+                  !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) {
-+                      /* this inode is deleted */
-+                      brelse (bh);
-+                      goto bad_inode;
-+              }
-+              /* The only unlinked inodes we let through here have
-+               * valid i_mode and are being read by the orphan
-+               * recovery code: that's fine, we're about to complete
-+               * the process of deleting those. */
-+      }
-+      inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
-+                                       * (for stat), not the fs block
-+                                       * size */  
-+      inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
-+      inode->i_version = ++event;
-+      inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags);
-+#ifdef EXT3_FRAGMENTS
-+      inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
-+      inode->u.ext3_i.i_frag_no = raw_inode->i_frag;
-+      inode->u.ext3_i.i_frag_size = raw_inode->i_fsize;
-+#endif
-+      inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
-+      if (!S_ISREG(inode->i_mode)) {
-+              inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
-+      } else {
-+              inode->i_size |=
-+                      ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
-+      }
-+      inode->u.ext3_i.i_disksize = inode->i_size;
-+      inode->i_generation = le32_to_cpu(raw_inode->i_generation);
-+#ifdef EXT3_PREALLOCATE
-+      inode->u.ext3_i.i_prealloc_count = 0;
-+#endif
-+      inode->u.ext3_i.i_block_group = iloc.block_group;
-+
-+      /*
-+       * NOTE! The in-memory inode i_data array is in little-endian order
-+       * even on big-endian machines: we do NOT byteswap the block numbers!
-+       */
-+      for (block = 0; block < EXT3_N_BLOCKS; block++)
-+              inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block];
-+      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
-+
-+      brelse (iloc.bh);
-+      /* Select inode/file/address-space operations by file type. */
-+      if (inode->i_ino == EXT3_ACL_IDX_INO ||
-+          inode->i_ino == EXT3_ACL_DATA_INO)
-+              /* Nothing to do */ ;
-+      else if (S_ISREG(inode->i_mode)) {
-+              inode->i_op = &ext3_file_inode_operations;
-+              inode->i_fop = &ext3_file_operations;
-+              inode->i_mapping->a_ops = &ext3_aops;
-+      } else if (S_ISDIR(inode->i_mode)) {
-+              inode->i_op = &ext3_dir_inode_operations;
-+              inode->i_fop = &ext3_dir_operations;
-+      } else if (S_ISLNK(inode->i_mode)) {
-+              if (!inode->i_blocks)
-+                      inode->i_op = &ext3_fast_symlink_inode_operations;
-+              else {
-+                      inode->i_op = &page_symlink_inode_operations;
-+                      inode->i_mapping->a_ops = &ext3_aops;
-+              }
-+      } else 
-+              init_special_inode(inode, inode->i_mode,
-+                                 le32_to_cpu(iloc.raw_inode->i_block[0]));
-+      /* inode->i_attr_flags = 0;                             unused */
-+      if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
-+              /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
-+              inode->i_flags |= S_SYNC;
-+      }
-+      if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) {
-+              /* inode->i_attr_flags |= ATTR_FLAG_APPEND;     unused */
-+              inode->i_flags |= S_APPEND;
-+      }
-+      if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) {
-+              /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;  unused */
-+              inode->i_flags |= S_IMMUTABLE;
-+      }
-+      if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) {
-+              /* inode->i_attr_flags |= ATTR_FLAG_NOATIME;    unused */
-+              inode->i_flags |= S_NOATIME;
-+      }
-+      return;
-+      
-+bad_inode:
-+      make_bad_inode(inode);
-+      return;
-+}
-+
-+/*
-+ * Post the struct inode info into an on-disk inode location in the
-+ * buffer-cache.  This gobbles the caller's reference to the
-+ * buffer_head in the inode location struct.  
-+ */
-+
-+static int ext3_do_update_inode(handle_t *handle, 
-+                              struct inode *inode, 
-+                              struct ext3_iloc *iloc)
-+{
-+      struct ext3_inode *raw_inode = iloc->raw_inode;
-+      struct buffer_head *bh = iloc->bh;
-+      int err = 0, rc, block;
-+
-+      if (handle) {
-+              BUFFER_TRACE(bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, bh);
-+              if (err)
-+                      goto out_brelse;
-+      }
-+      raw_inode->i_mode = cpu_to_le16(inode->i_mode);
-+      if(!(test_opt(inode->i_sb, NO_UID32))) {
-+              raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
-+              raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
-+/*
-+ * Fix up interoperability with old kernels. Otherwise, old inodes get
-+ * re-used with the upper 16 bits of the uid/gid intact
-+ */
-+              if(!inode->u.ext3_i.i_dtime) {
-+                      raw_inode->i_uid_high =
-+                              cpu_to_le16(high_16_bits(inode->i_uid));
-+                      raw_inode->i_gid_high =
-+                              cpu_to_le16(high_16_bits(inode->i_gid));
-+              } else {
-+                      raw_inode->i_uid_high = 0;
-+                      raw_inode->i_gid_high = 0;
-+              }
-+      } else {
-+              raw_inode->i_uid_low =
-+                      cpu_to_le16(fs_high2lowuid(inode->i_uid));
-+              raw_inode->i_gid_low =
-+                      cpu_to_le16(fs_high2lowgid(inode->i_gid));
-+              raw_inode->i_uid_high = 0;
-+              raw_inode->i_gid_high = 0;
-+      }
-+      raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
-+      raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize);
-+      raw_inode->i_atime = cpu_to_le32(inode->i_atime);
-+      raw_inode->i_ctime = cpu_to_le32(inode->i_ctime);
-+      raw_inode->i_mtime = cpu_to_le32(inode->i_mtime);
-+      raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
-+      raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime);
-+      raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags);
-+#ifdef EXT3_FRAGMENTS
-+      raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr);
-+      raw_inode->i_frag = inode->u.ext3_i.i_frag_no;
-+      raw_inode->i_fsize = inode->u.ext3_i.i_frag_size;
-+#else
-+      /* If we are not tracking these fields in the in-memory inode,
-+       * then preserve them on disk, but still initialise them to zero
-+       * for new inodes. */
-+      if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
-+              raw_inode->i_faddr = 0;
-+              raw_inode->i_frag = 0;
-+              raw_inode->i_fsize = 0;
-+      }
-+#endif
-+      raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl);
-+      if (!S_ISREG(inode->i_mode)) {
-+              raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl);
-+      } else {
-+              raw_inode->i_size_high =
-+                      cpu_to_le32(inode->u.ext3_i.i_disksize >> 32);
-+              if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) {
-+                      struct super_block *sb = inode->i_sb;
-+                      if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
-+                                      EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
-+                          EXT3_SB(sb)->s_es->s_rev_level ==
-+                                      cpu_to_le32(EXT3_GOOD_OLD_REV)) {
-+                             /* If this is the first large file
-+                              * created, add a flag to the superblock.
-+                              */
-+                              err = ext3_journal_get_write_access(handle,
-+                                              sb->u.ext3_sb.s_sbh);
-+                              if (err)
-+                                      goto out_brelse;
-+                              ext3_update_dynamic_rev(sb);
-+                              EXT3_SET_RO_COMPAT_FEATURE(sb,
-+                                      EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-+                              sb->s_dirt = 1;
-+                              handle->h_sync = 1;
-+                              err = ext3_journal_dirty_metadata(handle,
-+                                              sb->u.ext3_sb.s_sbh);
-+                      }
-+              }
-+      }
-+      raw_inode->i_generation = le32_to_cpu(inode->i_generation);
-+      if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
-+              raw_inode->i_block[0] =
-+                      cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
-+      else for (block = 0; block < EXT3_N_BLOCKS; block++)
-+              raw_inode->i_block[block] = inode->u.ext3_i.i_data[block];
-+
-+      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+      rc = ext3_journal_dirty_metadata(handle, bh);
-+      if (!err)
-+              err = rc;
-+      EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
-+
-+out_brelse:
-+      brelse (bh);
-+      ext3_std_error(inode->i_sb, err);
-+      return err;
-+}
-+
-+/*
-+ * ext3_write_inode()
-+ *
-+ * We are called from a few places:
-+ *
-+ * - Within generic_file_write() for O_SYNC files.
-+ *   Here, there will be no transaction running. We wait for any running
-+ *   transaction to commit.
-+ *
-+ * - Within sys_sync(), kupdate and such.
-+ *   We wait on commit, if told to.
-+ *
-+ * - Within prune_icache() (PF_MEMALLOC == true)
-+ *   Here we simply return.  We can't afford to block kswapd on the
-+ *   journal commit.
-+ *
-+ * In all cases it is actually safe for us to return without doing anything,
-+ * because the inode has been copied into a raw inode buffer in
-+ * ext3_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
-+ * knfsd.
-+ *
-+ * Note that we are absolutely dependent upon all inode dirtiers doing the
-+ * right thing: they *must* call mark_inode_dirty() after dirtying info in
-+ * which we are interested.
-+ *
-+ * It would be a bug for them to not do this.  The code:
-+ *
-+ *    mark_inode_dirty(inode)
-+ *    stuff();
-+ *    inode->i_size = expr;
-+ *
-+ * is in error because a kswapd-driven write_inode() could occur while
-+ * `stuff()' is running, and the new i_size will be lost.  Plus the inode
-+ * will no longer be on the superblock's dirty inode list.
-+ */
-+void ext3_write_inode(struct inode *inode, int wait)
-+{
-+      if (current->flags & PF_MEMALLOC)
-+              return;
-+
-+      if (ext3_journal_current_handle()) {
-+              jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
-+              return;
-+      }
-+      /* A caller that does not want to wait needs nothing further. */
-+      if (!wait)
-+              return;
-+      /* Waiting caller: commit via ext3_force_commit(). */
-+      ext3_force_commit(inode->i_sb); 
-+}
-+
-+/*
-+ * ext3_setattr()
-+ *
-+ * Called from notify_change.
-+ *
-+ * We want to trap VFS attempts to truncate the file as soon as
-+ * possible.  In particular, we want to make sure that when the VFS
-+ * shrinks i_size, we put the inode on the orphan list and modify
-+ * i_disksize immediately, so that during the subsequent flushing of
-+ * dirty pages and freeing of disk blocks, we can guarantee that any
-+ * commit will leave the blocks being flushed in an unused state on
-+ * disk.  (On recovery, the inode will get truncated and the blocks will
-+ * be freed, so we have a strong guarantee that no future commit will
-+ * leave these blocks visible to the user.)  
-+ *
-+ * This is only needed for regular files.  rmdir() has its own path, and
-+ * we can never truncate a directory except on final unlink (at which
-+ * point i_nlink is zero so recovery is easy.)
-+ *
-+ * Called with the BKL.  
-+ */
-+
-+int ext3_setattr(struct dentry *dentry, struct iattr *attr)
-+{
-+      struct inode *inode = dentry->d_inode;
-+      int error, rc = 0;
-+      const unsigned int ia_valid = attr->ia_valid;
-+
-+      error = inode_change_ok(inode, attr);
-+      if (error)
-+              return error;
-+      /* A changed uid or gid goes through DQUOT_TRANSFER first. */
-+      if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-+              (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-+              error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
-+              if (error)
-+                      return error;
-+      }
-+      /* Shrinking: add the orphan record and set i_disksize up front. */
-+      if (attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
-+              handle_t *handle;
-+
-+              handle = ext3_journal_start(inode, 3);
-+              if (IS_ERR(handle)) {
-+                      error = PTR_ERR(handle);
-+                      goto err_out;
-+              }
-+              
-+              error = ext3_orphan_add(handle, inode);
-+              inode->u.ext3_i.i_disksize = attr->ia_size;
-+              rc = ext3_mark_inode_dirty(handle, inode);
-+              if (!error)
-+                      error = rc;
-+              ext3_journal_stop(handle, inode);
-+      }
-+      
-+      rc = inode_setattr(inode, attr);
-+
-+      /* If inode_setattr's call to ext3_truncate failed to get a
-+       * transaction handle at all, we need to clean up the in-core
-+       * orphan list manually. */
-+      if (inode->i_nlink)
-+              ext3_orphan_del(NULL, inode);
-+
-+err_out:
-+      ext3_std_error(inode->i_sb, error);
-+      if (!error)
-+              error = rc;     /* report inode_setattr's result otherwise */
-+      return error;
-+}
-+
-+
-+/*
-+ * akpm: how many blocks doth make a writepage()?
-+ *
-+ * With N blocks per page, it may be:
-+ * N data blocks
-+ * 2 indirect block
-+ * 2 dindirect
-+ * 1 tindirect
-+ * N+5 bitmap blocks (from the above)
-+ * N+5 group descriptor summary blocks
-+ * 1 inode block
-+ * 1 superblock.
-+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files
-+ *
-+ * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
-+ *
-+ * With ordered or writeback data it's the same, less the N data blocks.
-+ *
-+ * If the inode's direct blocks can hold an integral number of pages then a
-+ * page cannot straddle two indirect blocks, and we can only touch one indirect
-+ * and dindirect block, and the "5" above becomes "3".
-+ *
-+ * This still overestimates under most circumstances.  If we were to pass the
-+ * start and end offsets in here as well we could do block_to_path() on each
-+ * block and work out the exact number of indirects which are touched.  Pah.
-+ */
-+
-+int ext3_writepage_trans_blocks(struct inode *inode)
-+{
-+      int bpp = ext3_journal_blocks_per_page(inode);
-+      int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
-+      int ret;
-+      
-+      if (ext3_should_journal_data(inode))
-+              ret = 3 * (bpp + indirects) + 2;
-+      else
-+              ret = 2 * (bpp + indirects) + 2;
-+
-+#ifdef CONFIG_QUOTA
-+      ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;        /* quota files */
-+#endif
-+
-+      return ret;
-+}
-+/* Write inode into iloc's buffer under handle, then drop iloc's bh ref. */
-+int
-+ext3_mark_iloc_dirty(handle_t *handle,
-+                   struct inode *inode,
-+                   struct ext3_iloc *iloc)
-+{
-+      int err = 0;
-+
-+      if (handle) {
-+              /* the do_update_inode consumes one bh->b_count */
-+              atomic_inc(&iloc->bh->b_count);
-+              err = ext3_do_update_inode(handle, inode, iloc);
-+              /* ext3_do_update_inode() does journal_dirty_metadata */
-+              brelse(iloc->bh);
-+      } else {
-+              printk(KERN_EMERG "%s: called with no handle!\n", __FUNCTION__);
-+      }
-+      return err;     /* 0 when there was no handle */
-+}
-+
-+/* 
-+ * On success, we end up with an outstanding reference count against
-+ * iloc->bh.  This _must_ be cleaned up later.  A NULL handle is a no-op.
-+ */
-+
-+int
-+ext3_reserve_inode_write(handle_t *handle, struct inode *inode, 
-+                       struct ext3_iloc *iloc)
-+{
-+      int err = 0;
-+      if (handle) {
-+              err = ext3_get_inode_loc(inode, iloc);
-+              if (!err) {
-+                      BUFFER_TRACE(iloc->bh, "get_write_access");
-+                      err = ext3_journal_get_write_access(handle, iloc->bh);
-+                      if (err) {
-+                              brelse(iloc->bh);
-+                              iloc->bh = NULL; /* ref already dropped */
-+                      }
-+              }
-+      }
-+      ext3_std_error(inode->i_sb, err);
-+      return err;
-+}
-+
-+/*
-+ * akpm: What we do here is to mark the in-core inode as clean
-+ * with respect to inode dirtiness (it may still be data-dirty).
-+ * This means that the in-core inode may be reaped by prune_icache
-+ * without having to perform any I/O.  This is a very good thing,
-+ * because *any* task may call prune_icache - even ones which
-+ * have a transaction open against a different journal.
-+ *
-+ * Is this cheating?  Not really.  Sure, we haven't written the
-+ * inode out, but prune_icache isn't a user-visible syncing function.
-+ * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
-+ * we start and wait on commits.
-+ *
-+ * Is this efficient/effective?  Well, we're being nice to the system
-+ * by cleaning up our inodes proactively so they can be reaped
-+ * without I/O.  But we are potentially leaving up to five seconds'
-+ * worth of inodes floating about which prune_icache wants us to
-+ * write out.  One way to fix that would be to get prune_icache()
-+ * to do a write_super() to free up some memory.  It has the desired
-+ * effect.
-+ */
-+int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
-+{
-+      struct ext3_iloc iloc;
-+      int err;
-+      /* Reserve write access to the inode's buffer, then copy into it. */
-+      err = ext3_reserve_inode_write(handle, inode, &iloc);
-+      if (!err)
-+              err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-+      return err;
-+}
-+
-+/*
-+ * akpm: ext3_dirty_inode() is called from __mark_inode_dirty()
-+ *
-+ * We're really interested in the case where a file is being extended.
-+ * i_size has been changed by generic_commit_write() and we thus need
-+ * to include the updated inode in the current transaction.
-+ *
-+ * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks
-+ * are allocated to the file.
-+ *
-+ * If the inode is marked synchronous, we don't honour that here - doing
-+ * so would cause a commit on atime updates, which we don't bother doing.
-+ * We handle synchronous inodes at the highest possible level.
-+ */
-+void ext3_dirty_inode(struct inode *inode)
-+{
-+      handle_t *current_handle = ext3_journal_current_handle();
-+      handle_t *handle;
-+      /* lock_kernel() is held across the journal start/stop sequence. */
-+      lock_kernel();
-+      handle = ext3_journal_start(inode, 1);
-+      if (IS_ERR(handle))
-+              goto out;
-+      if (current_handle &&
-+              current_handle->h_transaction != handle->h_transaction) {
-+              /* This task has a transaction open against a different fs */
-+              printk(KERN_EMERG "%s: transactions do not match!\n", __FUNCTION__);
-+      } else {
-+              jbd_debug(5, "marking dirty.  outer handle=%p\n",
-+                              current_handle);
-+              ext3_mark_inode_dirty(handle, inode);
-+      }
-+      ext3_journal_stop(handle, inode);
-+out:
-+      unlock_kernel();
-+}
-+
-+#ifdef AKPM
-+/* 
-+ * Bind an inode's backing buffer_head into this transaction, to prevent
-+ * it from being flushed to disk early.  Unlike
-+ * ext3_reserve_inode_write, this leaves behind no bh reference and
-+ * returns no iloc structure, so the caller needs to repeat the iloc
-+ * lookup to mark the inode dirty later.
-+ */
-+static inline int
-+ext3_pin_inode(handle_t *handle, struct inode *inode)
-+{
-+      struct ext3_iloc iloc;
-+      
-+      int err = 0;
-+      if (handle) {
-+              err = ext3_get_inode_loc(inode, &iloc);
-+              if (!err) {
-+                      BUFFER_TRACE(iloc.bh, "get_write_access");
-+                      err = journal_get_write_access(handle, iloc.bh);
-+                      if (!err)
-+                              err = ext3_journal_dirty_metadata(handle, 
-+                                                                iloc.bh);
-+                      brelse(iloc.bh);        /* drop the lookup's reference */
-+              }
-+      }
-+      ext3_std_error(inode->i_sb, err);
-+      return err;     /* stays 0 when handle is NULL */
-+}
-+#endif /* AKPM */
-+/* Set (val != 0) or clear EXT3_JOURNAL_DATA_FL on inode; 0 or -errno. */
-+int ext3_change_inode_journal_flag(struct inode *inode, int val)
-+{
-+      journal_t *journal;
-+      handle_t *handle;
-+      int err;
-+
-+      /*
-+       * We have to be very careful here: changing a data block's
-+       * journaling status dynamically is dangerous.  If we write a
-+       * data block to the journal, change the status and then delete
-+       * that block, we risk forgetting to revoke the old log record
-+       * from the journal and so a subsequent replay can corrupt data.
-+       * So, first we make sure that the journal is empty and that
-+       * nobody is changing anything.
-+       */
-+
-+      journal = EXT3_JOURNAL(inode);
-+      if (is_journal_aborted(journal) || IS_RDONLY(inode))
-+              return -EROFS;
-+      
-+      journal_lock_updates(journal);
-+      journal_flush(journal);
-+
-+      /*
-+       * OK, there are no updates running now, and all cached data is
-+       * synced to disk.  We are now in a completely consistent state
-+       * which doesn't have anything in the journal, and we know that
-+       * no filesystem updates are running, so it is safe to modify
-+       * the inode's in-core data-journaling state flag now.
-+       */
-+
-+      if (val)
-+              inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
-+      else
-+              inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL;
-+
-+      journal_unlock_updates(journal);
-+
-+      /* Finally we can mark the inode as dirty. */
-+
-+      handle = ext3_journal_start(inode, 1);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      err = ext3_mark_inode_dirty(handle, inode);
-+      handle->h_sync = 1;     /* set before the handle is stopped */
-+      ext3_journal_stop(handle, inode);
-+      ext3_std_error(inode->i_sb, err);
-+      
-+      return err;
-+}
-+
-+
-+/*
-+ * ext3_aops_journal_start().
-+ *
-+ * <This function died, but the comment lives on>
-+ *
-+ * We need to take the inode semaphore *outside* the
-+ * journal_start/journal_stop.  Otherwise, a different task could do a
-+ * wait_for_commit() while holding ->i_sem, which deadlocks.  The rule
-+ * is: transaction open/closes are considered to be a locking operation
-+ * and they nest *inside* ->i_sem.
-+ * ----------------------------------------------------------------------------
-+ * Possible problem:
-+ *    ext3_file_write()
-+ *    -> generic_file_write()
-+ *       -> __alloc_pages()
-+ *          -> page_launder()
-+ *             -> ext3_writepage()
-+ *
-+ * And the writepage can be on a different fs while we have a
-+ * transaction open against this one!  Bad.
-+ *
-+ * I tried making the task PF_MEMALLOC here, but that simply results in
-+ * 0-order allocation failures passed back to generic_file_write().
-+ * Instead, we rely on the reentrancy protection in ext3_writepage().
-+ * ----------------------------------------------------------------------------
-+ * When we do the journal_start() here we don't really need to reserve
-+ * any blocks - we won't need any until we hit ext3_prepare_write(),
-+ * which does all the needed journal extending.  However!  There is a
-+ * problem with quotas:
-+ *
-+ * Thread 1:
-+ * sys_sync
-+ * ->sync_dquots
-+ *   ->commit_dquot
-+ *     ->lock_dquot
-+ *     ->write_dquot
-+ *       ->ext3_file_write
-+ *         ->journal_start
-+ *         ->ext3_prepare_write
-+ *           ->journal_extend
-+ *           ->journal_start
-+ * Thread 2:
-+ * ext3_create                (for example)
-+ * ->ext3_new_inode
-+ *   ->dquot_initialize
-+ *     ->lock_dquot
-+ *
-+ * Deadlock.  Thread 1's journal_start blocks because thread 2 has a
-+ * transaction open.  Thread 2's transaction will never close because
-+ * thread 2 is stuck waiting for the dquot lock.
-+ *
-+ * So.  We must ensure that thread 1 *never* needs to extend the journal
-+ * for quota writes.  We do that by reserving enough journal blocks
-+ * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we
-+ * need to extend" test in ext3_prepare_write() succeeds.  
-+ */
-diff -rup --new-file linux.mcp2/fs/ext3/ioctl.c linux_tmp/fs/ext3/ioctl.c
---- linux.mcp2/fs/ext3/ioctl.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/ioctl.c  2001-11-09 14:25:04.000000000 -0800
-@@ -0,0 +1,170 @@
-+/*
-+ * linux/fs/ext3/ioctl.c
-+ *
-+ * Copyright (C) 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/sched.h>
-+#include <asm/uaccess.h>
-+
-+
-+int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
-+              unsigned long arg)
-+{
-+      unsigned int flags;
-+
-+      ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
-+
-+      switch (cmd) {
-+      case EXT3_IOC_GETFLAGS:
-+              flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE;
-+              return put_user(flags, (int *) arg);
-+      case EXT3_IOC_SETFLAGS: {
-+              handle_t *handle = NULL;
-+              int err;
-+              struct ext3_iloc iloc;
-+              unsigned int oldflags;
-+              unsigned int jflag;
-+
-+              if (IS_RDONLY(inode))
-+                      return -EROFS;
-+
-+              if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
-+                      return -EPERM;
-+
-+              if (get_user(flags, (int *) arg))
-+                      return -EFAULT;
-+
-+              oldflags = inode->u.ext3_i.i_flags;
-+
-+              /* The JOURNAL_DATA flag is modifiable only by root */
-+              jflag = flags & EXT3_JOURNAL_DATA_FL;
-+
-+              /*
-+               * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-+               * the relevant capability.
-+               *
-+               * This test looks nicer. Thanks to Pauline Middelink
-+               */
-+              if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
-+                      if (!capable(CAP_LINUX_IMMUTABLE))
-+                              return -EPERM;
-+              }
-+              
-+              /*
-+               * The JOURNAL_DATA flag can only be changed by
-+               * the relevant capability.
-+               */
-+              if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
-+                      if (!capable(CAP_SYS_RESOURCE))
-+                              return -EPERM;
-+              }
-+
-+
-+              handle = ext3_journal_start(inode, 1);
-+              if (IS_ERR(handle))
-+                      return PTR_ERR(handle);
-+              if (IS_SYNC(inode))
-+                      handle->h_sync = 1;
-+              err = ext3_reserve_inode_write(handle, inode, &iloc);
-+              if (err)
-+                      goto flags_err;
-+              
-+              flags = flags & EXT3_FL_USER_MODIFIABLE;
-+              flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
-+              inode->u.ext3_i.i_flags = flags;
-+
-+              if (flags & EXT3_SYNC_FL)
-+                      inode->i_flags |= S_SYNC;
-+              else
-+                      inode->i_flags &= ~S_SYNC;
-+              if (flags & EXT3_APPEND_FL)
-+                      inode->i_flags |= S_APPEND;
-+              else
-+                      inode->i_flags &= ~S_APPEND;
-+              if (flags & EXT3_IMMUTABLE_FL)
-+                      inode->i_flags |= S_IMMUTABLE;
-+              else
-+                      inode->i_flags &= ~S_IMMUTABLE;
-+              if (flags & EXT3_NOATIME_FL)
-+                      inode->i_flags |= S_NOATIME;
-+              else
-+                      inode->i_flags &= ~S_NOATIME;
-+              inode->i_ctime = CURRENT_TIME;
-+
-+              err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-+flags_err:
-+              ext3_journal_stop(handle, inode);
-+              if (err)
-+                      return err;
-+              
-+              if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
-+                      err = ext3_change_inode_journal_flag(inode, jflag);
-+              return err;
-+      }
-+      case EXT3_IOC_GETVERSION:
-+      case EXT3_IOC_GETVERSION_OLD:
-+              return put_user(inode->i_generation, (int *) arg);
-+      case EXT3_IOC_SETVERSION:
-+      case EXT3_IOC_SETVERSION_OLD: {
-+              handle_t *handle;
-+              struct ext3_iloc iloc;
-+              __u32 generation;
-+              int err;
-+
-+              if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
-+                      return -EPERM;
-+              if (IS_RDONLY(inode))
-+                      return -EROFS;
-+              if (get_user(generation, (int *) arg))
-+                      return -EFAULT;
-+
-+              handle = ext3_journal_start(inode, 1);
-+              if (IS_ERR(handle))
-+                      return PTR_ERR(handle);
-+              err = ext3_reserve_inode_write(handle, inode, &iloc);
-+              if (err)
-+                      return err;
-+
-+              inode->i_ctime = CURRENT_TIME;
-+              inode->i_generation = generation;
-+
-+              err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-+              ext3_journal_stop(handle, inode);
-+              return err;
-+      }
-+#ifdef CONFIG_JBD_DEBUG
-+      case EXT3_IOC_WAIT_FOR_READONLY:
-+              /*
-+               * This is racy - by the time we're woken up and running,
-+               * the superblock could be released.  And the module could
-+               * have been unloaded.  So sue me.
-+               *
-+               * Returns 1 if it slept, else zero.
-+               */
-+              {
-+                      struct super_block *sb = inode->i_sb;
-+                      DECLARE_WAITQUEUE(wait, current);
-+                      int ret = 0;
-+
-+                      set_current_state(TASK_INTERRUPTIBLE);
-+                      add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
-+                      if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) {
-+                              schedule();
-+                              ret = 1;
-+                      }
-+                      remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
-+                      return ret;
-+              }
-+#endif
-+      default:
-+              return -ENOTTY;
-+      }
-+}
-diff -rup --new-file linux.mcp2/fs/ext3/namei.c linux_tmp/fs/ext3/namei.c
---- linux.mcp2/fs/ext3/namei.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/namei.c  2001-11-09 14:25:04.000000000 -0800
-@@ -0,0 +1,1125 @@
-+/*
-+ *  linux/fs/ext3/namei.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/namei.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ *  Directory entry file type support and forward compatibility hooks
-+ *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/sched.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/fcntl.h>
-+#include <linux/stat.h>
-+#include <linux/string.h>
-+#include <linux/locks.h>
-+#include <linux/quotaops.h>
-+
-+
-+/*
-+ * define how far ahead to read directories while searching them.
-+ */
-+#define NAMEI_RA_CHUNKS  2
-+#define NAMEI_RA_BLOCKS  4
-+#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-+#define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
-+
-+/*
-+ * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
-+ *
-+ * `len <= EXT3_NAME_LEN' is guaranteed by caller.
-+ * `de != NULL' is guaranteed by caller.
-+ */
-+static inline int ext3_match (int len, const char * const name,
-+                            struct ext3_dir_entry_2 * de)
-+{
-+      if (len != de->name_len)
-+              return 0;
-+      if (!de->inode)
-+              return 0;
-+      return !memcmp(name, de->name, len);
-+}
-+
-+/*
-+ * Returns 0 if not found, -1 on failure, and 1 on success
-+ */
-+static int inline search_dirblock(struct buffer_head * bh,
-+                                struct inode *dir,
-+                                struct dentry *dentry,
-+                                unsigned long offset,
-+                                struct ext3_dir_entry_2 ** res_dir)
-+{
-+      struct ext3_dir_entry_2 * de;
-+      char * dlimit;
-+      int de_len;
-+      const char *name = dentry->d_name.name;
-+      int namelen = dentry->d_name.len;
-+
-+      de = (struct ext3_dir_entry_2 *) bh->b_data;
-+      dlimit = bh->b_data + dir->i_sb->s_blocksize;
-+      while ((char *) de < dlimit) {
-+              /* this code is executed quadratically often */
-+              /* do minimal checking `by hand' */
-+
-+              if ((char *) de + namelen <= dlimit &&
-+                  ext3_match (namelen, name, de)) {
-+                      /* found a match - just to be sure, do a full check */
-+                      if (!ext3_check_dir_entry("ext3_find_entry",
-+                                                dir, de, bh, offset))
-+                              return -1;
-+                      *res_dir = de;
-+                      return 1;
-+              }
-+              /* prevent looping on a bad block */
-+              de_len = le16_to_cpu(de->rec_len);
-+              if (de_len <= 0)
-+                      return -1;
-+              offset += de_len;
-+              de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
-+      }
-+      return 0;
-+}
-+
-+/*
-+ *    ext3_find_entry()
-+ *
-+ * finds an entry in the specified directory with the wanted name. It
-+ * returns the cache buffer in which the entry was found, and the entry
-+ * itself (as a parameter - res_dir). It does NOT read the inode of the
-+ * entry - you'll have to do that yourself if you want to.
-+ *
-+ * The returned buffer_head has ->b_count elevated.  The caller is expected
-+ * to brelse() it when appropriate.
-+ */
-+static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-+                                      struct ext3_dir_entry_2 ** res_dir)
-+{
-+      struct super_block * sb;
-+      struct buffer_head * bh_use[NAMEI_RA_SIZE];
-+      struct buffer_head * bh, *ret = NULL;
-+      unsigned long start, block, b;
-+      int ra_max = 0;         /* Number of bh's in the readahead
-+                                 buffer, bh_use[] */
-+      int ra_ptr = 0;         /* Current index into readahead
-+                                 buffer */
-+      int num = 0;
-+      int nblocks, i, err;
-+      struct inode *dir = dentry->d_parent->d_inode;
-+
-+      *res_dir = NULL;
-+      sb = dir->i_sb;
-+
-+      nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
-+      start = dir->u.ext3_i.i_dir_start_lookup;
-+      if (start >= nblocks)
-+              start = 0;
-+      block = start;
-+restart:
-+      do {
-+              /*
-+               * We deal with the read-ahead logic here.
-+               */
-+              if (ra_ptr >= ra_max) {
-+                      /* Refill the readahead buffer */
-+                      ra_ptr = 0;
-+                      b = block;
-+                      for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
-+                              /*
-+                               * Terminate if we reach the end of the
-+                               * directory and must wrap, or if our
-+                               * search has finished at this block.
-+                               */
-+                              if (b >= nblocks || (num && block == start)) {
-+                                      bh_use[ra_max] = NULL;
-+                                      break;
-+                              }
-+                              num++;
-+                              bh = ext3_getblk(NULL, dir, b++, 0, &err);
-+                              bh_use[ra_max] = bh;
-+                              if (bh)
-+                                      ll_rw_block(READ, 1, &bh);
-+                      }
-+              }
-+              if ((bh = bh_use[ra_ptr++]) == NULL)
-+                      goto next;
-+              wait_on_buffer(bh);
-+              if (!buffer_uptodate(bh)) {
-+                      /* read error, skip block & hope for the best */
-+                      brelse(bh);
-+                      goto next;
-+              }
-+              i = search_dirblock(bh, dir, dentry,
-+                          block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
-+              if (i == 1) {
-+                      dir->u.ext3_i.i_dir_start_lookup = block;
-+                      ret = bh;
-+                      goto cleanup_and_exit;
-+              } else {
-+                      brelse(bh);
-+                      if (i < 0)
-+                              goto cleanup_and_exit;
-+              }
-+      next:
-+              if (++block >= nblocks)
-+                      block = 0;
-+      } while (block != start);
-+
-+      /*
-+       * If the directory has grown while we were searching, then
-+       * search the last part of the directory before giving up.
-+       */
-+      block = nblocks;
-+      nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
-+      if (block < nblocks) {
-+              start = 0;
-+              goto restart;
-+      }
-+              
-+cleanup_and_exit:
-+      /* Clean up the read-ahead blocks */
-+      for (; ra_ptr < ra_max; ra_ptr++)
-+              brelse (bh_use[ra_ptr]);
-+      return ret;
-+}
-+
-+static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
-+{
-+      struct inode * inode;
-+      struct ext3_dir_entry_2 * de;
-+      struct buffer_head * bh;
-+
-+      if (dentry->d_name.len > EXT3_NAME_LEN)
-+              return ERR_PTR(-ENAMETOOLONG);
-+
-+      bh = ext3_find_entry(dentry, &de);
-+      inode = NULL;
-+      if (bh) {
-+              unsigned long ino = le32_to_cpu(de->inode);
-+              brelse (bh);
-+              inode = iget(dir->i_sb, ino);
-+
-+              if (!inode)
-+                      return ERR_PTR(-EACCES);
-+      }
-+      d_add(dentry, inode);
-+      return NULL;
-+}
-+
-+#define S_SHIFT 12
-+static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
-+      [S_IFREG >> S_SHIFT]    EXT3_FT_REG_FILE,
-+      [S_IFDIR >> S_SHIFT]    EXT3_FT_DIR,
-+      [S_IFCHR >> S_SHIFT]    EXT3_FT_CHRDEV,
-+      [S_IFBLK >> S_SHIFT]    EXT3_FT_BLKDEV,
-+      [S_IFIFO >> S_SHIFT]    EXT3_FT_FIFO,
-+      [S_IFSOCK >> S_SHIFT]   EXT3_FT_SOCK,
-+      [S_IFLNK >> S_SHIFT]    EXT3_FT_SYMLINK,
-+};
-+
-+static inline void ext3_set_de_type(struct super_block *sb,
-+                              struct ext3_dir_entry_2 *de,
-+                              umode_t mode) {
-+      if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE))
-+              de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-+}
-+
-+/*
-+ *    ext3_add_entry()
-+ *
-+ * adds a file entry to the specified directory, using the same
-+ * semantics as ext3_find_entry(). It returns NULL if it failed.
-+ *
-+ * NOTE!! The inode part of 'de' is left at 0 - which means you
-+ * may not sleep between calling this and putting something into
-+ * the entry, as someone else might have used it while you slept.
-+ */
-+
-+/*
-+ * AKPM: the journalling code here looks wrong on the error paths
-+ */
-+static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
-+      struct inode *inode)
-+{
-+      struct inode *dir = dentry->d_parent->d_inode;
-+      const char *name = dentry->d_name.name;
-+      int namelen = dentry->d_name.len;
-+      unsigned long offset;
-+      unsigned short rec_len;
-+      struct buffer_head * bh;
-+      struct ext3_dir_entry_2 * de, * de1;
-+      struct super_block * sb;
-+      int     retval;
-+
-+      sb = dir->i_sb;
-+
-+      if (!namelen)
-+              return -EINVAL;
-+      bh = ext3_bread (handle, dir, 0, 0, &retval);
-+      if (!bh)
-+              return retval;
-+      rec_len = EXT3_DIR_REC_LEN(namelen);
-+      offset = 0;
-+      de = (struct ext3_dir_entry_2 *) bh->b_data;
-+      while (1) {
-+              if ((char *)de >= sb->s_blocksize + bh->b_data) {
-+                      brelse (bh);
-+                      bh = NULL;
-+                      bh = ext3_bread (handle, dir,
-+                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
-+                      if (!bh)
-+                              return retval;
-+                      if (dir->i_size <= offset) {
-+                              if (dir->i_size == 0) {
-+                                      brelse(bh);
-+                                      return -ENOENT;
-+                              }
-+
-+                              ext3_debug ("creating next block\n");
-+
-+                              BUFFER_TRACE(bh, "get_write_access");
-+                              ext3_journal_get_write_access(handle, bh);
-+                              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+                              de->inode = 0;
-+                              de->rec_len = le16_to_cpu(sb->s_blocksize);
-+                              dir->u.ext3_i.i_disksize =
-+                                      dir->i_size = offset + sb->s_blocksize;
-+                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                              ext3_mark_inode_dirty(handle, dir);
-+                      } else {
-+
-+                              ext3_debug ("skipping to next block\n");
-+
-+                              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+                      }
-+              }
-+              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
-+                                         offset)) {
-+                      brelse (bh);
-+                      return -ENOENT;
-+              }
-+              if (ext3_match (namelen, name, de)) {
-+                              brelse (bh);
-+                              return -EEXIST;
-+              }
-+              if ((le32_to_cpu(de->inode) == 0 &&
-+                              le16_to_cpu(de->rec_len) >= rec_len) ||
-+                  (le16_to_cpu(de->rec_len) >=
-+                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
-+                      BUFFER_TRACE(bh, "get_write_access");
-+                      ext3_journal_get_write_access(handle, bh);
-+                      /* By now the buffer is marked for journaling */
-+                      offset += le16_to_cpu(de->rec_len);
-+                      if (le32_to_cpu(de->inode)) {
-+                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
-+                                      EXT3_DIR_REC_LEN(de->name_len));
-+                              de1->rec_len =
-+                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
-+                                      EXT3_DIR_REC_LEN(de->name_len));
-+                              de->rec_len = cpu_to_le16(
-+                                              EXT3_DIR_REC_LEN(de->name_len));
-+                              de = de1;
-+                      }
-+                      de->file_type = EXT3_FT_UNKNOWN;
-+                      if (inode) {
-+                              de->inode = cpu_to_le32(inode->i_ino);
-+                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
-+                      } else
-+                              de->inode = 0;
-+                      de->name_len = namelen;
-+                      memcpy (de->name, name, namelen);
-+                      /*
-+                       * XXX shouldn't update any times until successful
-+                       * completion of syscall, but too many callers depend
-+                       * on this.
-+                       *
-+                       * XXX similarly, too many callers depend on
-+                       * ext3_new_inode() setting the times, but error
-+                       * recovery deletes the inode, so the worst that can
-+                       * happen is that the times are slightly out of date
-+                       * and/or different from the directory change time.
-+                       */
-+                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-+                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                      ext3_mark_inode_dirty(handle, dir);
-+                      dir->i_version = ++event;
-+                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+                      ext3_journal_dirty_metadata(handle, bh);
-+                      brelse(bh);
-+                      return 0;
-+              }
-+              offset += le16_to_cpu(de->rec_len);
-+              de = (struct ext3_dir_entry_2 *)
-+                      ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      brelse (bh);
-+      return -ENOSPC;
-+}
-+
-+/*
-+ * ext3_delete_entry deletes a directory entry by merging it with the
-+ * previous entry
-+ */
-+static int ext3_delete_entry (handle_t *handle, 
-+                            struct inode * dir,
-+                            struct ext3_dir_entry_2 * de_del,
-+                            struct buffer_head * bh)
-+{
-+      struct ext3_dir_entry_2 * de, * pde;
-+      int i;
-+
-+      i = 0;
-+      pde = NULL;
-+      de = (struct ext3_dir_entry_2 *) bh->b_data;
-+      while (i < bh->b_size) {
-+              if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i))
-+                      return -EIO;
-+              if (de == de_del)  {
-+                      BUFFER_TRACE(bh, "get_write_access");
-+                      ext3_journal_get_write_access(handle, bh);
-+                      if (pde)
-+                              pde->rec_len =
-+                                      cpu_to_le16(le16_to_cpu(pde->rec_len) +
-+                                                  le16_to_cpu(de->rec_len));
-+                      else
-+                              de->inode = 0;
-+                      dir->i_version = ++event;
-+                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+                      ext3_journal_dirty_metadata(handle, bh);
-+                      return 0;
-+              }
-+              i += le16_to_cpu(de->rec_len);
-+              pde = de;
-+              de = (struct ext3_dir_entry_2 *)
-+                      ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      return -ENOENT;
-+}
-+
-+/*
-+ * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
-+ * do not perform it in these functions.  We perform it at the call site,
-+ * if it is needed.
-+ */
-+static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
-+{
-+      inode->i_nlink++;
-+}
-+
-+static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
-+{
-+      inode->i_nlink--;
-+}
-+
-+static int ext3_add_nondir(handle_t *handle,
-+              struct dentry *dentry, struct inode *inode)
-+{
-+      int err = ext3_add_entry(handle, dentry, inode);
-+      if (!err) {
-+              d_instantiate(dentry, inode);
-+              return 0;
-+      }
-+      ext3_dec_count(handle, inode);
-+      iput(inode);
-+      return err;
-+}
-+
-+/*
-+ * By the time this is called, we already have created
-+ * the directory cache entry for the new file, but it
-+ * is so far negative - it has no inode.
-+ *
-+ * If the create succeeds, we fill in the inode information
-+ * with d_instantiate(). 
-+ */
-+static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
-+{
-+      handle_t *handle; 
-+      struct inode * inode;
-+      int err;
-+
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = ext3_new_inode (handle, dir, mode);
-+      err = PTR_ERR(inode);
-+      if (!IS_ERR(inode)) {
-+              inode->i_op = &ext3_file_inode_operations;
-+              inode->i_fop = &ext3_file_operations;
-+              inode->i_mapping->a_ops = &ext3_aops;
-+              ext3_mark_inode_dirty(handle, inode);
-+              err = ext3_add_nondir(handle, dentry, inode);
-+      }
-+      ext3_journal_stop(handle, dir);
-+      return err;
-+}
-+
-+static int ext3_mknod (struct inode * dir, struct dentry *dentry,
-+                      int mode, int rdev)
-+{
-+      handle_t *handle;
-+      struct inode *inode;
-+      int err;
-+
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = ext3_new_inode (handle, dir, mode);
-+      err = PTR_ERR(inode);
-+      if (!IS_ERR(inode)) {
-+              init_special_inode(inode, mode, rdev);
-+              ext3_mark_inode_dirty(handle, inode);
-+              err = ext3_add_nondir(handle, dentry, inode);
-+      }
-+      ext3_journal_stop(handle, dir);
-+      return err;
-+}
-+
-+static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
-+{
-+      handle_t *handle;
-+      struct inode * inode;
-+      struct buffer_head * dir_block;
-+      struct ext3_dir_entry_2 * de;
-+      int err;
-+
-+      if (dir->i_nlink >= EXT3_LINK_MAX)
-+              return -EMLINK;
-+
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = ext3_new_inode (handle, dir, S_IFDIR);
-+      err = PTR_ERR(inode);
-+      if (IS_ERR(inode))
-+              goto out_stop;
-+
-+      inode->i_op = &ext3_dir_inode_operations;
-+      inode->i_fop = &ext3_dir_operations;
-+      inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
-+      inode->i_blocks = 0;    
-+      dir_block = ext3_bread (handle, inode, 0, 1, &err);
-+      if (!dir_block) {
-+              inode->i_nlink--; /* is this nlink == 0? */
-+              ext3_mark_inode_dirty(handle, inode);
-+              iput (inode);
-+              goto out_stop;
-+      }
-+      BUFFER_TRACE(dir_block, "get_write_access");
-+      ext3_journal_get_write_access(handle, dir_block);
-+      de = (struct ext3_dir_entry_2 *) dir_block->b_data;
-+      de->inode = cpu_to_le32(inode->i_ino);
-+      de->name_len = 1;
-+      de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len));
-+      strcpy (de->name, ".");
-+      ext3_set_de_type(dir->i_sb, de, S_IFDIR);
-+      de = (struct ext3_dir_entry_2 *)
-+                      ((char *) de + le16_to_cpu(de->rec_len));
-+      de->inode = cpu_to_le32(dir->i_ino);
-+      de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1));
-+      de->name_len = 2;
-+      strcpy (de->name, "..");
-+      ext3_set_de_type(dir->i_sb, de, S_IFDIR);
-+      inode->i_nlink = 2;
-+      BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
-+      ext3_journal_dirty_metadata(handle, dir_block);
-+      brelse (dir_block);
-+      inode->i_mode = S_IFDIR | mode;
-+      if (dir->i_mode & S_ISGID)
-+              inode->i_mode |= S_ISGID;
-+      ext3_mark_inode_dirty(handle, inode);
-+      err = ext3_add_entry (handle, dentry, inode);
-+      if (err)
-+              goto out_no_entry;
-+      dir->i_nlink++;
-+      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_mark_inode_dirty(handle, dir);
-+      d_instantiate(dentry, inode);
-+out_stop:
-+      ext3_journal_stop(handle, dir);
-+      return err;
-+
-+out_no_entry:
-+      inode->i_nlink = 0;
-+      ext3_mark_inode_dirty(handle, inode);
-+      iput (inode);
-+      goto out_stop;
-+}
-+
-+/*
-+ * routine to check that the specified directory is empty (for rmdir)
-+ */
-+static int empty_dir (struct inode * inode)
-+{
-+      unsigned long offset;
-+      struct buffer_head * bh;
-+      struct ext3_dir_entry_2 * de, * de1;
-+      struct super_block * sb;
-+      int err;
-+
-+      sb = inode->i_sb;
-+      if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
-+          !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
-+              ext3_warning (inode->i_sb, "empty_dir",
-+                            "bad directory (dir #%lu) - no data block",
-+                            inode->i_ino);
-+              return 1;
-+      }
-+      de = (struct ext3_dir_entry_2 *) bh->b_data;
-+      de1 = (struct ext3_dir_entry_2 *)
-+                      ((char *) de + le16_to_cpu(de->rec_len));
-+      if (le32_to_cpu(de->inode) != inode->i_ino ||
-+                      !le32_to_cpu(de1->inode) || 
-+                      strcmp (".", de->name) ||
-+                      strcmp ("..", de1->name)) {
-+              ext3_warning (inode->i_sb, "empty_dir",
-+                            "bad directory (dir #%lu) - no `.' or `..'",
-+                            inode->i_ino);
-+              brelse (bh);
-+              return 1;
-+      }
-+      offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len);
-+      de = (struct ext3_dir_entry_2 *)
-+                      ((char *) de1 + le16_to_cpu(de1->rec_len));
-+      while (offset < inode->i_size ) {
-+              if (!bh ||
-+                      (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
-+                      brelse (bh);
-+                      bh = ext3_bread (NULL, inode,
-+                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
-+                      if (!bh) {
-+#if 0
-+                              ext3_error (sb, "empty_dir",
-+                              "directory #%lu contains a hole at offset %lu",
-+                                      inode->i_ino, offset);
-+#endif
-+                              offset += sb->s_blocksize;
-+                              continue;
-+                      }
-+                      de = (struct ext3_dir_entry_2 *) bh->b_data;
-+              }
-+              if (!ext3_check_dir_entry ("empty_dir", inode, de, bh,
-+                                         offset)) {
-+                      brelse (bh);
-+                      return 1;
-+              }
-+              if (le32_to_cpu(de->inode)) {
-+                      brelse (bh);
-+                      return 0;
-+              }
-+              offset += le16_to_cpu(de->rec_len);
-+              de = (struct ext3_dir_entry_2 *)
-+                              ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      brelse (bh);
-+      return 1;
-+}
-+
-+/* ext3_orphan_add() links an unlinked or truncated inode into a list of
-+ * such inodes, starting at the superblock, in case we crash before the
-+ * file is closed/deleted, or in case the inode truncate spans multiple
-+ * transactions and the last transaction is not recovered after a crash.
-+ *
-+ * At filesystem recovery time, we walk this list deleting unlinked
-+ * inodes and truncating linked inodes in ext3_orphan_cleanup().
-+ */
-+int ext3_orphan_add(handle_t *handle, struct inode *inode)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      struct ext3_iloc iloc;
-+      int err = 0, rc;
-+      
-+      lock_super(sb);
-+      if (!list_empty(&inode->u.ext3_i.i_orphan))
-+              goto out_unlock;
-+
-+      /* Orphan handling is only valid for files with data blocks
-+       * being truncated, or files being unlinked. */
-+
-+      /* @@@ FIXME: Observation from aviro:
-+       * I think I can trigger J_ASSERT in ext3_orphan_add().  We block 
-+       * here (on lock_super()), so race with ext3_link() which might bump
-+       * ->i_nlink. For, say it, character device. Not a regular file,
-+       * not a directory, not a symlink and ->i_nlink > 0.
-+       */
-+      J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-+              S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
-+
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-+      if (err)
-+              goto out_unlock;
-+      
-+      err = ext3_reserve_inode_write(handle, inode, &iloc);
-+      if (err)
-+              goto out_unlock;
-+
-+      /* Insert this inode at the head of the on-disk orphan list... */
-+      NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
-+      EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-+      err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-+      rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
-+      if (!err)
-+              err = rc;
-+
-+      /* Only add to the head of the in-memory list if all the
-+       * previous operations succeeded.  If the orphan_add is going to
-+       * fail (possibly taking the journal offline), we can't risk
-+       * leaving the inode on the orphan list: stray orphan-list
-+       * entries can cause panics at unmount time.
-+       *
-+       * This is safe: on error we're going to ignore the orphan list
-+       * anyway on the next recovery. */
-+      if (!err)
-+              list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
-+
-+      jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
-+      jbd_debug(4, "orphan inode %ld will point to %d\n",
-+                      inode->i_ino, NEXT_ORPHAN(inode));
-+out_unlock:
-+      unlock_super(sb);
-+      ext3_std_error(inode->i_sb, err);
-+      return err;
-+}
-+
-+/*
-+ * ext3_orphan_del() removes an unlinked or truncated inode from the list
-+ * of such inodes stored on disk, because it is finally being cleaned up.
-+ *
-+ * @handle: journal handle used for the on-disk update.  May be NULL, in
-+ *          which case only the in-memory list is touched.
-+ * @inode:  inode to take off the orphan list.
-+ *
-+ * The whole operation runs under lock_super().  Returns 0 when the inode
-+ * was not on the list or was removed successfully, otherwise the error
-+ * returned by the failing inode/journal call.
-+ */
-+int ext3_orphan_del(handle_t *handle, struct inode *inode)
-+{
-+      struct list_head *prev;
-+      struct ext3_sb_info *sbi;
-+      ino_t ino_next; 
-+      struct ext3_iloc iloc;
-+      int err = 0;
-+      
-+      lock_super(inode->i_sb);
-+      /* Not on the in-memory orphan list: nothing to undo. */
-+      if (list_empty(&inode->u.ext3_i.i_orphan)) {
-+              unlock_super(inode->i_sb);
-+              return 0;
-+      }
-+
-+      /* Remember successor and predecessor before unlinking the entry. */
-+      ino_next = NEXT_ORPHAN(inode);
-+      prev = inode->u.ext3_i.i_orphan.prev;
-+      sbi = EXT3_SB(inode->i_sb);
-+
-+      jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
-+
-+      list_del(&inode->u.ext3_i.i_orphan);
-+      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
-+
-+      /* If we're on an error path, we may not have a valid
-+       * transaction handle with which to update the orphan list on
-+       * disk, but we still need to remove the inode from the linked
-+       * list in memory. */
-+      if (!handle)
-+              goto out;
-+      
-+      err = ext3_reserve_inode_write(handle, inode, &iloc);
-+      if (err)
-+              goto out_err;
-+
-+      if (prev == &sbi->s_orphan) {
-+              /* The inode was first in the list: the superblock's
-+               * s_last_orphan must now name its successor. */
-+              jbd_debug(4, "superblock will point to %ld\n", ino_next);
-+              BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, sbi->s_sbh);
-+              if (err)
-+                      goto out_brelse;
-+              sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-+              err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
-+      } else {
-+              /* Otherwise the previous orphan inode must skip over us. */
-+              struct ext3_iloc iloc2;
-+              struct inode *i_prev =
-+                      list_entry(prev, struct inode, u.ext3_i.i_orphan);
-+              
-+              jbd_debug(4, "orphan inode %ld will point to %ld\n",
-+                        i_prev->i_ino, ino_next);
-+              err = ext3_reserve_inode_write(handle, i_prev, &iloc2);
-+              if (err)
-+                      goto out_brelse;
-+              NEXT_ORPHAN(i_prev) = ino_next;
-+              err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2);
-+      }
-+      if (err)
-+              goto out_brelse;
-+      /* Finally clear this inode's own link and write it back. */
-+      NEXT_ORPHAN(inode) = 0;
-+      err = ext3_mark_iloc_dirty(handle, inode, &iloc);
-+      if (err)
-+              goto out_brelse;
-+
-+out_err:      
-+      ext3_std_error(inode->i_sb, err);
-+out:
-+      unlock_super(inode->i_sb);
-+      return err;
-+
-+out_brelse:
-+      /* Drop the buffer reference taken by ext3_reserve_inode_write(). */
-+      brelse(iloc.bh);
-+      goto out_err;
-+}
-+
-+/*
-+ * ext3_rmdir() removes the empty directory named by @dentry from @dir.
-+ *
-+ * The victim inode has its link count and size zeroed and is placed on the
-+ * orphan list within the same journal handle that deletes the entry.
-+ *
-+ * Returns 0 on success, the PTR_ERR() of a failed journal start, -ENOENT
-+ * if the entry is not found, -EIO if the entry does not name the dentry's
-+ * inode, -ENOTEMPTY if the directory still has entries, or the error from
-+ * ext3_delete_entry().
-+ */
-+static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
-+{
-+      int retval;
-+      struct inode * inode;
-+      struct buffer_head * bh;
-+      struct ext3_dir_entry_2 * de;
-+      handle_t *handle;
-+
-+      handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      retval = -ENOENT;
-+      bh = ext3_find_entry (dentry, &de);
-+      if (!bh)
-+              goto end_rmdir;
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = dentry->d_inode;
-+      DQUOT_INIT(inode);
-+
-+      retval = -EIO;
-+      if (le32_to_cpu(de->inode) != inode->i_ino)
-+              goto end_rmdir;
-+
-+      retval = -ENOTEMPTY;
-+      if (!empty_dir (inode))
-+              goto end_rmdir;
-+
-+      retval = ext3_delete_entry(handle, dir, de, bh);
-+      if (retval)
-+              goto end_rmdir;
-+      if (inode->i_nlink != 2)
-+              ext3_warning (inode->i_sb, "ext3_rmdir",
-+                            "empty directory has nlink!=2 (%d)",
-+                            inode->i_nlink);
-+      inode->i_version = ++event;
-+      inode->i_nlink = 0;
-+      /* There's no need to set i_disksize: the fact that i_nlink is
-+       * zero will ensure that the right thing happens during any
-+       * recovery. */
-+      inode->i_size = 0;
-+      ext3_orphan_add(handle, inode);
-+      /* Update the timestamps before dirtying the victim, so that its new
-+       * ctime is included when the inode is marked dirty in this handle. */
-+      inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-+      ext3_mark_inode_dirty(handle, inode);
-+      dir->i_nlink--;
-+      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_mark_inode_dirty(handle, dir);
-+
-+end_rmdir:
-+      ext3_journal_stop(handle, dir);
-+      brelse (bh);
-+      return retval;
-+}
-+
-+/*
-+ * ext3_unlink() deletes the directory entry @dentry from @dir and drops
-+ * one link from the inode it names.  When the link count reaches zero the
-+ * inode is added to the orphan list.
-+ *
-+ * Returns 0 on success, the PTR_ERR() of a failed journal start, -ENOENT
-+ * if the entry is not found, -EIO if the entry does not name the dentry's
-+ * inode, or the error from ext3_delete_entry().
-+ */
-+static int ext3_unlink(struct inode * dir, struct dentry *dentry)
-+{
-+      int retval;
-+      struct inode * inode;
-+      struct buffer_head * bh;
-+      struct ext3_dir_entry_2 * de;
-+      handle_t *handle;
-+
-+      handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      retval = -ENOENT;
-+      bh = ext3_find_entry (dentry, &de);
-+      if (!bh)
-+              goto end_unlink;
-+
-+      inode = dentry->d_inode;
-+      DQUOT_INIT(inode);
-+
-+      retval = -EIO;
-+      if (le32_to_cpu(de->inode) != inode->i_ino)
-+              goto end_unlink;
-+
-+      /* A zero link count here is inconsistent: warn and treat the inode
-+       * as having one link so the decrement below cannot wrap. */
-+      if (!inode->i_nlink) {
-+              ext3_warning (inode->i_sb, "ext3_unlink",
-+                            "Deleting nonexistent file (%lu), %d",
-+                            inode->i_ino, inode->i_nlink);
-+              inode->i_nlink = 1;
-+      }
-+      retval = ext3_delete_entry(handle, dir, de, bh);
-+      if (retval)
-+              goto end_unlink;
-+      dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-+      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_mark_inode_dirty(handle, dir);
-+      inode->i_nlink--;
-+      if (!inode->i_nlink)
-+              ext3_orphan_add(handle, inode);
-+      /* Set ctime before dirtying the inode, so that the new timestamp is
-+       * included when the inode is marked dirty in this handle. */
-+      inode->i_ctime = dir->i_ctime;
-+      ext3_mark_inode_dirty(handle, inode);
-+      retval = 0;
-+
-+end_unlink:
-+      ext3_journal_stop(handle, dir);
-+      brelse (bh);
-+      return retval;
-+}
-+
-+/*
-+ * ext3_symlink() creates the symbolic link @dentry in @dir with target
-+ * @symname.
-+ *
-+ * A target that fits (with its trailing NUL) in the inode's i_data area is
-+ * copied there directly; a longer one is written through block_symlink().
-+ *
-+ * Returns -ENAMETOOLONG if the target plus NUL exceeds the block size, the
-+ * PTR_ERR() of a failed journal start or inode allocation, the error from
-+ * block_symlink(), or the result of ext3_add_nondir().
-+ */
-+static int ext3_symlink (struct inode * dir,
-+              struct dentry *dentry, const char * symname)
-+{
-+      handle_t *handle;
-+      struct inode * inode;
-+      int l, err;
-+
-+      /* l counts the terminating NUL as well. */
-+      l = strlen(symname)+1;
-+      if (l > dir->i_sb->s_blocksize)
-+              return -ENAMETOOLONG;
-+
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
-+      err = PTR_ERR(inode);
-+      if (IS_ERR(inode))
-+              goto out_stop;
-+
-+      if (l > sizeof (inode->u.ext3_i.i_data)) {
-+              inode->i_op = &page_symlink_inode_operations;
-+              inode->i_mapping->a_ops = &ext3_aops;
-+              /*
-+               * block_symlink() calls back into ext3_prepare/commit_write.
-+               * We have a transaction open.  All is sweetness.  It also sets
-+               * i_size in generic_commit_write().
-+               */
-+              err = block_symlink(inode, symname, l);
-+              if (err)
-+                      goto out_no_entry;
-+      } else {
-+              /* Fast symlink: the target lives inside the inode itself. */
-+              inode->i_op = &ext3_fast_symlink_inode_operations;
-+              memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
-+              inode->i_size = l-1;
-+      }
-+      inode->u.ext3_i.i_disksize = inode->i_size;
-+      ext3_mark_inode_dirty(handle, inode);
-+      err = ext3_add_nondir(handle, dentry, inode);
-+out_stop:
-+      ext3_journal_stop(handle, dir);
-+      return err;
-+
-+out_no_entry:
-+      /* block_symlink() failed: drop the new inode's link and release it. */
-+      ext3_dec_count(handle, inode);
-+      ext3_mark_inode_dirty(handle, inode);
-+      iput (inode);
-+      goto out_stop;
-+}
-+
-+/*
-+ * ext3_link() adds a new name @dentry in @dir for the inode behind
-+ * @old_dentry.
-+ *
-+ * Returns -EPERM for directories, -EMLINK when the inode already has
-+ * EXT3_LINK_MAX links, the PTR_ERR() of a failed journal start, or the
-+ * result of ext3_add_nondir().
-+ */
-+static int ext3_link (struct dentry * old_dentry,
-+              struct inode * dir, struct dentry *dentry)
-+{
-+      handle_t *handle;
-+      struct inode *inode = old_dentry->d_inode;
-+      int err;
-+
-+      if (S_ISDIR(inode->i_mode))
-+              return -EPERM;
-+
-+      if (inode->i_nlink >= EXT3_LINK_MAX)
-+              return -EMLINK;
-+
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode->i_ctime = CURRENT_TIME;
-+      ext3_inc_count(handle, inode);
-+      /* Take an extra inode reference for the new dentry. */
-+      atomic_inc(&inode->i_count);
-+
-+      ext3_mark_inode_dirty(handle, inode);
-+      err = ext3_add_nondir(handle, dentry, inode);
-+      ext3_journal_stop(handle, dir);
-+      return err;
-+}
-+
-+/*
-+ * PARENT_INO(buffer) is the inode field of the directory entry that follows
-+ * the first entry in @buffer (found by stepping over the first entry's
-+ * rec_len); presumably the ".." entry of a directory's first block.  The
-+ * value is in on-disk byte order and the expression is an lvalue.
-+ * Note that @buffer is evaluated twice.
-+ */
-+#define PARENT_INO(buffer) \
-+      ((struct ext3_dir_entry_2 *) ((char *) (buffer) + \
-+      le16_to_cpu(((struct ext3_dir_entry_2 *) (buffer))->rec_len)))->inode
-+
-+/*
-+ * Anybody can rename anything with this: the permission checks are left to the
-+ * higher-level routines.
-+ *
-+ * ext3_rename() moves @old_dentry in @old_dir to @new_dentry in @new_dir,
-+ * replacing an existing target if there is one.  When a directory is moved
-+ * its parent pointer (see PARENT_INO) is rewritten to name @new_dir.
-+ *
-+ * Returns 0 on success, the PTR_ERR() of a failed journal start, -ENOENT
-+ * if the source entry is missing or stale, -ENOTEMPTY if the target is a
-+ * non-empty directory, -EMLINK if @new_dir cannot take another link, -EIO
-+ * (or the error set by ext3_bread()) if the source directory block cannot
-+ * be read or does not point back at @old_dir, or the error from
-+ * ext3_add_entry().
-+ *
-+ * NOTE(review): the return values of ext3_journal_get_write_access(),
-+ * ext3_journal_dirty_metadata() and ext3_delete_entry() are not checked
-+ * below; this is kept as in the original.
-+ */
-+static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
-+                         struct inode * new_dir,struct dentry *new_dentry)
-+{
-+      handle_t *handle;
-+      struct inode * old_inode, * new_inode;
-+      struct buffer_head * old_bh, * new_bh, * dir_bh;
-+      struct ext3_dir_entry_2 * old_de, * new_de;
-+      int retval;
-+
-+      old_bh = new_bh = dir_bh = NULL;
-+
-+      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
-+              handle->h_sync = 1;
-+
-+      old_bh = ext3_find_entry (old_dentry, &old_de);
-+      /*
-+       *  Check for inode number is _not_ due to possible IO errors.
-+       *  We might rmdir the source, keep it as pwd of some process
-+       *  and merrily kill the link to whatever was created under the
-+       *  same name. Goodbye sticky bit ;-<
-+       */
-+      old_inode = old_dentry->d_inode;
-+      retval = -ENOENT;
-+      if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
-+              goto end_rename;
-+
-+      new_inode = new_dentry->d_inode;
-+      new_bh = ext3_find_entry (new_dentry, &new_de);
-+      if (new_bh) {
-+              if (!new_inode) {
-+                      /* Entry on disk but no inode in the dentry: treat
-+                       * the target as absent. */
-+                      brelse (new_bh);
-+                      new_bh = NULL;
-+              } else {
-+                      DQUOT_INIT(new_inode);
-+              }
-+      }
-+      if (S_ISDIR(old_inode->i_mode)) {
-+              if (new_inode) {
-+                      retval = -ENOTEMPTY;
-+                      if (!empty_dir (new_inode))
-+                              goto end_rename;
-+              }
-+              retval = -EIO;
-+              dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
-+              if (!dir_bh)
-+                      goto end_rename;
-+              if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
-+                      goto end_rename;
-+              retval = -EMLINK;
-+              if (!new_inode && new_dir!=old_dir &&
-+                              new_dir->i_nlink >= EXT3_LINK_MAX)
-+                      goto end_rename;
-+      }
-+      if (!new_bh) {
-+              retval = ext3_add_entry (handle, new_dentry, old_inode);
-+              if (retval)
-+                      goto end_rename;
-+      } else {
-+              /* Overwrite the existing target entry in place. */
-+              BUFFER_TRACE(new_bh, "get_write_access");
-+              ext3_journal_get_write_access(handle, new_bh);
-+              /* Storing a CPU value into an on-disk little-endian field. */
-+              new_de->inode = cpu_to_le32(old_inode->i_ino);
-+              if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
-+                                            EXT3_FEATURE_INCOMPAT_FILETYPE))
-+                      new_de->file_type = old_de->file_type;
-+              new_dir->i_version = ++event;
-+              BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
-+              ext3_journal_dirty_metadata(handle, new_bh);
-+              brelse(new_bh);
-+              new_bh = NULL;
-+      }
-+
-+      /*
-+       * Like most other Unix systems, set the ctime for inodes on a
-+       * rename.
-+       */
-+      old_inode->i_ctime = CURRENT_TIME;
-+      ext3_mark_inode_dirty(handle, old_inode);
-+
-+      /*
-+       * ok, that's it
-+       */
-+      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+
-+      if (new_inode) {
-+              new_inode->i_nlink--;
-+              new_inode->i_ctime = CURRENT_TIME;
-+      }
-+      old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
-+      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      if (dir_bh) {
-+              /* A directory moved: repoint its parent entry and fix up
-+               * the link counts of the directories involved. */
-+              BUFFER_TRACE(dir_bh, "get_write_access");
-+              ext3_journal_get_write_access(handle, dir_bh);
-+              PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
-+              BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
-+              ext3_journal_dirty_metadata(handle, dir_bh);
-+              old_dir->i_nlink--;
-+              if (new_inode) {
-+                      new_inode->i_nlink--;
-+              } else {
-+                      new_dir->i_nlink++;
-+                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                      ext3_mark_inode_dirty(handle, new_dir);
-+              }
-+      }
-+      ext3_mark_inode_dirty(handle, old_dir);
-+      if (new_inode) {
-+              ext3_mark_inode_dirty(handle, new_inode);
-+              if (!new_inode->i_nlink)
-+                      ext3_orphan_add(handle, new_inode);
-+      }
-+      retval = 0;
-+
-+end_rename:
-+      brelse (dir_bh);
-+      brelse (old_bh);
-+      brelse (new_bh);
-+      ext3_journal_stop(handle, old_dir);
-+      return retval;
-+}
-+
-+/*
-+ * directories can handle most operations...
-+ *
-+ * Inode operations table for ext3 directories.  The per-entry comments
-+ * record that each method is entered with the BKL held.
-+ */
-+struct inode_operations ext3_dir_inode_operations = {
-+      create:         ext3_create,            /* BKL held */
-+      lookup:         ext3_lookup,            /* BKL held */
-+      link:           ext3_link,              /* BKL held */
-+      unlink:         ext3_unlink,            /* BKL held */
-+      symlink:        ext3_symlink,           /* BKL held */
-+      mkdir:          ext3_mkdir,             /* BKL held */
-+      rmdir:          ext3_rmdir,             /* BKL held */
-+      mknod:          ext3_mknod,             /* BKL held */
-+      rename:         ext3_rename,            /* BKL held */
-+};
-diff -rup --new-file linux.mcp2/fs/ext3/super.c linux_tmp/fs/ext3/super.c
---- linux.mcp2/fs/ext3/super.c 1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/super.c  2002-02-25 11:38:08.000000000 -0800
-@@ -0,0 +1,1753 @@
-+/*
-+ *  linux/fs/ext3/super.c
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/inode.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  Big-endian to little-endian byte-swapping/bitmaps by
-+ *        David S. Miller (davem@caip.rutgers.edu), 1995
-+ */
-+
-+#include <linux/config.h>
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/fs.h>
-+#include <linux/sched.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/slab.h>
-+#include <linux/init.h>
-+#include <linux/locks.h>
-+#include <linux/blkdev.h>
-+#include <linux/smp_lock.h>
-+#include <linux/random.h>
-+#include <asm/uaccess.h>
-+
-+#ifdef CONFIG_JBD_DEBUG
-+static int ext3_ro_after; /* Make fs read-only after this many jiffies */
-+#endif
-+
-+static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
-+static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
-+                             int);
-+static void ext3_commit_super (struct super_block * sb,
-+                             struct ext3_super_block * es,
-+                             int sync);
-+static void ext3_mark_recovery_complete(struct super_block * sb,
-+                                      struct ext3_super_block * es);
-+static void ext3_clear_journal_err(struct super_block * sb,
-+                                 struct ext3_super_block * es);
-+
-+#ifdef CONFIG_JBD_DEBUG
-+int journal_no_write[2];
-+
-+/*
-+ * Debug code for turning filesystems "read-only" after a specified
-+ * amount of time.  This is for crash/recovery testing.
-+ */
-+
-+/*
-+ * Flag @dev as read-only for the crash-testing code by storing a marker
-+ * derived from the device number in *@no_write.  A zero @dev is ignored.
-+ */
-+static void make_rdonly(kdev_t dev, int *no_write)
-+{
-+      if (!dev)
-+              return;
-+
-+      printk(KERN_WARNING "Turning device %s read-only\n", bdevname(dev));
-+      *no_write = 0xdead0000 + dev;
-+}
-+
-+/*
-+ * Timer callback: @arg is the super_block whose filesystem device and
-+ * journal device are to be flagged read-only.  Wakes anyone sleeping on
-+ * the superblock's ro_wait_queue afterwards.
-+ */
-+static void turn_fs_readonly(unsigned long arg)
-+{
-+      struct super_block *sb = (struct super_block *)arg;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+
-+      make_rdonly(sb->s_dev, &journal_no_write[0]);
-+      make_rdonly(sbi->s_journal->j_dev, &journal_no_write[1]);
-+      wake_up(&sbi->ro_wait_queue);
-+}
-+
-+/*
-+ * Initialise the per-superblock read-only timer and, if ext3_ro_after is
-+ * non-zero, arm it to fire turn_fs_readonly() that many jiffies from now.
-+ * ext3_ro_after is consumed (reset to 0) once the timer has been set up.
-+ */
-+static void setup_ro_after(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+
-+      init_timer(&sbi->turn_ro_timer);
-+      if (!ext3_ro_after)
-+              return;
-+
-+      printk(KERN_DEBUG "fs will go read-only in %d jiffies\n",
-+             ext3_ro_after);
-+      init_waitqueue_head(&sbi->ro_wait_queue);
-+
-+      /* Start with both devices writable. */
-+      journal_no_write[0] = 0;
-+      journal_no_write[1] = 0;
-+
-+      sbi->turn_ro_timer.function = turn_fs_readonly;
-+      sbi->turn_ro_timer.data = (unsigned long)sb;
-+      sbi->turn_ro_timer.expires = jiffies + ext3_ro_after;
-+      ext3_ro_after = 0;
-+      add_timer(&sbi->turn_ro_timer);
-+}
-+
-+/*
-+ * Undo setup_ro_after(): stop the read-only timer (waiting for a running
-+ * handler to finish) and reset the no-write markers and ext3_ro_after.
-+ */
-+static void clear_ro_after(struct super_block *sb)
-+{
-+      del_timer_sync(&EXT3_SB(sb)->turn_ro_timer);
-+      journal_no_write[0] = 0;
-+      journal_no_write[1] = 0;
-+      ext3_ro_after = 0;
-+}
-+#else
-+#define setup_ro_after(sb)    do {} while (0)
-+#define clear_ro_after(sb)    do {} while (0)
-+#endif
-+
-+
-+/* Scratch buffer shared by ext3_error(), ext3_abort(), ext3_panic() and
-+ * ext3_warning() for their formatted messages.  It is a single static
-+ * buffer, so concurrent callers can overwrite each other's text. */
-+static char error_buf[1024];
-+
-+/* Work out how ext3_error should react on this filesystem: one of
-+ * EXT3_ERRORS_PANIC, EXT3_ERRORS_RO or EXT3_ERRORS_CONTINUE. */
-+
-+static int ext3_error_behaviour(struct super_block *sb)
-+{
-+      int on_disk;
-+
-+      /* Mount-time options win over whatever the superblock says. */
-+      if (test_opt (sb, ERRORS_PANIC))
-+              return EXT3_ERRORS_PANIC;
-+      else if (test_opt (sb, ERRORS_RO))
-+              return EXT3_ERRORS_RO;
-+      else if (test_opt (sb, ERRORS_CONT))
-+              return EXT3_ERRORS_CONTINUE;
-+
-+      /* No override on the mount: use the default recorded in the
-+       * on-disk superblock, treating anything unrecognised as
-+       * "continue". */
-+      on_disk = le16_to_cpu(sb->u.ext3_sb.s_es->s_errors);
-+      if (on_disk == EXT3_ERRORS_PANIC || on_disk == EXT3_ERRORS_RO)
-+              return on_disk;
-+
-+      return EXT3_ERRORS_CONTINUE;
-+}
-+
-+/* Deal with the reporting of failure conditions on a filesystem such as
-+ * inconsistencies detected or read IO failures.
-+ *
-+ * On ext2, we can store the error state of the filesystem in the
-+ * superblock.  That is not possible on ext3, because we may have other
-+ * write ordering constraints on the superblock which prevent us from
-+ * writing it out straight away; and given that the journal is about to
-+ * be aborted, we can't rely on the current, or future, transactions to
-+ * write out the superblock safely.
-+ *
-+ * We'll just use the journal_abort() error code to record an error in
-+ * the journal instead.  On recovery, the journal will complain about
-+ * that error until we've noted it down and cleared it.
-+ *
-+ * The error flag is always set in memory and in the superblock image.  On
-+ * a filesystem that is not already read-only, the configured policy (see
-+ * ext3_error_behaviour()) then decides whether to abort the journal,
-+ * panic, or remount read-only before the superblock is committed.
-+ */
-+
-+static void ext3_handle_error(struct super_block *sb)
-+{
-+      struct ext3_super_block *es = EXT3_SB(sb)->s_es;
-+      int behaviour;
-+
-+      EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
-+      /* s_state is a 16-bit on-disk field: a 32-bit conversion would put
-+       * the flag in the wrong half of the value on big-endian hosts. */
-+      es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
-+
-+      if (sb->s_flags & MS_RDONLY)
-+              return;
-+
-+      /* The policy depends only on the errors= options and the on-disk
-+       * default, neither of which changes below, so look it up once. */
-+      behaviour = ext3_error_behaviour(sb);
-+
-+      if (behaviour != EXT3_ERRORS_CONTINUE) {
-+              EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
-+              journal_abort(EXT3_SB(sb)->s_journal, -EIO);
-+      }
-+
-+      if (behaviour == EXT3_ERRORS_PANIC)
-+              panic ("EXT3-fs (device %s): panic forced after error\n",
-+                     bdevname(sb->s_dev));
-+
-+      if (behaviour == EXT3_ERRORS_RO) {
-+              printk (KERN_CRIT "Remounting filesystem read-only\n");
-+              sb->s_flags |= MS_RDONLY;
-+      }
-+
-+      ext3_commit_super(sb, es, 1);
-+}
-+
-+/*
-+ * ext3_error() logs a formatted filesystem error at KERN_CRIT and then
-+ * applies the error policy through ext3_handle_error().
-+ *
-+ * @sb:       filesystem the error was detected on.
-+ * @function: name of the reporting function, included in the message.
-+ * @fmt, ...: printf-style description of the error.
-+ */
-+void ext3_error (struct super_block * sb, const char * function,
-+               const char * fmt, ...)
-+{
-+      va_list args;
-+
-+      va_start (args, fmt);
-+      /* Bound the write: error_buf is a fixed-size static buffer. */
-+      vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-+      va_end (args);
-+
-+      printk (KERN_CRIT "EXT3-fs error (device %s): %s: %s\n",
-+              bdevname(sb->s_dev), function, error_buf);
-+
-+      ext3_handle_error(sb);
-+}
-+
-+/*
-+ * Translate a negative error code into a short description.
-+ *
-+ * -EIO, -ENOMEM and -EROFS map to fixed strings (for -EROFS the text
-+ * depends on whether @sb is NULL or its journal carries JFS_ABORT).  Any
-+ * other code is formatted as "error N" into @nbuf when the caller supplied
-+ * one; without a buffer NULL is returned.
-+ */
-+const char *ext3_decode_error(struct super_block * sb, int errno, char nbuf[16])
-+{
-+      if (errno == -EIO)
-+              return "IO failure";
-+
-+      if (errno == -ENOMEM)
-+              return "Out of memory";
-+
-+      if (errno == -EROFS) {
-+              if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
-+                      return "Journal has aborted";
-+              return "Readonly filesystem";
-+      }
-+
-+      /* Unknown code: textualise it if the caller gave us somewhere to
-+       * put the result. */
-+      if (nbuf && snprintf(nbuf, 16, "error %d", -errno) >= 0)
-+              return nbuf;
-+
-+      return NULL;
-+}
-+
-+/* __ext3_std_error turns an error code from the journaling functions into
-+ * text via ext3_decode_error(), logs it at KERN_CRIT and then applies the
-+ * filesystem's error policy through ext3_handle_error().  */
-+
-+void __ext3_std_error (struct super_block * sb, const char * function,
-+                     int errno)
-+{
-+      /* Scratch space for codes ext3_decode_error() has no name for. */
-+      char nbuf[16];
-+
-+      printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n",
-+              bdevname(sb->s_dev), function,
-+              ext3_decode_error(sb, errno, nbuf));
-+
-+      ext3_handle_error(sb);
-+}
-+
-+/*
-+ * ext3_abort is a much stronger failure handler than ext3_error.  The
-+ * abort function may be used to deal with unrecoverable failures such
-+ * as journal IO errors or ENOMEM at a critical moment in log management.
-+ *
-+ * We unconditionally force the filesystem into an ABORT|READONLY state,
-+ * unless the error response on the fs has been set to panic in which
-+ * case we take the easy way out and panic immediately.
-+ *
-+ * @sb:       filesystem to abort.
-+ * @function: name of the reporting function, included in the message.
-+ * @fmt, ...: printf-style description of the failure.
-+ */
-+
-+void ext3_abort (struct super_block * sb, const char * function,
-+               const char * fmt, ...)
-+{
-+      va_list args;
-+
-+      printk (KERN_CRIT "ext3_abort called.\n");
-+
-+      va_start (args, fmt);
-+      /* Bound the write: error_buf is a fixed-size static buffer. */
-+      vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-+      va_end (args);
-+
-+      if (ext3_error_behaviour(sb) == EXT3_ERRORS_PANIC)
-+              panic ("EXT3-fs panic (device %s): %s: %s\n",
-+                     bdevname(sb->s_dev), function, error_buf);
-+
-+      printk (KERN_CRIT "EXT3-fs abort (device %s): %s: %s\n",
-+              bdevname(sb->s_dev), function, error_buf);
-+
-+      /* Already read-only: nothing further to force. */
-+      if (sb->s_flags & MS_RDONLY)
-+              return;
-+
-+      printk (KERN_CRIT "Remounting filesystem read-only\n");
-+      sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
-+      sb->s_flags |= MS_RDONLY;
-+      sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT;
-+      journal_abort(EXT3_SB(sb)->s_journal, -EIO);
-+}
-+
-+/* Deal with the reporting of failure conditions while running, such as
-+ * inconsistencies in operation or invalid system states.
-+ *
-+ * Use ext3_error() for cases of invalid filesystem states, as that will
-+ * record an error on disk and force a filesystem check on the next boot.
-+ *
-+ * ext3_panic() formats the message, marks the superblock read-only in
-+ * memory and panics; it does not return.
-+ */
-+NORET_TYPE void ext3_panic (struct super_block * sb, const char * function,
-+                          const char * fmt, ...)
-+{
-+      va_list args;
-+
-+      va_start (args, fmt);
-+      /* Bound the write: error_buf is a fixed-size static buffer. */
-+      vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-+      va_end (args);
-+
-+      /* this is to prevent panic from syncing this filesystem */
-+      /* AKPM: is this sufficient? */
-+      sb->s_flags |= MS_RDONLY;
-+      panic ("EXT3-fs panic (device %s): %s: %s\n",
-+             bdevname(sb->s_dev), function, error_buf);
-+}
-+
-+/*
-+ * ext3_warning() logs a formatted warning at KERN_WARNING.  Unlike
-+ * ext3_error() it changes no filesystem state.
-+ *
-+ * @sb:       filesystem the warning concerns.
-+ * @function: name of the reporting function, included in the message.
-+ * @fmt, ...: printf-style description of the condition.
-+ */
-+void ext3_warning (struct super_block * sb, const char * function,
-+                 const char * fmt, ...)
-+{
-+      va_list args;
-+
-+      va_start (args, fmt);
-+      /* Bound the write: error_buf is a fixed-size static buffer. */
-+      vsnprintf (error_buf, sizeof(error_buf), fmt, args);
-+      va_end (args);
-+      printk (KERN_WARNING "EXT3-fs warning (device %s): %s: %s\n",
-+              bdevname(sb->s_dev), function, error_buf);
-+}
-+
-+/*
-+ * Upgrade the in-memory superblock image from EXT3_GOOD_OLD_REV to
-+ * EXT3_DYNAMIC_REV, filling in the first-inode and inode-size fields with
-+ * their old-revision values.  Does nothing if the revision is already
-+ * newer.  Only the superblock image is modified here; nothing is written
-+ * out by this function.
-+ */
-+void ext3_update_dynamic_rev(struct super_block *sb)
-+{
-+      struct ext3_super_block *es = EXT3_SB(sb)->s_es;
-+
-+      if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
-+              return;
-+
-+      ext3_warning(sb, __FUNCTION__,
-+                   "updating to rev %d because of new feature flag, "
-+                   "running e2fsck is recommended",
-+                   EXT3_DYNAMIC_REV);
-+
-+      es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
-+      es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
-+      es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
-+      /* leave es->s_feature_*compat flags alone */
-+      /* es->s_uuid will be set by e2fsck if empty */
-+
-+      /*
-+       * The rest of the superblock fields should be zero, and if not it
-+       * means they are likely already in use, so leave them alone.  We
-+       * can leave it up to e2fsck to clean up any inconsistencies there.
-+       */
-+}
-+
-+/*
-+ * Open the external journal device @dev for reading and writing.
-+ *
-+ * Returns the opened block_device, or NULL (after logging the reason at
-+ * KERN_ERR) if the device could not be looked up or opened.
-+ */
-+static struct block_device *ext3_blkdev_get(kdev_t dev)
-+{
-+      struct block_device *bdev = bdget(kdev_t_to_nr(dev));
-+      int err = -ENODEV;
-+
-+      if (bdev != NULL) {
-+              err = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS);
-+              if (err >= 0)
-+                      return bdev;
-+      }
-+
-+      printk(KERN_ERR "EXT3: failed to open journal device %s: %d\n",
-+                      bdevname(dev), err);
-+      return NULL;
-+}
-+
-+/*
-+ * Release the journal device
-+ *
-+ * Thin wrapper that passes BDEV_FS to blkdev_put() and returns its result.
-+ */
-+static int ext3_blkdev_put(struct block_device *bdev)
-+{
-+      return blkdev_put(bdev, BDEV_FS);
-+}
-+
-+/*
-+ * Release the external journal device recorded in @sbi, if any, and clear
-+ * the pointer so it cannot be released twice.
-+ *
-+ * Returns the result of ext3_blkdev_put(), or -ENODEV when no journal
-+ * device was recorded.
-+ */
-+static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
-+{
-+      struct block_device *bdev;
-+      int ret = -ENODEV;
-+
-+      bdev = sbi->journal_bdev;
-+      if (bdev) {
-+              ret = ext3_blkdev_put(bdev);
-+              sbi->journal_bdev = NULL;
-+      }
-+      return ret;
-+}
-+
-+/* Map an i_orphan list node back to the inode that contains it. */
-+#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan)
-+
-+/*
-+ * Debugging aid: print the on-disk orphan list head from the superblock
-+ * and then every inode still on the in-memory orphan list.
-+ *
-+ * NOTE(review): NEXT_ORPHAN() is byte-swapped with le32_to_cpu() here but
-+ * is read and written without conversion in ext3_orphan_del() -- confirm
-+ * which byte order it holds.  The %d/%ld conversions are also applied to
-+ * what look like unsigned values.
-+ */
-+static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
-+{
-+      struct list_head *l;
-+      
-+      printk(KERN_ERR "sb orphan head is %d\n", 
-+             le32_to_cpu(sbi->s_es->s_last_orphan));
-+      
-+      printk(KERN_ERR "sb_info orphan list:\n");
-+      list_for_each(l, &sbi->s_orphan) {
-+              struct inode *inode = orphan_list_entry(l);
-+              printk(KERN_ERR "  "
-+                     "inode 0x%04x:%ld at %p: mode %o, nlink %d, next %d\n",
-+                     inode->i_dev, inode->i_ino, inode,
-+                     inode->i_mode, inode->i_nlink, 
-+                     le32_to_cpu(NEXT_ORPHAN(inode)));
-+      }
-+}
-+
-+/*
-+ * ext3_put_super() tears down an ext3 superblock.
-+ *
-+ * The journal is destroyed first.  On a read-write mount the RECOVER
-+ * feature flag is then cleared and the saved mount state is written back
-+ * to the superblock.  After that the group descriptor, bitmap and
-+ * superblock buffers are released, the device buffers are invalidated and
-+ * an external journal device, if there is one, is flushed and released.
-+ */
-+void ext3_put_super (struct super_block * sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct ext3_super_block *es = sbi->s_es;
-+      /* Read the journal device now: the journal is destroyed below. */
-+      kdev_t j_dev = sbi->s_journal->j_dev;
-+      int i;
-+
-+      journal_destroy(sbi->s_journal);
-+      if (!(sb->s_flags & MS_RDONLY)) {
-+              EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+              /* CPU value stored into the on-disk little-endian field. */
-+              es->s_state = cpu_to_le16(sbi->s_mount_state);
-+              BUFFER_TRACE(sbi->s_sbh, "marking dirty");
-+              mark_buffer_dirty(sbi->s_sbh);
-+              ext3_commit_super(sb, es, 1);
-+      }
-+
-+      for (i = 0; i < sbi->s_gdb_count; i++)
-+              brelse(sbi->s_group_desc[i]);
-+      kfree(sbi->s_group_desc);
-+      for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
-+              brelse(sbi->s_inode_bitmap[i]);
-+      for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
-+              brelse(sbi->s_block_bitmap[i]);
-+      brelse(sbi->s_sbh);
-+
-+      /* Debugging code just in case the in-memory inode orphan list
-+       * isn't empty.  The on-disk one can be non-empty if we've
-+       * detected an error and taken the fs readonly, but the
-+       * in-memory list had better be clean by this point. */
-+      if (!list_empty(&sbi->s_orphan))
-+              dump_orphan_list(sb, sbi);
-+      J_ASSERT(list_empty(&sbi->s_orphan));
-+
-+      invalidate_buffers(sb->s_dev);
-+      if (j_dev != sb->s_dev) {
-+              /*
-+               * Invalidate the journal device's buffers.  We don't want them
-+               * floating about in memory - the physical journal device may
-+               * be hotswapped, and it breaks the `ro-after' testing code.
-+               */
-+              fsync_no_super(j_dev);
-+              invalidate_buffers(j_dev);
-+              ext3_blkdev_remove(sbi);
-+      }
-+      clear_ro_after(sb);
-+}
-+
-+/*
-+ * Superblock operations table for ext3.  The per-entry comments record
-+ * whether each method is entered with the BKL held and, where it is not,
-+ * whether the method takes it itself.
-+ */
-+static struct super_operations ext3_sops = {
-+      read_inode:     ext3_read_inode,        /* BKL held */
-+      write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
-+      dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
-+      put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
-+      delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
-+      put_super:      ext3_put_super,         /* BKL held */
-+      write_super:    ext3_write_super,       /* BKL held */
-+      write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-+      unlockfs:       ext3_unlockfs,          /* BKL not held.  We take it */
-+      statfs:         ext3_statfs,            /* BKL held */
-+      remount_fs:     ext3_remount,           /* BKL held */
-+};
-+
-+/*
-+ * Check that mount option @option was given a non-empty @value.
-+ * Returns 0 if so; otherwise logs a notice and returns -1.
-+ */
-+static int want_value(char *value, char *option)
-+{
-+      if (value && *value)
-+              return 0;
-+
-+      printk(KERN_NOTICE "EXT3-fs: the %s option needs an argument\n",
-+             option);
-+      return -1;
-+}
-+
-+/*
-+ * Check that @value (which must not be NULL) is the empty string, i.e.
-+ * that nothing is left over after parsing @option's argument.
-+ * Returns 0 if so; otherwise logs a notice and returns -1.
-+ */
-+static int want_null_value(char *value, char *option)
-+{
-+      if (!*value)
-+              return 0;
-+
-+      printk(KERN_NOTICE "EXT3-fs: Invalid %s argument: %s\n",
-+             option, value);
-+      return -1;
-+}
-+
-+/*
-+ * Parse the argument of mount option @option as an unsigned number (base
-+ * auto-detected by simple_strtoul) into *@number.
-+ * Returns 0 on success, or -1 if the argument is missing or has trailing
-+ * characters; in the latter case *@number has already been written.
-+ */
-+static int want_numeric(char *value, char *option, unsigned long *number)
-+{
-+      char *rest;
-+
-+      if (want_value(value, option))
-+              return -1;
-+
-+      *number = simple_strtoul(value, &rest, 0);
-+
-+      /* Anything left after the digits makes the argument invalid. */
-+      return want_null_value(rest, option) ? -1 : 0;
-+}
-+
-+/*
-+ * This function has been shamelessly adapted from the msdos fs
-+ */
-+static int parse_options (char * options, unsigned long * sb_block,
-+                        struct ext3_sb_info *sbi,
-+                        unsigned long * inum,
-+                        int is_remount)
-+{
-+      unsigned long *opts = &sbi->s_mount_opt;
-+      char *name;
-+      char *arg;
-+
-+      /* A missing option string is accepted as-is. */
-+      if (options == NULL)
-+              return 1;
-+
-+      /*
-+       * Walk the comma-separated list in place.  Each recognised option
-+       * finishes its handler with "continue"; reaching the bottom of the
-+       * loop body therefore means the name is unknown.  Returns 1 when
-+       * the whole string was accepted and 0 on the first bad option.
-+       */
-+      name = strtok (options, ",");
-+      for (; name != NULL; name = strtok (NULL, ",")) {
-+              /* Split "name=value"; arg stays NULL when there is no '='. */
-+              arg = strchr (name, '=');
-+              if (arg != NULL)
-+                      *arg++ = 0;
-+
-+              if (!strcmp (name, "bsddf")) {
-+                      clear_opt (*opts, MINIX_DF);
-+                      continue;
-+              }
-+              if (!strcmp (name, "nouid32")) {
-+                      set_opt (*opts, NO_UID32);
-+                      continue;
-+              }
-+              if (!strcmp (name, "abort")) {
-+                      set_opt (*opts, ABORT);
-+                      continue;
-+              }
-+              if (!strcmp (name, "check")) {
-+                      if (!arg || !*arg || !strcmp (arg, "none"))
-+                              clear_opt (*opts, CHECK);
-+                      else
-+#ifdef CONFIG_EXT3_CHECK
-+                              set_opt (*opts, CHECK);
-+#else
-+                              printk(KERN_ERR
-+                                     "EXT3 Check option not supported\n");
-+#endif
-+                      continue;
-+              }
-+              if (!strcmp (name, "debug")) {
-+                      set_opt (*opts, DEBUG);
-+                      continue;
-+              }
-+              if (!strcmp (name, "errors")) {
-+                      if (want_value(arg, "errors"))
-+                              return 0;
-+                      /* Exactly one of the three error policies is set. */
-+                      if (!strcmp (arg, "continue")) {
-+                              clear_opt (*opts, ERRORS_RO);
-+                              clear_opt (*opts, ERRORS_PANIC);
-+                              set_opt (*opts, ERRORS_CONT);
-+                      } else if (!strcmp (arg, "remount-ro")) {
-+                              clear_opt (*opts, ERRORS_CONT);
-+                              clear_opt (*opts, ERRORS_PANIC);
-+                              set_opt (*opts, ERRORS_RO);
-+                      } else if (!strcmp (arg, "panic")) {
-+                              clear_opt (*opts, ERRORS_CONT);
-+                              clear_opt (*opts, ERRORS_RO);
-+                              set_opt (*opts, ERRORS_PANIC);
-+                      } else {
-+                              printk (KERN_ERR
-+                                      "EXT3-fs: Invalid errors option: %s\n",
-+                                      arg);
-+                              return 0;
-+                      }
-+                      continue;
-+              }
-+              if (!strcmp (name, "grpid") || !strcmp (name, "bsdgroups")) {
-+                      set_opt (*opts, GRPID);
-+                      continue;
-+              }
-+              if (!strcmp (name, "minixdf")) {
-+                      set_opt (*opts, MINIX_DF);
-+                      continue;
-+              }
-+              if (!strcmp (name, "nocheck")) {
-+                      clear_opt (*opts, CHECK);
-+                      continue;
-+              }
-+              if (!strcmp (name, "nogrpid") ||
-+                  !strcmp (name, "sysvgroups")) {
-+                      clear_opt (*opts, GRPID);
-+                      continue;
-+              }
-+              if (!strcmp (name, "resgid")) {
-+                      unsigned long gid;
-+
-+                      if (want_numeric(arg, "resgid", &gid))
-+                              return 0;
-+                      sbi->s_resgid = gid;
-+                      continue;
-+              }
-+              if (!strcmp (name, "resuid")) {
-+                      unsigned long uid;
-+
-+                      if (want_numeric(arg, "resuid", &uid))
-+                              return 0;
-+                      sbi->s_resuid = uid;
-+                      continue;
-+              }
-+              if (!strcmp (name, "sb")) {
-+                      if (want_numeric(arg, "sb", sb_block))
-+                              return 0;
-+                      continue;
-+              }
-+#ifdef CONFIG_JBD_DEBUG
-+              if (!strcmp (name, "ro-after")) {
-+                      unsigned long ticks;
-+
-+                      if (want_numeric(arg, "ro-after", &ticks))
-+                              return 0;
-+                      ext3_ro_after = ticks;
-+                      continue;
-+              }
-+#endif
-+              /* The quota options are accepted and silently ignored. */
-+              if (!strcmp (name, "grpquota") ||
-+                  !strcmp (name, "noquota") ||
-+                  !strcmp (name, "quota") ||
-+                  !strcmp (name, "usrquota"))
-+                      continue;
-+
-+              if (!strcmp (name, "journal")) {
-+                      /*
-+                       * FIXME: eventually this should be able to create a
-+                       * journal file.  For now the user may only name an
-+                       * existing inode (or ask for "update").
-+                       */
-+                      if (is_remount) {
-+                              printk(KERN_ERR "EXT3-fs: cannot specify "
-+                                     "journal on remount\n");
-+                              return 0;
-+                      }
-+                      if (want_value(arg, "journal"))
-+                              return 0;
-+                      if (!strcmp (arg, "update"))
-+                              set_opt (*opts, UPDATE_JOURNAL);
-+                      else if (want_numeric(arg, "journal", inum))
-+                              return 0;
-+                      continue;
-+              }
-+              if (!strcmp (name, "noload")) {
-+                      set_opt (*opts, NOLOAD);
-+                      continue;
-+              }
-+              if (!strcmp (name, "data")) {
-+                      int mode;
-+
-+                      if (want_value(arg, "data"))
-+                              return 0;
-+                      if (!strcmp (arg, "journal"))
-+                              mode = EXT3_MOUNT_JOURNAL_DATA;
-+                      else if (!strcmp (arg, "ordered"))
-+                              mode = EXT3_MOUNT_ORDERED_DATA;
-+                      else if (!strcmp (arg, "writeback"))
-+                              mode = EXT3_MOUNT_WRITEBACK_DATA;
-+                      else {
-+                              printk (KERN_ERR
-+                                      "EXT3-fs: Invalid data option: %s\n",
-+                                      arg);
-+                              return 0;
-+                      }
-+                      if (!is_remount) {
-+                              *opts &= ~EXT3_MOUNT_DATA_FLAGS;
-+                              *opts |= mode;
-+                      } else if ((*opts & EXT3_MOUNT_DATA_FLAGS) != mode) {
-+                              /* A remount may only restate the current mode. */
-+                              printk(KERN_ERR
-+                                     "EXT3-fs: cannot change data "
-+                                     "mode on remount\n");
-+                              return 0;
-+                      }
-+                      continue;
-+              }
-+
-+              printk (KERN_ERR
-+                      "EXT3-fs: Unrecognized mount option %s\n",
-+                      name);
-+              return 0;
-+      }
-+      return 1;
-+}
-+
-+/*
-+ * Mount-time bookkeeping on the superblock.
-+ *
-+ * Returns MS_RDONLY when the on-disk revision is newer than this code
-+ * supports (the caller is expected to force a read-only mount) and 0
-+ * otherwise.  For a read-only mount nothing else is done.  For a writable
-+ * mount the fsck-recommended warnings are printed, the mount count and
-+ * mount time are updated, the RECOVER incompat flag is set and the
-+ * superblock is committed.
-+ */
-+static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
-+                          int read_only)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int forced_flags = 0;
-+      unsigned short mounts;
-+      __s16 max_mounts;
-+
-+      if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
-+              printk (KERN_ERR "EXT3-fs warning: revision level too high, "
-+                      "forcing read-only mode\n");
-+              forced_flags = MS_RDONLY;
-+      }
-+      if (read_only)
-+              return forced_flags;
-+
-+      mounts = le16_to_cpu(es->s_mnt_count);
-+      max_mounts = (__s16) le16_to_cpu(es->s_max_mnt_count);
-+
-+      /* At most one "please run e2fsck" warning is printed. */
-+      if (!(sbi->s_mount_state & EXT3_VALID_FS)) {
-+              printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, "
-+                      "running e2fsck is recommended\n");
-+      } else if (sbi->s_mount_state & EXT3_ERROR_FS) {
-+              printk (KERN_WARNING
-+                      "EXT3-fs warning: mounting fs with errors, "
-+                      "running e2fsck is recommended\n");
-+      } else if (max_mounts >= 0 && mounts >= (unsigned short) max_mounts) {
-+              printk (KERN_WARNING
-+                      "EXT3-fs warning: maximal mount count reached, "
-+                      "running e2fsck is recommended\n");
-+      } else if (le32_to_cpu(es->s_checkinterval) &&
-+                 (le32_to_cpu(es->s_lastcheck) +
-+                  le32_to_cpu(es->s_checkinterval) <= CURRENT_TIME)) {
-+              printk (KERN_WARNING
-+                      "EXT3-fs warning: checktime reached, "
-+                      "running e2fsck is recommended\n");
-+      }
-+#if 0
-+              /* @@@ We _will_ want to clear the valid bit if we find
-+                   inconsistencies, to force a fsck at reboot.  But for
-+                   a plain journaled filesystem we can keep it set as
-+                   valid forever! :) */
-+      es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS);
-+#endif
-+      /* A stored maximum of zero is replaced by the default. */
-+      if (!max_mounts)
-+              es->s_max_mnt_count =
-+                      (__s16) cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
-+      es->s_mnt_count = cpu_to_le16(mounts + 1);
-+      es->s_mtime = cpu_to_le32(CURRENT_TIME);
-+      ext3_update_dynamic_rev(sb);
-+      EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+      ext3_commit_super (sb, es, 1);
-+
-+      if (test_opt (sb, DEBUG))
-+              printk (KERN_INFO
-+                      "[EXT3 FS %s, %s, bs=%lu, gc=%lu, "
-+                      "bpg=%lu, ipg=%lu, mo=%04lx]\n",
-+                      EXT3FS_VERSION, EXT3FS_DATE, sb->s_blocksize,
-+                      sbi->s_groups_count,
-+                      EXT3_BLOCKS_PER_GROUP(sb),
-+                      EXT3_INODES_PER_GROUP(sb),
-+                      sbi->s_mount_opt);
-+
-+      /* One log line, finished by the journal description below. */
-+      printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
-+                              bdevname(sb->s_dev));
-+      if (sbi->s_journal->j_inode != NULL)
-+              printk("internal journal\n");
-+      else
-+              printk("external journal on %s\n",
-+                              bdevname(sbi->s_journal->j_dev));
-+#ifdef CONFIG_EXT3_CHECK
-+      if (test_opt (sb, CHECK)) {
-+              ext3_check_blocks_bitmap (sb);
-+              ext3_check_inodes_bitmap (sb);
-+      }
-+#endif
-+      setup_ro_after(sb);
-+      return forced_flags;
-+}
-+
-+/*
-+ * Check that, for every group, the block bitmap, the inode bitmap and the
-+ * inode table recorded in the group descriptor lie inside that group's own
-+ * block range.  Returns 1 when every descriptor passes and 0, after
-+ * reporting through ext3_error(), at the first one that does not.
-+ */
-+static int ext3_check_descriptors (struct super_block * sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      unsigned long first = le32_to_cpu(sbi->s_es->s_first_data_block);
-+      struct ext3_group_desc *desc = NULL;
-+      int next_desc_block = 0;
-+      int group;
-+
-+      ext3_debug ("Checking group descriptors");
-+
-+      for (group = 0; group < sbi->s_groups_count; group++) {
-+              /* [first, limit) is the block range of this group. */
-+              unsigned long limit = first + EXT3_BLOCKS_PER_GROUP(sb);
-+              unsigned long where;
-+
-+              /* Move to the next descriptor buffer when one is used up. */
-+              if (!(group % EXT3_DESC_PER_BLOCK(sb)))
-+                      desc = (struct ext3_group_desc *)
-+                              sbi->s_group_desc[next_desc_block++]->b_data;
-+
-+              where = le32_to_cpu(desc->bg_block_bitmap);
-+              if (where < first || where >= limit) {
-+                      ext3_error (sb, "ext3_check_descriptors",
-+                                  "Block bitmap for group %d"
-+                                  " not in group (block %lu)!",
-+                                  group, where);
-+                      return 0;
-+              }
-+
-+              where = le32_to_cpu(desc->bg_inode_bitmap);
-+              if (where < first || where >= limit) {
-+                      ext3_error (sb, "ext3_check_descriptors",
-+                                  "Inode bitmap for group %d"
-+                                  " not in group (block %lu)!",
-+                                  group, where);
-+                      return 0;
-+              }
-+
-+              /* The whole table, not just its first block, must fit. */
-+              where = le32_to_cpu(desc->bg_inode_table);
-+              if (where < first ||
-+                  where + sbi->s_itb_per_group >= limit) {
-+                      ext3_error (sb, "ext3_check_descriptors",
-+                                  "Inode table for group %d"
-+                                  " not in group (block %lu)!",
-+                                  group, where);
-+                      return 0;
-+              }
-+
-+              first += EXT3_BLOCKS_PER_GROUP(sb);
-+              desc++;
-+      }
-+      return 1;
-+}
-+
-+
-+/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
-+ * the superblock) which were deleted from all directories, but held open by
-+ * a process at the time of a crash.  We walk the list and try to delete these
-+ * inodes at recovery time (only with a read-write filesystem).
-+ *
-+ * In order to keep the orphan inode chain consistent during traversal (in
-+ * case of crash during recovery), we link each inode into the superblock
-+ * orphan list_head and handle it the same way as an inode deletion during
-+ * normal operation (which journals the operations for us).
-+ *
-+ * We only do an iget() and an iput() on each inode, which is very safe if we
-+ * accidentally point at an in-use or already deleted inode.  The worst that
-+ * can happen in this case is that we get a "bit already cleared" message from
-+ * ext3_free_inode().  The only reason we would point at a wrong inode is if
-+ * e2fsck was run on this filesystem, and it must have already done the orphan
-+ * inode cleanup for us, so we can safely abort without any further action.
-+ *
-+ * MS_RDONLY is cleared from sb->s_flags only for the duration of the walk
-+ * and is put back before returning.  A filesystem flagged EXT3_ERROR_FS is
-+ * not walked at all: its orphan list is simply dropped.
-+ */
-+static void ext3_orphan_cleanup (struct super_block * sb,
-+                               struct ext3_super_block * es)
-+{
-+      unsigned int s_flags = sb->s_flags;
-+      int nr_orphans = 0, nr_truncates = 0;
-+
-+      if (!es->s_last_orphan) {
-+              jbd_debug(4, "no orphan inodes to clean up\n");
-+              return;
-+      }
-+
-+      /*
-+       * This check must come before MS_RDONLY is cleared below: the early
-+       * return does not restore sb->s_flags, so doing it afterwards would
-+       * leave a read-only mount of a filesystem with errors writable.
-+       * s_last_orphan is known to be non-zero here.
-+       */
-+      if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) {
-+              jbd_debug(1, "Errors on filesystem, "
-+                        "clearing orphan list.\n");
-+              es->s_last_orphan = 0;
-+              jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
-+              return;
-+      }
-+
-+      /* Temporarily allow writes; the saved flags are restored at the end. */
-+      if (s_flags & MS_RDONLY) {
-+              printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n",
-+                     bdevname(sb->s_dev));
-+              sb->s_flags &= ~MS_RDONLY;
-+      }
-+
-+      while (es->s_last_orphan) {
-+              struct inode *inode;
-+
-+              /* An unreadable orphan ends the walk and drops the list. */
-+              if (!(inode =
-+                    ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
-+                      es->s_last_orphan = 0;
-+                      break;
-+              }
-+
-+              list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
-+              if (inode->i_nlink) {
-+                      /* Still linked: only an interrupted truncate. */
-+                      printk(KERN_DEBUG
-+                              "%s: truncating inode %ld to %Ld bytes\n",
-+                              __FUNCTION__, inode->i_ino, inode->i_size);
-+                      jbd_debug(2, "truncating inode %ld to %Ld bytes\n",
-+                                inode->i_ino, inode->i_size);
-+                      ext3_truncate(inode);
-+                      nr_truncates++;
-+              } else {
-+                      printk(KERN_DEBUG
-+                              "%s: deleting unreferenced inode %ld\n",
-+                              __FUNCTION__, inode->i_ino);
-+                      jbd_debug(2, "deleting unreferenced inode %ld\n",
-+                                inode->i_ino);
-+                      nr_orphans++;
-+              }
-+              iput(inode);  /* The delete magic happens here! */
-+      }
-+
-+#define PLURAL(x) (x), ((x)==1) ? "" : "s"
-+
-+      if (nr_orphans)
-+              printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n",
-+                     bdevname(sb->s_dev), PLURAL(nr_orphans));
-+      if (nr_truncates)
-+              printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n",
-+                     bdevname(sb->s_dev), PLURAL(nr_truncates));
-+      sb->s_flags = s_flags; /* Restore MS_RDONLY status */
-+}
-+
-+#define log2(n) ffz(~(n)) /* first zero bit of ~n, i.e. lowest set bit of n; an exact log2 only for powers of two (assumes ffz() returns a bit index) */
-+
-+/*
-+ * Largest file size, in bytes, for a filesystem whose block size is
-+ * 1 << bits.  Two limits apply and the smaller one is returned:
-+ *   - what the direct plus single-, double- and triple-indirect blocks can
-+ *     map (a block holds 1 << (bits - 2) block addresses);
-+ *   - (2^32 - 1) 512-byte sectors in i_blocks, kept one filesystem block
-+ *     below the 2^32 sector mark.
-+ */
-+static loff_t ext3_max_size(int bits)
-+{
-+      const int addr_bits = bits - 2;
-+      const loff_t sector_cap = (512LL << 32) - (1 << bits);
-+      loff_t mapped;
-+
-+      /* Count the mappable blocks first, then convert to bytes. */
-+      mapped = EXT3_NDIR_BLOCKS;
-+      mapped += 1LL << addr_bits;
-+      mapped += 1LL << (2 * addr_bits);
-+      mapped += 1LL << (3 * addr_bits);
-+      mapped <<= bits;
-+
-+      return (mapped > sector_cap) ? sector_cap : mapped;
-+}
-+
-+/*
-+ * ext3_read_super() - set up an ext3 superblock at mount time.
-+ *
-+ * Parses the mount options in @data, reads and validates the on-disk
-+ * superblock (reading it a second time if the filesystem block size differs
-+ * from the size first assumed), loads and checks the group descriptors,
-+ * loads or creates the journal, instantiates the root dentry and finally
-+ * runs orphan cleanup.
-+ *
-+ * Returns @sb on success and NULL on any failure.  @silent suppresses the
-+ * "can't find ext3 filesystem" and "no journal" messages.
-+ */
-+struct super_block * ext3_read_super (struct super_block * sb, void * data,
-+                                    int silent)
-+{
-+      struct buffer_head * bh;
-+      struct ext3_super_block *es = NULL;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct inode *root;
-+      unsigned long sb_block = 1;
-+      unsigned long logic_sb_block = 1;
-+      unsigned long offset = 0;
-+      unsigned long journal_inum = 0;
-+      kdev_t dev = sb->s_dev;
-+      int blocksize;
-+      int hblock;
-+      int db_count;
-+      int i;
-+      int needs_recovery;
-+
-+#ifdef CONFIG_JBD_DEBUG
-+      ext3_ro_after = 0;
-+#endif
-+      /*
-+       * See what the current blocksize for the device is, and
-+       * use that as the blocksize.  Otherwise (or if the blocksize
-+       * is smaller than the default) use the default.
-+       * This is important for devices that have a hardware
-+       * sectorsize that is larger than the default.
-+       */
-+      blocksize = EXT3_MIN_BLOCK_SIZE;
-+      hblock = get_hardsect_size(dev);
-+      if (blocksize < hblock)
-+              blocksize = hblock;
-+
-+      sbi->s_mount_opt = 0;
-+      sbi->s_resuid = EXT3_DEF_RESUID;
-+      sbi->s_resgid = EXT3_DEF_RESGID;
-+      if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) {
-+              sb->s_dev = 0;
-+              goto out_fail;
-+      }
-+
-+      sb->s_blocksize = blocksize;
-+      set_blocksize (dev, blocksize);
-+
-+      /*
-+       * The ext3 superblock will not be buffer aligned for other than 1kB
-+       * block sizes.  We need to calculate the offset from buffer start.
-+       */
-+      if (blocksize != EXT3_MIN_BLOCK_SIZE) {
-+              logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
-+              offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
-+      }
-+
-+      if (!(bh = sb_bread(sb, logic_sb_block))) {
-+              printk (KERN_ERR "EXT3-fs: unable to read superblock\n");
-+              goto out_fail;
-+      }
-+      /*
-+       * Note: s_es must be initialized as soon as possible because
-+       *       some ext3 macro-instructions depend on its value
-+       */
-+      es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
-+      sbi->s_es = es;
-+      sb->s_magic = le16_to_cpu(es->s_magic);
-+      if (sb->s_magic != EXT3_SUPER_MAGIC) {
-+              if (!silent)
-+                      printk(KERN_ERR
-+                             "VFS: Can't find ext3 filesystem on dev %s.\n",
-+                             bdevname(dev));
-+              goto failed_mount;
-+      }
-+      if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
-+          (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
-+           EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
-+           EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
-+              printk(KERN_WARNING
-+                     "EXT3-fs warning: feature flags set on rev 0 fs, "
-+                     "running e2fsck is recommended\n");
-+      /*
-+       * Check feature flags regardless of the revision level, since we
-+       * previously didn't change the revision level when setting the flags,
-+       * so there is a chance incompat flags are set on a rev 0 filesystem.
-+       */
-+      if ((i = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))) {
-+              printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of "
-+                     "unsupported optional features (%x).\n",
-+                     bdevname(dev), i);
-+              goto failed_mount;
-+      }
-+      if (!(sb->s_flags & MS_RDONLY) &&
-+          (i = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))){
-+              printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of "
-+                     "unsupported optional features (%x).\n",
-+                     bdevname(dev), i);
-+              goto failed_mount;
-+      }
-+      sb->s_blocksize_bits = le32_to_cpu(es->s_log_block_size) + 10;
-+      sb->s_blocksize = 1 << sb->s_blocksize_bits;
-+
-+      if (sb->s_blocksize < EXT3_MIN_BLOCK_SIZE ||
-+          sb->s_blocksize > EXT3_MAX_BLOCK_SIZE) {
-+              /* Report the rejected on-disk size, not the probe size. */
-+              printk(KERN_ERR
-+                     "EXT3-fs: Unsupported filesystem blocksize %lu on %s.\n",
-+                     sb->s_blocksize, bdevname(dev));
-+              goto failed_mount;
-+      }
-+
-+      sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
-+
-+      if (sb->s_blocksize != blocksize) {
-+              blocksize = sb->s_blocksize;
-+
-+              /*
-+               * Make sure the blocksize for the filesystem is larger
-+               * than the hardware sectorsize for the machine.
-+               */
-+              if (sb->s_blocksize < hblock) {
-+                      printk(KERN_ERR "EXT3-fs: blocksize %d too small for "
-+                             "device blocksize %d.\n", blocksize, hblock);
-+                      goto failed_mount;
-+              }
-+
-+              /* Re-read the superblock using the real block size. */
-+              brelse (bh);
-+              set_blocksize (dev, sb->s_blocksize);
-+              logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
-+              offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
-+              bh = sb_bread(sb, logic_sb_block);
-+              if (!bh) {
-+                      printk(KERN_ERR
-+                             "EXT3-fs: Can't read superblock on 2nd try.\n");
-+                      /*
-+                       * The old buffer was already released above.
-+                       * NOTE(review): this path also skips the
-+                       * ext3_blkdev_remove() done at failed_mount;
-+                       * confirm that is intended.
-+                       */
-+                      return NULL;
-+              }
-+              es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
-+              sbi->s_es = es;
-+              /* Same comparison as the first magic check above. */
-+              if (le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) {
-+                      printk (KERN_ERR
-+                              "EXT3-fs: Magic mismatch, very weird !\n");
-+                      goto failed_mount;
-+              }
-+      }
-+
-+      if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
-+              sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
-+              sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
-+      } else {
-+              sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
-+              sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
-+              if (sbi->s_inode_size != EXT3_GOOD_OLD_INODE_SIZE) {
-+                      printk (KERN_ERR
-+                              "EXT3-fs: unsupported inode size: %d\n",
-+                              sbi->s_inode_size);
-+                      goto failed_mount;
-+              }
-+      }
-+      sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
-+                                 le32_to_cpu(es->s_log_frag_size);
-+      if (blocksize != sbi->s_frag_size) {
-+              printk(KERN_ERR
-+                     "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n",
-+                     sbi->s_frag_size, blocksize);
-+              goto failed_mount;
-+      }
-+      sbi->s_frags_per_block = 1;
-+      sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
-+      sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
-+      sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
-+      sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
-+      sbi->s_itb_per_group = sbi->s_inodes_per_group /sbi->s_inodes_per_block;
-+      sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
-+      sbi->s_sbh = bh;
-+      if (sbi->s_resuid == EXT3_DEF_RESUID)
-+              sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
-+      if (sbi->s_resgid == EXT3_DEF_RESGID)
-+              sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
-+      sbi->s_mount_state = le16_to_cpu(es->s_state);
-+      sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
-+      sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
-+
-+      if (sbi->s_blocks_per_group > blocksize * 8) {
-+              printk (KERN_ERR
-+                      "EXT3-fs: #blocks per group too big: %lu\n",
-+                      sbi->s_blocks_per_group);
-+              goto failed_mount;
-+      }
-+      if (sbi->s_frags_per_group > blocksize * 8) {
-+              printk (KERN_ERR
-+                      "EXT3-fs: #fragments per group too big: %lu\n",
-+                      sbi->s_frags_per_group);
-+              goto failed_mount;
-+      }
-+      if (sbi->s_inodes_per_group > blocksize * 8) {
-+              printk (KERN_ERR
-+                      "EXT3-fs: #inodes per group too big: %lu\n",
-+                      sbi->s_inodes_per_group);
-+              goto failed_mount;
-+      }
-+      /*
-+       * The value comes straight from disk and is used as a divisor just
-+       * below; refuse a zero instead of dividing by it.
-+       */
-+      if (EXT3_BLOCKS_PER_GROUP(sb) == 0) {
-+              printk (KERN_ERR "EXT3-fs: #blocks per group is zero\n");
-+              goto failed_mount;
-+      }
-+
-+      sbi->s_groups_count = (le32_to_cpu(es->s_blocks_count) -
-+                             le32_to_cpu(es->s_first_data_block) +
-+                             EXT3_BLOCKS_PER_GROUP(sb) - 1) /
-+                            EXT3_BLOCKS_PER_GROUP(sb);
-+      db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
-+                 EXT3_DESC_PER_BLOCK(sb);
-+      sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
-+                                  GFP_KERNEL);
-+      if (sbi->s_group_desc == NULL) {
-+              printk (KERN_ERR "EXT3-fs: not enough memory\n");
-+              goto failed_mount;
-+      }
-+      /* The descriptor blocks follow the superblock's block. */
-+      for (i = 0; i < db_count; i++) {
-+              sbi->s_group_desc[i] = sb_bread(sb, logic_sb_block + i + 1);
-+              if (!sbi->s_group_desc[i]) {
-+                      printk (KERN_ERR "EXT3-fs: "
-+                              "can't read group descriptor %d\n", i);
-+                      /* Only the buffers read so far get released. */
-+                      db_count = i;
-+                      goto failed_mount2;
-+              }
-+      }
-+      if (!ext3_check_descriptors (sb)) {
-+              printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n");
-+              goto failed_mount2;
-+      }
-+      for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) {
-+              sbi->s_inode_bitmap_number[i] = 0;
-+              sbi->s_inode_bitmap[i] = NULL;
-+              sbi->s_block_bitmap_number[i] = 0;
-+              sbi->s_block_bitmap[i] = NULL;
-+      }
-+      sbi->s_loaded_inode_bitmaps = 0;
-+      sbi->s_loaded_block_bitmaps = 0;
-+      sbi->s_gdb_count = db_count;
-+      get_random_bytes(&sbi->s_next_generation, sizeof(u32));
-+      /*
-+       * set up enough so that it can read an inode
-+       */
-+      sb->s_op = &ext3_sops;
-+      INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
-+
-+      sb->s_root = NULL;
-+
-+      needs_recovery = (es->s_last_orphan != 0 ||
-+                        EXT3_HAS_INCOMPAT_FEATURE(sb,
-+                                  EXT3_FEATURE_INCOMPAT_RECOVER));
-+
-+      /*
-+       * The first inode we look at is the journal inode.  Don't try
-+       * root first: it may be modified in the journal!
-+       */
-+      if (!test_opt(sb, NOLOAD) &&
-+          EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-+              if (ext3_load_journal(sb, es))
-+                      goto failed_mount2;
-+      } else if (journal_inum) {
-+              if (ext3_create_journal(sb, es, journal_inum))
-+                      goto failed_mount2;
-+      } else {
-+              if (!silent)
-+                      printk (KERN_ERR
-+                              "ext3: No journal on filesystem on %s\n",
-+                              bdevname(dev));
-+              goto failed_mount2;
-+      }
-+
-+      /* We have now updated the journal if required, so we can
-+       * validate the data journaling mode. */
-+      switch (test_opt(sb, DATA_FLAGS)) {
-+      case 0:
-+              /* No mode set, assume a default based on the journal
-+                   capabilities: ORDERED_DATA if the journal can
-+                   cope, else JOURNAL_DATA */
-+              if (journal_check_available_features
-+                  (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
-+                      set_opt(sbi->s_mount_opt, ORDERED_DATA);
-+              else
-+                      set_opt(sbi->s_mount_opt, JOURNAL_DATA);
-+              break;
-+
-+      case EXT3_MOUNT_ORDERED_DATA:
-+      case EXT3_MOUNT_WRITEBACK_DATA:
-+              if (!journal_check_available_features
-+                  (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
-+                      printk(KERN_ERR "EXT3-fs: Journal does not support "
-+                             "requested data journaling mode\n");
-+                      goto failed_mount3;
-+              }
-+              /* fall through */
-+      default:
-+              break;
-+      }
-+
-+      /*
-+       * The journal_load will have done any necessary log recovery,
-+       * so we can safely mount the rest of the filesystem now.
-+       */
-+
-+      /*
-+       * Keep the inode pointer so the reference taken by iget() can be
-+       * dropped if d_alloc_root() fails; otherwise it would leak.
-+       */
-+      root = iget(sb, EXT3_ROOT_INO);
-+      sb->s_root = d_alloc_root(root);
-+      if (!sb->s_root) {
-+              printk(KERN_ERR "EXT3-fs: get root inode failed\n");
-+              iput(root);
-+              goto failed_mount3;
-+      }
-+      if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
-+              dput(sb->s_root);
-+              sb->s_root = NULL;
-+              printk(KERN_ERR
-+                     "EXT3-fs: corrupt root inode, run e2fsck\n");
-+              goto failed_mount3;
-+      }
-+
-+      ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-+      /*
-+       * akpm: core read_super() calls in here with the superblock locked.
-+       * That deadlocks, because orphan cleanup needs to lock the superblock
-+       * in numerous places.  Here we just pop the lock - it's relatively
-+       * harmless, because we are now ready to accept write_super() requests,
-+       * and aviro says that's the only reason for hanging onto the
-+       * superblock lock.
-+       */
-+      EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
-+      unlock_super(sb);       /* akpm: sigh */
-+      ext3_orphan_cleanup(sb, es);
-+      lock_super(sb);
-+      EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
-+      if (needs_recovery)
-+              printk (KERN_INFO "EXT3-fs: recovery complete.\n");
-+      ext3_mark_recovery_complete(sb, es);
-+      printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n",
-+              test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
-+              test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
-+              "writeback");
-+
-+      return sb;
-+
-+      /* Unwind in reverse order of acquisition. */
-+failed_mount3:
-+      journal_destroy(sbi->s_journal);
-+failed_mount2:
-+      for (i = 0; i < db_count; i++)
-+              brelse(sbi->s_group_desc[i]);
-+      kfree(sbi->s_group_desc);
-+failed_mount:
-+      ext3_blkdev_remove(sbi);
-+      brelse(bh);
-+out_fail:
-+      return NULL;
-+}
-+
-+/*
-+ * Build a journal_t on top of inode number journal_inum of sb.
-+ *
-+ * Returns the journal from journal_init_inode(), or NULL after logging the
-+ * reason.  On every failure after a successful iget() the inode reference
-+ * is dropped again with iput().
-+ */
-+static journal_t *ext3_get_journal(struct super_block *sb, int journal_inum)
-+{
-+      struct inode *jinode;
-+      journal_t *jnl;
-+
-+      /* First, test for the existence of a valid inode on disk.  Bad
-+       * things happen if we iget() an unused inode, as the subsequent
-+       * iput() will try to delete it. */
-+      jinode = iget(sb, journal_inum);
-+      if (jinode == NULL) {
-+              printk(KERN_ERR "EXT3-fs: no journal found.\n");
-+              return NULL;
-+      }
-+
-+      /* A link count of zero means the inode is deleted: mark it bad
-+       * before releasing it. */
-+      if (jinode->i_nlink == 0) {
-+              make_bad_inode(jinode);
-+              iput(jinode);
-+              printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n");
-+              return NULL;
-+      }
-+
-+      jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
-+                jinode, jinode->i_size);
-+
-+      /* Only a good regular file can hold the journal. */
-+      if (is_bad_inode(jinode) || !S_ISREG(jinode->i_mode)) {
-+              printk(KERN_ERR "EXT3-fs: invalid journal inode.\n");
-+              iput(jinode);
-+              return NULL;
-+      }
-+
-+      jnl = journal_init_inode(jinode);
-+      if (jnl != NULL)
-+              return jnl;
-+
-+      printk(KERN_ERR "EXT3-fs: Could not load journal inode\n");
-+      iput(jinode);
-+      return NULL;
-+}
-+
-+static journal_t *ext3_get_dev_journal(struct super_block *sb,
-+                                     int dev)
-+{
-+      struct buffer_head * bh;
-+      journal_t *journal;
-+      int start;
-+      int len;
-+      int hblock, blocksize;
-+      unsigned long sb_block;
-+      unsigned long offset;
-+      kdev_t journal_dev = to_kdev_t(dev);
-+      struct ext3_super_block * es;
-+      struct block_device *bdev;
-+
-+      bdev = ext3_blkdev_get(journal_dev);
-+      if (bdev == NULL)
-+              return NULL;
-+
-+      blocksize = sb->s_blocksize;
-+      hblock = get_hardsect_size(journal_dev);
-+      if (blocksize < hblock) {
-+              printk(KERN_ERR
-+                      "EXT3-fs: blocksize too small for journal device.\n");
-+              goto out_bdev;
-+      }
-+      
-+      sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
-+      offset = EXT3_MIN_BLOCK_SIZE % blocksize;
-+      set_blocksize(dev, blocksize);
-+      if (!(bh = bread(dev, sb_block, blocksize))) {
-+              printk(KERN_ERR "EXT3-fs: couldn't read superblock of "
-+                     "external journal\n");
-+              goto out_bdev;
-+      }
-+
-+      es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
-+      if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
-+          !(le32_to_cpu(es->s_feature_incompat) &
-+            EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-+              printk(KERN_ERR "EXT3-fs: external journal has "
-+                                      "bad superblock\n");
-+              brelse(bh);
-+              goto out_bdev;
-+      }
-+
-+      if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
-+              printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
-+              brelse(bh);
-+              goto out_bdev;
-+      }
-+
-+      len = le32_to_cpu(es->s_blocks_count);
-+      start = sb_block + 1;
-+      brelse(bh);     /* we're done with the superblock */
-+
-+      journal = journal_init_dev(journal_dev, sb->s_dev, 
-+                                      start, len, blocksize);
-+      if (!journal) {
-+              printk(KERN_ERR "EXT3-fs: failed to create device journal\n");
-+              goto out_bdev;
-+      }
-+      ll_rw_block(READ, 1, &journal->j_sb_buffer);
-+      wait_on_buffer(journal->j_sb_buffer);
-+      if (!buffer_uptodate(journal->j_sb_buffer)) {
-+              printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
-+              goto out_journal;
-+      }
-+      if (ntohl(journal->j_superblock->s_nr_users) != 1) {
-+              printk(KERN_ERR "EXT3-fs: External journal has more than one "
-+                                      "user (unsupported) - %d\n",
-+                      ntohl(journal->j_superblock->s_nr_users));
-+              goto out_journal;
-+      }
-+      EXT3_SB(sb)->journal_bdev = bdev;
-+      return journal;
-+out_journal:
-+      journal_destroy(journal);
-+out_bdev:
-+      ext3_blkdev_put(bdev);
-+      return NULL;
-+}
-+
-+static int ext3_load_journal(struct super_block * sb,
-+                           struct ext3_super_block * es)
-+{
-+      journal_t *journal;
-+      int journal_inum = le32_to_cpu(es->s_journal_inum);
-+      int journal_dev = le32_to_cpu(es->s_journal_dev);
-+      int err = 0;
-+      int really_read_only;
-+
-+      really_read_only = is_read_only(sb->s_dev);
-+
-+      /*
-+       * Are we loading a blank journal or performing recovery after a
-+       * crash?  For recovery, we need to check in advance whether we
-+       * can get read-write access to the device.
-+       */
-+
-+      if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
-+              if (sb->s_flags & MS_RDONLY) {
-+                      printk(KERN_INFO "EXT3-fs: INFO: recovery "
-+                                      "required on readonly filesystem.\n");
-+                      if (really_read_only) {
-+                              printk(KERN_ERR "EXT3-fs: write access "
-+                                      "unavailable, cannot proceed.\n");
-+                              return -EROFS;
-+                      }
-+                      printk (KERN_INFO "EXT3-fs: write access will "
-+                                      "be enabled during recovery.\n");
-+              }
-+      }
-+
-+      if (journal_inum && journal_dev) {
-+              printk(KERN_ERR "EXT3-fs: filesystem has both journal "
-+                     "and inode journals!\n");
-+              return -EINVAL;
-+      }
-+
-+      if (journal_inum) {
-+              if (!(journal = ext3_get_journal(sb, journal_inum)))
-+                      return -EINVAL;
-+      } else {
-+              if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
-+                      return -EINVAL;
-+      }
-+      
-+
-+      if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
-+              err = journal_update_format(journal);
-+              if (err)  {
-+                      printk(KERN_ERR "EXT3-fs: error updating journal.\n");
-+                      journal_destroy(journal);
-+                      return err;
-+              }
-+      }
-+
-+      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
-+              err = journal_wipe(journal, !really_read_only);
-+      if (!err)
-+              err = journal_load(journal);
-+
-+      if (err) {
-+              printk(KERN_ERR "EXT3-fs: error loading journal.\n");
-+              journal_destroy(journal);
-+              return err;
-+      }
-+
-+      EXT3_SB(sb)->s_journal = journal;
-+      ext3_clear_journal_err(sb, es);
-+      return 0;
-+}
-+
-+static int ext3_create_journal(struct super_block * sb,
-+                             struct ext3_super_block * es,
-+                             int journal_inum)
-+{
-+      journal_t *journal;
-+
-+      if (sb->s_flags & MS_RDONLY) {
-+              printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to "
-+                              "create journal.\n");
-+              return -EROFS;
-+      }
-+
-+      if (!(journal = ext3_get_journal(sb, journal_inum)))
-+              return -EINVAL;
-+
-+      printk(KERN_INFO "EXT3-fs: creating new journal on inode %d\n",
-+             journal_inum);
-+
-+      if (journal_create(journal)) {
-+              printk(KERN_ERR "EXT3-fs: error creating journal.\n");
-+              journal_destroy(journal);
-+              return -EIO;
-+      }
-+
-+      EXT3_SB(sb)->s_journal = journal;
-+
-+      ext3_update_dynamic_rev(sb);
-+      EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+      EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
-+
-+      es->s_journal_inum = cpu_to_le32(journal_inum);
-+      sb->s_dirt = 1;
-+
-+      /* Make sure we flush the recovery flag to disk. */
-+      ext3_commit_super(sb, es, 1);
-+
-+      return 0;
-+}
-+
-+static void ext3_commit_super (struct super_block * sb,
-+                             struct ext3_super_block * es,
-+                             int sync)
-+{
-+      es->s_wtime = cpu_to_le32(CURRENT_TIME);
-+      BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty");
-+      mark_buffer_dirty(sb->u.ext3_sb.s_sbh);
-+      if (sync) {
-+              ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh);
-+              wait_on_buffer(sb->u.ext3_sb.s_sbh);
-+      }
-+}
-+
-+
-+/*
-+ * Have we just finished recovery?  If so, and if we are mounting (or
-+ * remounting) the filesystem readonly, then we will end up with a
-+ * consistent fs on disk.  Record that fact.
-+ */
-+static void ext3_mark_recovery_complete(struct super_block * sb,
-+                                      struct ext3_super_block * es)
-+{
-+      journal_flush(EXT3_SB(sb)->s_journal);
-+      if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
-+          sb->s_flags & MS_RDONLY) {
-+              EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+              sb->s_dirt = 0;
-+              ext3_commit_super(sb, es, 1);
-+      }
-+}
-+
-+/*
-+ * If we are mounting (or read-write remounting) a filesystem whose journal
-+ * has recorded an error from a previous lifetime, move that error to the
-+ * main filesystem now.
-+ */
-+static void ext3_clear_journal_err(struct super_block * sb,
-+                                 struct ext3_super_block * es)
-+{
-+      journal_t *journal;
-+      int j_errno;
-+      const char *errstr;
-+      
-+      journal = EXT3_SB(sb)->s_journal;
-+
-+      /*
-+       * Now check for any error status which may have been recorded in the
-+       * journal by a prior ext3_error() or ext3_abort()
-+       */
-+
-+      j_errno = journal_errno(journal);
-+      if (j_errno) {
-+              char nbuf[16];
-+              
-+              errstr = ext3_decode_error(sb, j_errno, nbuf);
-+              ext3_warning(sb, __FUNCTION__, "Filesystem error recorded "
-+                           "from previous mount: %s", errstr);
-+              ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
-+                           "filesystem check.");
-+              
-+              sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
-+              es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
-+              ext3_commit_super (sb, es, 1);
-+
-+              journal_clear_err(journal);
-+      }
-+}
-+
-+/*
-+ * Force the running and committing transactions to commit,
-+ * and wait on the commit.
-+ */
-+int ext3_force_commit(struct super_block *sb)
-+{
-+      journal_t *journal;
-+      int ret;
-+
-+      if (sb->s_flags & MS_RDONLY)
-+              return 0;
-+
-+      journal = EXT3_SB(sb)->s_journal;
-+      sb->s_dirt = 0;
-+      lock_kernel();  /* important: lock down j_running_transaction */
-+      ret = ext3_journal_force_commit(journal);
-+      unlock_kernel();
-+      return ret;
-+}
-+
-+/*
-+ * Ext3 always journals updates to the superblock itself, so we don't
-+ * have to propagate any other updates to the superblock on disk at this
-+ * point.  Just start an async writeback to get the buffers on their way
-+ * to the disk.
-+ *
-+ * This implicitly triggers the writebehind on sync().
-+ */
-+
-+static int do_sync_supers = 0;
-+MODULE_PARM(do_sync_supers, "i");
-+MODULE_PARM_DESC(do_sync_supers, "Write superblocks synchronously");
-+
-+void ext3_write_super (struct super_block * sb)
-+{
-+      tid_t target;
-+      
-+      if (down_trylock(&sb->s_lock) == 0)
-+              BUG();          /* aviro detector */
-+      sb->s_dirt = 0;
-+      target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-+
-+      if (do_sync_supers) {
-+              unlock_super(sb);
-+              log_wait_commit(EXT3_SB(sb)->s_journal, target);
-+              lock_super(sb);
-+      }
-+}
-+
-+/*
-+ * LVM calls this function before a (read-only) snapshot is created.  This
-+ * gives us a chance to flush the journal completely and mark the fs clean.
-+ */
-+void ext3_write_super_lockfs(struct super_block *sb)
-+{
-+      sb->s_dirt = 0;
-+
-+      lock_kernel();          /* 2.4.5 forgot to do this for us */
-+      if (!(sb->s_flags & MS_RDONLY)) {
-+              journal_t *journal = EXT3_SB(sb)->s_journal;
-+
-+              /* Now we set up the journal barrier. */
-+              journal_lock_updates(journal);
-+              journal_flush(journal);
-+
-+              /* Journal blocked and flushed, clear needs_recovery flag. */
-+              EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+              ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
-+      }
-+      unlock_kernel();
-+}
-+
-+/*
-+ * Called by LVM after the snapshot is done.  We need to reset the RECOVER
-+ * flag here, even though the filesystem is not technically dirty yet.
-+ */
-+void ext3_unlockfs(struct super_block *sb)
-+{
-+      if (!(sb->s_flags & MS_RDONLY)) {
-+              lock_kernel();
-+              lock_super(sb);
-+              /* Reser the needs_recovery flag before the fs is unlocked. */
-+              EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+              ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
-+              unlock_super(sb);
-+              journal_unlock_updates(EXT3_SB(sb)->s_journal);
-+              unlock_kernel();
-+      }
-+}
-+
-+int ext3_remount (struct super_block * sb, int * flags, char * data)
-+{
-+      struct ext3_super_block * es;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      unsigned long tmp;
-+
-+      clear_ro_after(sb);
-+
-+      /*
-+       * Allow the "check" option to be passed as a remount option.
-+       */
-+      if (!parse_options(data, &tmp, sbi, &tmp, 1))
-+              return -EINVAL;
-+
-+      if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-+              ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-+
-+      es = sbi->s_es;
-+
-+      if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
-+              if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-+                      return -EROFS;
-+
-+              if (*flags & MS_RDONLY) {
-+                      /*
-+                       * First of all, the unconditional stuff we have to do
-+                       * to disable replay of the journal when we next remount
-+                       */
-+                      sb->s_flags |= MS_RDONLY;
-+
-+                      /*
-+                       * OK, test if we are remounting a valid rw partition
-+                       * readonly, and if so set the rdonly flag and then
-+                       * mark the partition as valid again.
-+                       */
-+                      if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
-+                          (sbi->s_mount_state & EXT3_VALID_FS))
-+                              es->s_state = cpu_to_le16(sbi->s_mount_state);
-+
-+                      ext3_mark_recovery_complete(sb, es);
-+              } else {
-+                      int ret;
-+                      if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
-+                                      ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
-+                              printk(KERN_WARNING "EXT3-fs: %s: couldn't "
-+                                     "remount RDWR because of unsupported "
-+                                     "optional features (%x).\n",
-+                                     bdevname(sb->s_dev), ret);
-+                              return -EROFS;
-+                      }
-+                      /*
-+                       * Mounting a RDONLY partition read-write, so reread
-+                       * and store the current valid flag.  (It may have
-+                       * been changed by e2fsck since we originally mounted
-+                       * the partition.)
-+                       */
-+                      ext3_clear_journal_err(sb, es);
-+                      sbi->s_mount_state = le16_to_cpu(es->s_state);
-+                      if (!ext3_setup_super (sb, es, 0))
-+                              sb->s_flags &= ~MS_RDONLY;
-+              }
-+      }
-+      setup_ro_after(sb);
-+      return 0;
-+}
-+
-+int ext3_statfs (struct super_block * sb, struct statfs * buf)
-+{
-+      struct ext3_super_block *es = EXT3_SB(sb)->s_es;
-+      unsigned long overhead;
-+      int i;
-+
-+      if (test_opt (sb, MINIX_DF))
-+              overhead = 0;
-+      else {
-+              /*
-+               * Compute the overhead (FS structures)
-+               */
-+
-+              /*
-+               * All of the blocks before first_data_block are
-+               * overhead
-+               */
-+              overhead = le32_to_cpu(es->s_first_data_block);
-+
-+              /*
-+               * Add the overhead attributed to the superblock and
-+               * block group descriptors.  If the sparse superblocks
-+               * feature is turned on, then not all groups have this.
-+               */
-+              for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
-+                      overhead += ext3_bg_has_super(sb, i) +
-+                              ext3_bg_num_gdb(sb, i);
-+
-+              /*
-+               * Every block group has an inode bitmap, a block
-+               * bitmap, and an inode table.
-+               */
-+              overhead += (EXT3_SB(sb)->s_groups_count *
-+                           (2 + EXT3_SB(sb)->s_itb_per_group));
-+      }
-+
-+      buf->f_type = EXT3_SUPER_MAGIC;
-+      buf->f_bsize = sb->s_blocksize;
-+      buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead;
-+      buf->f_bfree = ext3_count_free_blocks (sb);
-+      buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
-+      if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
-+              buf->f_bavail = 0;
-+      buf->f_files = le32_to_cpu(es->s_inodes_count);
-+      buf->f_ffree = ext3_count_free_inodes (sb);
-+      buf->f_namelen = EXT3_NAME_LEN;
-+      return 0;
-+}
-+
-+static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
-+
-+static int __init init_ext3_fs(void)
-+{
-+        return register_filesystem(&ext3_fs_type);
-+}
-+
-+static void __exit exit_ext3_fs(void)
-+{
-+      unregister_filesystem(&ext3_fs_type);
-+}
-+
-+EXPORT_NO_SYMBOLS;
-+
-+MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-+MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
-+MODULE_LICENSE("GPL");
-+module_init(init_ext3_fs)
-+module_exit(exit_ext3_fs)
-diff -rup --new-file linux.mcp2/fs/ext3/symlink.c linux_tmp/fs/ext3/symlink.c
---- linux.mcp2/fs/ext3/symlink.c       1969-12-31 16:00:00.000000000 -0800
-+++ linux_tmp/fs/ext3/symlink.c        2001-11-09 14:25:04.000000000 -0800
-@@ -0,0 +1,39 @@
-+/*
-+ *  linux/fs/ext3/symlink.c
-+ *
-+ * Only fast symlinks left here - the rest is done by generic code. AV, 1999
-+ *
-+ * Copyright (C) 1992, 1993, 1994, 1995
-+ * Remy Card (card@masi.ibp.fr)
-+ * Laboratoire MASI - Institut Blaise Pascal
-+ * Universite Pierre et Marie Curie (Paris VI)
-+ *
-+ *  from
-+ *
-+ *  linux/fs/minix/symlink.c
-+ *
-+ *  Copyright (C) 1991, 1992  Linus Torvalds
-+ *
-+ *  ext3 symlink handling code
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+
-+static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen)
-+{
-+      char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
-+      return vfs_readlink(dentry, buffer, buflen, s);
-+}
-+
-+static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
-+{
-+      char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
-+      return vfs_follow_link(nd, s);
-+}
-+
-+struct inode_operations ext3_fast_symlink_inode_operations = {
-+      readlink:       ext3_readlink,          /* BKL not held.  Don't need */
-+      follow_link:    ext3_follow_link,       /* BKL not held.  Don't need */
-+};
diff --git a/lustre/kernel_patches/patches/2.4.19-jbd.patch b/lustre/kernel_patches/patches/2.4.19-jbd.patch
deleted file mode 100644 (file)
index 4f4b38e..0000000
+++ /dev/null
@@ -1,6524 +0,0 @@
-diff -ruP linux.mcp2/fs/jbd/Makefile linuxppc_2.4.19_final/fs/jbd/Makefile
---- linux.mcp2/fs/jbd/Makefile 1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/Makefile      2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,15 @@
-+#
-+# fs/jbd/Makefile
-+# 
-+# Makefile for the linux journaling routines.
-+#
-+
-+export-objs := journal.o
-+O_TARGET := jbd.o
-+
-+obj-y   := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o
-+
-+obj-m   := $(O_TARGET)
-+
-+include $(TOPDIR)/Rules.make
-+
-diff -ruP linux.mcp2/fs/jbd/checkpoint.c linuxppc_2.4.19_final/fs/jbd/checkpoint.c
---- linux.mcp2/fs/jbd/checkpoint.c     1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/checkpoint.c  2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,605 @@
-+/*
-+ * linux/fs/checkpoint.c
-+ * 
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
-+ *
-+ * Copyright 1999 Red Hat Software --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Checkpoint routines for the generic filesystem journaling code.  
-+ * Part of the ext2fs journaling system.  
-+ *
-+ * Checkpointing is the process of ensuring that a section of the log is
-+ * committed fully to disk, so that that portion of the log can be
-+ * reused.
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+
-+extern spinlock_t journal_datalist_lock;
-+
-+/*
-+ * Unlink a buffer from a transaction. 
-+ *
-+ * Called with journal_datalist_lock held.
-+ */
-+
-+static inline void __buffer_unlink(struct journal_head *jh)
-+{
-+      transaction_t *transaction;
-+
-+      transaction = jh->b_cp_transaction;
-+      jh->b_cp_transaction = NULL;
-+
-+      jh->b_cpnext->b_cpprev = jh->b_cpprev;
-+      jh->b_cpprev->b_cpnext = jh->b_cpnext;
-+      if (transaction->t_checkpoint_list == jh)
-+              transaction->t_checkpoint_list = jh->b_cpnext;
-+      if (transaction->t_checkpoint_list == jh)
-+              transaction->t_checkpoint_list = NULL;
-+}
-+
-+/*
-+ * Try to release a checkpointed buffer from its transaction.
-+ * Returns 1 if we released it.
-+ * Requires journal_datalist_lock
-+ */
-+static int __try_to_free_cp_buf(struct journal_head *jh)
-+{
-+      int ret = 0;
-+      struct buffer_head *bh = jh2bh(jh);
-+
-+      if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
-+              JBUFFER_TRACE(jh, "remove from checkpoint list");
-+              __journal_remove_checkpoint(jh);
-+              __journal_remove_journal_head(bh);
-+              BUFFER_TRACE(bh, "release");
-+              /* BUF_LOCKED -> BUF_CLEAN (fwiw) */
-+              refile_buffer(bh);
-+              __brelse(bh);
-+              ret = 1;
-+      }
-+      return ret;
-+}
-+
-+/*
-+ * log_wait_for_space: wait until there is space in the journal.
-+ *
-+ * Called with the journal already locked, but it will be unlocked if we have
-+ * to wait for a checkpoint to free up some space in the log.
-+ */
-+
-+void log_wait_for_space(journal_t *journal, int nblocks)
-+{
-+      while (log_space_left(journal) < nblocks) {
-+              if (journal->j_flags & JFS_ABORT)
-+                      return;
-+              unlock_journal(journal);
-+              down(&journal->j_checkpoint_sem);
-+              lock_journal(journal);
-+              
-+              /* Test again, another process may have checkpointed
-+               * while we were waiting for the checkpoint lock */
-+              if (log_space_left(journal) < nblocks) {
-+                      log_do_checkpoint(journal, nblocks);
-+              }
-+              up(&journal->j_checkpoint_sem);
-+      }
-+}
-+
-+/*
-+ * Clean up a transaction's checkpoint list.  
-+ *
-+ * We wait for any pending IO to complete and make sure any clean
-+ * buffers are removed from the transaction. 
-+ *
-+ * Return 1 if we performed any actions which might have destroyed the
-+ * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
-+ * the last checkpoint buffer is cleansed)
-+ *
-+ * Called with the journal locked.
-+ * Called with journal_datalist_lock held.
-+ */
-+static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
-+{
-+      struct journal_head *jh, *next_jh, *last_jh;
-+      struct buffer_head *bh;
-+      int ret = 0;
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+      jh = transaction->t_checkpoint_list;
-+      if (!jh)
-+              return 0;
-+
-+      last_jh = jh->b_cpprev;
-+      next_jh = jh;
-+      do {
-+              jh = next_jh;
-+              bh = jh2bh(jh);
-+              if (buffer_locked(bh)) {
-+                      atomic_inc(&bh->b_count);
-+                      spin_unlock(&journal_datalist_lock);
-+                      unlock_journal(journal);
-+                      wait_on_buffer(bh);
-+                      /* the journal_head may have gone by now */
-+                      BUFFER_TRACE(bh, "brelse");
-+                      __brelse(bh);
-+                      goto out_return_1;
-+              }
-+              
-+              if (jh->b_transaction != NULL) {
-+                      transaction_t *transaction = jh->b_transaction;
-+                      tid_t tid = transaction->t_tid;
-+
-+                      spin_unlock(&journal_datalist_lock);
-+                      log_start_commit(journal, transaction);
-+                      unlock_journal(journal);
-+                      log_wait_commit(journal, tid);
-+                      goto out_return_1;
-+              }
-+
-+              /*
-+               * We used to test for (jh->b_list != BUF_CLEAN) here.
-+               * But unmap_underlying_metadata() can place buffer onto
-+               * BUF_CLEAN. Since refile_buffer() no longer takes buffers
-+               * off checkpoint lists, we cope with it here
-+               */
-+              /*
-+               * AKPM: I think the buffer_jdirty test is redundant - it
-+               * shouldn't have NULL b_transaction?
-+               */
-+              next_jh = jh->b_cpnext;
-+              if (!buffer_dirty(bh) && !buffer_jdirty(bh)) {
-+                      BUFFER_TRACE(bh, "remove from checkpoint");
-+                      __journal_remove_checkpoint(jh);
-+                      __journal_remove_journal_head(bh);
-+                      refile_buffer(bh);
-+                      __brelse(bh);
-+                      ret = 1;
-+              }
-+              
-+              jh = next_jh;
-+      } while (jh != last_jh);
-+
-+      return ret;
-+out_return_1:
-+      lock_journal(journal);
-+      spin_lock(&journal_datalist_lock);
-+      return 1;
-+}
-+
-+#define NR_BATCH      64
-+
-+static void __flush_batch(struct buffer_head **bhs, int *batch_count)
-+{
-+      int i;
-+
-+      spin_unlock(&journal_datalist_lock);
-+      ll_rw_block(WRITE, *batch_count, bhs);
-+      run_task_queue(&tq_disk);
-+      spin_lock(&journal_datalist_lock);
-+      for (i = 0; i < *batch_count; i++) {
-+              struct buffer_head *bh = bhs[i];
-+              clear_bit(BH_JWrite, &bh->b_state);
-+              BUFFER_TRACE(bh, "brelse");
-+              __brelse(bh);
-+      }
-+      *batch_count = 0;
-+}
-+
-+/*
-+ * Try to flush one buffer from the checkpoint list to disk.
-+ *
-+ * Return 1 if something happened which requires us to abort the current
-+ * scan of the checkpoint list.  
-+ *
-+ * Called with journal_datalist_lock held.
-+ */
-+static int __flush_buffer(journal_t *journal, struct journal_head *jh,
-+                      struct buffer_head **bhs, int *batch_count,
-+                      int *drop_count)
-+{
-+      struct buffer_head *bh = jh2bh(jh);
-+      int ret = 0;
-+
-+      if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
-+              J_ASSERT_JH(jh, jh->b_transaction == NULL);
-+              
-+              /*
-+               * Important: we are about to write the buffer, and
-+               * possibly block, while still holding the journal lock.
-+               * We cannot afford to let the transaction logic start
-+               * messing around with this buffer before we write it to
-+               * disk, as that would break recoverability.  
-+               */
-+              BUFFER_TRACE(bh, "queue");
-+              atomic_inc(&bh->b_count);
-+              J_ASSERT_BH(bh, !test_bit(BH_JWrite, &bh->b_state));
-+              set_bit(BH_JWrite, &bh->b_state);
-+              bhs[*batch_count] = bh;
-+              (*batch_count)++;
-+              if (*batch_count == NR_BATCH) {
-+                      __flush_batch(bhs, batch_count);
-+                      ret = 1;
-+              }
-+      } else {
-+              int last_buffer = 0;
-+              if (jh->b_cpnext == jh) {
-+                      /* We may be about to drop the transaction.  Tell the
-+                       * caller that the lists have changed.
-+                       */
-+                      last_buffer = 1;
-+              }
-+              if (__try_to_free_cp_buf(jh)) {
-+                      (*drop_count)++;
-+                      ret = last_buffer;
-+              }
-+      }
-+      return ret;
-+}
-+
-+      
-+/*
-+ * Perform an actual checkpoint.  We don't write out only enough to
-+ * satisfy the current blocked requests: rather we submit a reasonably
-+ * sized chunk of the outstanding data to disk at once for
-+ * efficiency.  log_wait_for_space() will retry if we didn't free enough.
-+ * 
-+ * However, we _do_ take into account the amount requested so that once
-+ * the IO has been queued, we can return as soon as enough of it has
-+ * completed to disk.  
-+ *
-+ * The journal should be locked before calling this function.
-+ */
-+
-+/* @@@ `nblocks' is unused.  Should it be used? */
-+int log_do_checkpoint (journal_t *journal, int nblocks)
-+{
-+      transaction_t *transaction, *last_transaction, *next_transaction;
-+      int result;
-+      int target;
-+      int batch_count = 0;
-+      struct buffer_head *bhs[NR_BATCH];
-+
-+      jbd_debug(1, "Start checkpoint\n");
-+
-+      /* 
-+       * First thing: if there are any transactions in the log which
-+       * don't need checkpointing, just eliminate them from the
-+       * journal straight away.  
-+       */
-+      result = cleanup_journal_tail(journal);
-+      jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
-+      if (result <= 0)
-+              return result;
-+
-+      /*
-+       * OK, we need to start writing disk blocks.  Try to free up a
-+       * quarter of the log in a single checkpoint if we can.
-+       */
-+      /*
-+       * AKPM: check this code.  I had a feeling a while back that it
-+       * degenerates into a busy loop at unmount time.
-+       */
-+      target = (journal->j_last - journal->j_first) / 4;
-+
-+      spin_lock(&journal_datalist_lock);
-+repeat:
-+      transaction = journal->j_checkpoint_transactions;
-+      if (transaction == NULL)
-+              goto done;
-+      last_transaction = transaction->t_cpprev;
-+      next_transaction = transaction;
-+
-+      do {
-+              struct journal_head *jh, *last_jh, *next_jh;
-+              int drop_count = 0;
-+              int cleanup_ret, retry = 0;
-+
-+              transaction = next_transaction;
-+              next_transaction = transaction->t_cpnext;
-+              jh = transaction->t_checkpoint_list;
-+              last_jh = jh->b_cpprev;
-+              next_jh = jh;
-+              do {
-+                      jh = next_jh;
-+                      next_jh = jh->b_cpnext;
-+                      retry = __flush_buffer(journal, jh, bhs, &batch_count,
-+                                              &drop_count);
-+              } while (jh != last_jh && !retry);
-+              if (batch_count) {
-+                      __flush_batch(bhs, &batch_count);
-+                      goto repeat;
-+              }
-+              if (retry)
-+                      goto repeat;
-+              /*
-+               * We have walked the whole transaction list without
-+               * finding anything to write to disk.  We had better be
-+               * able to make some progress or we are in trouble. 
-+               */
-+              cleanup_ret = __cleanup_transaction(journal, transaction);
-+              J_ASSERT(drop_count != 0 || cleanup_ret != 0);
-+              goto repeat;    /* __cleanup may have dropped lock */
-+      } while (transaction != last_transaction);
-+
-+done:
-+      spin_unlock(&journal_datalist_lock);
-+      result = cleanup_journal_tail(journal);
-+      if (result < 0)
-+              return result;
-+      
-+      return 0;
-+}
-+
-+/*
-+ * Check the list of checkpoint transactions for the journal to see if
-+ * we have already got rid of any since the last update of the log tail
-+ * in the journal superblock.  If so, we can instantly roll the
-+ * superblock forward to remove those transactions from the log.
-+ *
-+ * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
-+ * NOTE(review): the body below only ever returns 0 or 1.
-+ * Called with the journal lock held.
-+ * Takes journal_datalist_lock itself while choosing the new tail.
-+ * This is the only part of the journaling code which really needs to be
-+ * aware of transaction aborts.  Checkpointing involves writing to the
-+ * main filesystem area rather than to the journal, so it can proceed
-+ * even in abort state, but we must not update the journal superblock if
-+ * we have an abort error outstanding.
-+ */
-+
-+int cleanup_journal_tail(journal_t *journal)
-+{
-+      transaction_t * transaction;    /* oldest transaction remaining in the log */
-+      tid_t           first_tid;      /* tid that j_tail_sequence is moved to */
-+      unsigned long   blocknr, freed; /* new tail block; number of blocks reclaimed */
-+
-+      /* OK, work out the oldest transaction remaining in the log, and
-+       * the log block it starts at.
-+       *
-+       * If the log is now empty, we need to work out which is the
-+       * next transaction ID we will write, and where it will
-+       * start. */
-+
-+      /* j_checkpoint_transactions needs locking */
-+      spin_lock(&journal_datalist_lock);
-+      transaction = journal->j_checkpoint_transactions;
-+      if (transaction) {      /* 1st choice: head of the checkpoint list */
-+              first_tid = transaction->t_tid;
-+              blocknr = transaction->t_log_start;
-+      } else if ((transaction = journal->j_committing_transaction) != NULL) {
-+              first_tid = transaction->t_tid; /* 2nd choice: the committing transaction */
-+              blocknr = transaction->t_log_start;
-+      } else if ((transaction = journal->j_running_transaction) != NULL) {
-+              first_tid = transaction->t_tid; /* 3rd choice: the running transaction */
-+              blocknr = journal->j_head;      /* its start is taken from j_head, not t_log_start */
-+      } else {                /* log is empty: use the next sequence number and j_head */
-+              first_tid = journal->j_transaction_sequence;
-+              blocknr = journal->j_head;
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+      J_ASSERT (blocknr != 0);        /* a zero tail block is treated as a bug */
-+
-+      /* If the oldest pinned transaction is at the tail of the log
-+           already then there's not much we can do right now. */
-+      if (journal->j_tail_sequence == first_tid)
-+              return 1;
-+
-+      /* OK, update the superblock to recover the freed space.
-+       * Physical blocks come first: have we wrapped beyond the end of
-+       * the log?  */
-+      freed = blocknr - journal->j_tail;
-+      if (blocknr < journal->j_tail)  /* new tail is behind the old one: add the span j_last - j_first */
-+              freed = freed + journal->j_last - journal->j_first;
-+
-+      jbd_debug(1,
-+                "Cleaning journal tail from %d to %d (offset %lu), "
-+                "freeing %lu\n",
-+                journal->j_tail_sequence, first_tid, blocknr, freed);
-+
-+      journal->j_free += freed;
-+      journal->j_tail_sequence = first_tid;
-+      journal->j_tail = blocknr;
-+      if (!(journal->j_flags & JFS_ABORT))    /* in-memory tail always moves; only the superblock write is skipped on abort */
-+              journal_update_superblock(journal, 1);
-+      return 0;
-+}
-+
-+
-+/* Checkpoint list management */
-+
-+/*
-+ * journal_clean_checkpoint_list
-+ *
-+ * Find all the written-back checkpoint buffers in the journal and release them.
-+ * Visits every journal_head on every checkpoint transaction's list.
-+ * Called with the journal locked.
-+ * Called with journal_datalist_lock held.
-+ * Returns number of buffers reaped (for debug)
-+ */
-+
-+int __journal_clean_checkpoint_list(journal_t *journal)
-+{
-+      transaction_t *transaction, *last_transaction, *next_transaction;
-+      int ret = 0;    /* running total of the __try_to_free_cp_buf() results */
-+
-+      transaction = journal->j_checkpoint_transactions;
-+      if (transaction == 0)   /* no checkpoint transactions: return 0 */
-+              goto out;
-+
-+      last_transaction = transaction->t_cpprev;       /* list is circular; the head's predecessor marks the end */
-+      next_transaction = transaction;
-+      do {
-+              struct journal_head *jh;
-+
-+              transaction = next_transaction;
-+              next_transaction = transaction->t_cpnext;       /* saved up front; presumably the calls below may free transaction -- TODO confirm */
-+              jh = transaction->t_checkpoint_list;
-+              if (jh) {       /* skip transactions with no checkpoint buffers */
-+                      struct journal_head *last_jh = jh->b_cpprev;
-+                      struct journal_head *next_jh = jh;
-+                      do {
-+                              jh = next_jh;
-+                              next_jh = jh->b_cpnext; /* read before jh is handed to the helper */
-+                              ret += __try_to_free_cp_buf(jh);
-+                      } while (jh != last_jh);
-+              }
-+      } while (transaction != last_transaction);      /* pointer comparison only; transaction is not dereferenced here */
-+out:
-+      return ret;
-+}
-+
-+/*
-+ * journal_remove_checkpoint: called after a buffer has been committed
-+ * to disk (either by being write-back flushed to disk, or being
-+ * committed to the log).
-+ *
-+ * We cannot safely clean a transaction out of the log until all of the
-+ * buffer updates committed in that transaction have safely been stored
-+ * elsewhere on disk.  To achieve this, all of the buffers in a
-+ * transaction need to be maintained on the transaction's checkpoint
-+ * list until they have been rewritten, at which point this function is
-+ * called to remove the buffer from the existing transaction's
-+ * checkpoint list.
-+ * An emptied, non-committing transaction is dropped here as well.
-+ * This function is called with the journal locked.
-+ * This function is called with journal_datalist_lock held.
-+ */
-+
-+void __journal_remove_checkpoint(struct journal_head *jh)
-+{
-+      transaction_t *transaction;     /* transaction whose checkpoint list holds jh */
-+      journal_t *journal;
-+
-+      JBUFFER_TRACE(jh, "entry");
-+      
-+      if ((transaction = jh->b_cp_transaction) == NULL) {     /* not on any checkpoint list: nothing to do */
-+              JBUFFER_TRACE(jh, "not on transaction");
-+              goto out;
-+      }
-+
-+      journal = transaction->t_journal;
-+
-+      __buffer_unlink(jh);    /* defined outside this view; presumably takes jh off the checkpoint list -- confirm */
-+
-+      if (transaction->t_checkpoint_list != NULL)     /* other buffers still pin the transaction */
-+              goto out;
-+      JBUFFER_TRACE(jh, "transaction has no more buffers");
-+
-+      /* There is one special case to worry about: if we have just
-+           pulled the buffer off a committing transaction's forget list,
-+           then even if the checkpoint list is empty, the transaction
-+           obviously cannot be dropped! */
-+
-+      if (transaction == journal->j_committing_transaction) {
-+              JBUFFER_TRACE(jh, "belongs to committing transaction");
-+              goto out;
-+      }
-+
-+      /* OK, that was the last buffer for the transaction: we can now
-+         safely remove this transaction from the log */
-+
-+      __journal_drop_transaction(journal, transaction);       /* unlinks and kfree()s it; transaction is invalid afterwards */
-+
-+      /* Just in case anybody was waiting for more transactions to be
-+           checkpointed... */
-+      wake_up(&journal->j_wait_logspace);
-+out:
-+      JBUFFER_TRACE(jh, "exit");
-+}
-+
-+void journal_remove_checkpoint(struct journal_head *jh)       /* locking wrapper around __journal_remove_checkpoint() */
-+{
-+      spin_lock(&journal_datalist_lock);      /* the __ variant is documented as called with this lock held */
-+      __journal_remove_checkpoint(jh);
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/*
-+ * journal_insert_checkpoint: put a committed buffer onto a checkpoint
-+ * list so that we know when it is safe to clean the transaction out of
-+ * the log.
-+ * jh becomes the new head of transaction->t_checkpoint_list.
-+ * Called with the journal locked.
-+ * Called with journal_datalist_lock held.
-+ */
-+void __journal_insert_checkpoint(struct journal_head *jh, 
-+                             transaction_t *transaction)
-+{
-+      JBUFFER_TRACE(jh, "entry");
-+      J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jdirty(jh2bh(jh)));   /* buffer must be dirty or jdirty */
-+      J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);  /* and not already on a checkpoint list */
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+      jh->b_cp_transaction = transaction;
-+
-+      if (!transaction->t_checkpoint_list) {  /* empty list: jh links to itself */
-+              jh->b_cpnext = jh->b_cpprev = jh;
-+      } else {        /* splice jh in between the old head's predecessor and the old head */
-+              jh->b_cpnext = transaction->t_checkpoint_list;
-+              jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
-+              jh->b_cpprev->b_cpnext = jh;
-+              jh->b_cpnext->b_cpprev = jh;
-+      }
-+      transaction->t_checkpoint_list = jh;    /* in both cases jh is the new list head */
-+}
-+
-+void journal_insert_checkpoint(struct journal_head *jh, 
-+                             transaction_t *transaction)      /* locking wrapper around __journal_insert_checkpoint() */
-+{
-+      spin_lock(&journal_datalist_lock);      /* the __ variant asserts that this lock is held */
-+      __journal_insert_checkpoint(jh, transaction);
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/*
-+ * We've finished with this transaction structure: adios...
-+ * Unlinks it from the journal's checkpoint list and kfree()s it.
-+ * The transaction must have no links except for the checkpoint by this
-+ * point.
-+ *
-+ * Called with the journal locked.
-+ * Called with journal_datalist_lock held.
-+ */
-+
-+void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
-+{
-+      assert_spin_locked(&journal_datalist_lock);
-+      if (transaction->t_cpnext) {    /* only unlink if it is on the circular checkpoint list */
-+              transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
-+              transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
-+              if (journal->j_checkpoint_transactions == transaction)  /* it was the head: advance the head */
-+                      journal->j_checkpoint_transactions =
-+                              transaction->t_cpnext;
-+              if (journal->j_checkpoint_transactions == transaction)  /* still the head => it was the only entry; list becomes empty */
-+                      journal->j_checkpoint_transactions = NULL;
-+      }
-+
-+      J_ASSERT (transaction->t_ilist == NULL);        /* all of the transaction's lists must already be empty */
-+      J_ASSERT (transaction->t_buffers == NULL);
-+      J_ASSERT (transaction->t_sync_datalist == NULL);
-+      J_ASSERT (transaction->t_async_datalist == NULL);
-+      J_ASSERT (transaction->t_forget == NULL);
-+      J_ASSERT (transaction->t_iobuf_list == NULL);
-+      J_ASSERT (transaction->t_shadow_list == NULL);
-+      J_ASSERT (transaction->t_log_list == NULL);
-+      J_ASSERT (transaction->t_checkpoint_list == NULL);
-+      J_ASSERT (transaction->t_updates == 0); /* and no updates may be outstanding */
-+      
-+      J_ASSERT (transaction->t_journal->j_committing_transaction !=
-+                                      transaction);   /* a committing transaction must never be dropped */
-+      
-+      jbd_debug (1, "Dropping transaction %d, all done\n", 
-+                 transaction->t_tid);
-+      kfree (transaction);    /* callers must not touch transaction after this */
-+}
-+
-diff -ruP linux.mcp2/fs/jbd/commit.c linuxppc_2.4.19_final/fs/jbd/commit.c
---- linux.mcp2/fs/jbd/commit.c 1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/commit.c      2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,719 @@
-+/*
-+ * linux/fs/commit.c
-+ *
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
-+ *
-+ * Copyright 1998 Red Hat corp --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Journal commit routines for the generic filesystem journaling code;
-+ * part of the ext2fs journaling system.
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+#include <linux/smp_lock.h>
-+
-+extern spinlock_t journal_datalist_lock;
-+
-+/*
-+ * Default IO end handler for temporary BJ_IO buffer_heads.
-+ * journal_commit_transaction() installs it as bh->b_end_io before submit_bh(). */
-+void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
-+{
-+      BUFFER_TRACE(bh, "");
-+      mark_buffer_uptodate(bh, uptodate);     /* pass the caller's uptodate value on to the buffer */
-+      unlock_buffer(bh);      /* the commit code waits on locked buffers via wait_on_buffer() */
-+}
-+
-+/*
-+ * journal_commit_transaction
-+ *
-+ * The primary function for committing a transaction to the log.  This
-+ * function is called by the journal thread to begin a complete commit.
-+ */
-+void journal_commit_transaction(journal_t *journal)
-+{
-+      transaction_t *commit_transaction;
-+      struct journal_head *jh, *new_jh, *descriptor;
-+      struct journal_head *next_jh, *last_jh;
-+      struct buffer_head *wbuf[64];
-+      int bufs;
-+      int flags;
-+      int err;
-+      unsigned long blocknr;
-+      char *tagp = NULL;
-+      journal_header_t *header;
-+      journal_block_tag_t *tag = NULL;
-+      int space_left = 0;
-+      int first_tag = 0;
-+      int tag_flag;
-+      int i;
-+
-+      /*
-+       * First job: lock down the current transaction and wait for
-+       * all outstanding updates to complete.
-+       */
-+
-+      lock_journal(journal); /* Protect journal->j_running_transaction */
-+
-+#ifdef COMMIT_STATS
-+      spin_lock(&journal_datalist_lock);
-+      summarise_journal_usage(journal);
-+      spin_unlock(&journal_datalist_lock);
-+#endif
-+
-+      lock_kernel();
-+      
-+      J_ASSERT (journal->j_running_transaction != NULL);
-+      J_ASSERT (journal->j_committing_transaction == NULL);
-+
-+      commit_transaction = journal->j_running_transaction;
-+      J_ASSERT (commit_transaction->t_state == T_RUNNING);
-+
-+      jbd_debug (1, "JBD: starting commit of transaction %d\n",
-+                 commit_transaction->t_tid);
-+
-+      commit_transaction->t_state = T_LOCKED;
-+      while (commit_transaction->t_updates != 0) {
-+              unlock_journal(journal);
-+              sleep_on(&journal->j_wait_updates);
-+              lock_journal(journal);
-+      }
-+
-+      J_ASSERT (commit_transaction->t_outstanding_credits <=
-+                      journal->j_max_transaction_buffers);
-+
-+      /* Do we need to erase the effects of a prior journal_flush? */
-+      if (journal->j_flags & JFS_FLUSHED) {
-+              jbd_debug(3, "super block updated\n");
-+              journal_update_superblock(journal, 1);
-+      } else {
-+              jbd_debug(3, "superblock not updated\n");
-+      }
-+
-+      /*
-+       * First thing we are allowed to do is to discard any remaining
-+       * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
-+       * that there are no such buffers: if a large filesystem
-+       * operation like a truncate needs to split itself over multiple
-+       * transactions, then it may try to do a journal_restart() while
-+       * there are still BJ_Reserved buffers outstanding.  These must
-+       * be released cleanly from the current transaction.
-+       *
-+       * In this case, the filesystem must still reserve write access
-+       * again before modifying the buffer in the new transaction, but
-+       * we do not require it to remember exactly which old buffers it
-+       * has reserved.  This is consistent with the existing behaviour
-+       * that multiple journal_get_write_access() calls to the same
-+       * buffer are perfectly permissible.
-+       */
-+
-+      while (commit_transaction->t_reserved_list) {
-+              jh = commit_transaction->t_reserved_list;
-+              JBUFFER_TRACE(jh, "reserved, unused: refile");
-+              journal_refile_buffer(jh);
-+      }
-+
-+      /*
-+       * Now try to drop any written-back buffers from the journal's
-+       * checkpoint lists.  We do this *before* commit because it potentially
-+       * frees some memory
-+       */
-+      spin_lock(&journal_datalist_lock);
-+      __journal_clean_checkpoint_list(journal);
-+      spin_unlock(&journal_datalist_lock);
-+
-+      /* First part of the commit: force the revoke list out to disk.
-+       * The revoke code generates its own metadata blocks on disk for this.
-+       *
-+       * It is important that we do this while the transaction is
-+       * still locked.  Generating the revoke records should not
-+       * generate any IO stalls, so this should be quick; and doing
-+       * the work while we have the transaction locked means that we
-+       * only ever have to maintain the revoke list for one
-+       * transaction at a time.
-+       */
-+
-+      jbd_debug (3, "JBD: commit phase 1\n");
-+
-+      journal_write_revoke_records(journal, commit_transaction);
-+
-+      /*
-+       * Now that we have built the revoke records, we can start
-+       * reusing the revoke list for a new running transaction.  We
-+       * can now safely start committing the old transaction: time to
-+       * get a new running transaction for incoming filesystem updates
-+       */
-+
-+      commit_transaction->t_state = T_FLUSH;
-+
-+      wake_up(&journal->j_wait_transaction_locked);
-+
-+      journal->j_committing_transaction = commit_transaction;
-+      journal->j_running_transaction = NULL;
-+
-+      commit_transaction->t_log_start = journal->j_head;
-+
-+      unlock_kernel();
-+      
-+      jbd_debug (3, "JBD: commit phase 2\n");
-+
-+      /*
-+       * Now start flushing things to disk, in the order they appear
-+       * on the transaction lists.  Data blocks go first.
-+       */
-+
-+      /*
-+       * Whenever we unlock the journal and sleep, things can get added
-+       * onto ->t_datalist, so we have to keep looping back to write_out_data
-+       * until we *know* that the list is empty.
-+       */
-+write_out_data:
-+
-+      /*
-+       * Cleanup any flushed data buffers from the data list.  Even in
-+       * abort mode, we want to flush this out as soon as possible.
-+       *
-+       * We take journal_datalist_lock to protect the lists from
-+       * journal_try_to_free_buffers().
-+       */
-+      spin_lock(&journal_datalist_lock);
-+
-+write_out_data_locked:
-+      bufs = 0;
-+      next_jh = commit_transaction->t_sync_datalist;
-+      if (next_jh == NULL)
-+              goto sync_datalist_empty;
-+      last_jh = next_jh->b_tprev;
-+
-+      do {
-+              struct buffer_head *bh;
-+
-+              jh = next_jh;
-+              next_jh = jh->b_tnext;
-+              bh = jh2bh(jh);
-+              if (!buffer_locked(bh)) {
-+                      if (buffer_dirty(bh)) {
-+                              BUFFER_TRACE(bh, "start journal writeout");
-+                              atomic_inc(&bh->b_count);
-+                              wbuf[bufs++] = bh;
-+                      } else {
-+                              BUFFER_TRACE(bh, "writeout complete: unfile");
-+                              __journal_unfile_buffer(jh);
-+                              jh->b_transaction = NULL;
-+                              __journal_remove_journal_head(bh);
-+                              refile_buffer(bh);
-+                              __brelse(bh);
-+                      }
-+              }
-+              if (bufs == ARRAY_SIZE(wbuf)) {
-+                      /*
-+                       * Major speedup: start here on the next scan
-+                       */
-+                      J_ASSERT(commit_transaction->t_sync_datalist != 0);
-+                      commit_transaction->t_sync_datalist = jh;
-+                      break;
-+              }
-+      } while (jh != last_jh);
-+
-+      if (bufs || current->need_resched) {
-+              jbd_debug(2, "submit %d writes\n", bufs);
-+              spin_unlock(&journal_datalist_lock);
-+              unlock_journal(journal);
-+              if (bufs)
-+                      ll_rw_block(WRITE, bufs, wbuf);
-+              if (current->need_resched)
-+                      schedule();
-+              journal_brelse_array(wbuf, bufs);
-+              lock_journal(journal);
-+              spin_lock(&journal_datalist_lock);
-+              if (bufs)
-+                      goto write_out_data_locked;
-+      }
-+
-+      /*
-+       * Wait for all previously submitted IO on the data list to complete.
-+       */
-+      jh = commit_transaction->t_sync_datalist;
-+      if (jh == NULL)
-+              goto sync_datalist_empty;
-+
-+      do {
-+              struct buffer_head *bh;
-+              jh = jh->b_tprev;       /* Wait on the last written */
-+              bh = jh2bh(jh);
-+              if (buffer_locked(bh)) {
-+                      spin_unlock(&journal_datalist_lock);
-+                      unlock_journal(journal);
-+                      wait_on_buffer(bh);
-+                      /* the journal_head may have been removed now */
-+                      lock_journal(journal);
-+                      goto write_out_data;
-+              } else if (buffer_dirty(bh)) {
-+                      goto write_out_data_locked;
-+              }
-+      } while (jh != commit_transaction->t_sync_datalist);
-+      goto write_out_data_locked;
-+
-+sync_datalist_empty:
-+      /*
-+       * Wait for all the async writepage data.  As they become unlocked
-+       * in end_buffer_io_async(), the only place where they can be
-+       * reaped is in try_to_free_buffers(), and we're locked against
-+       * that.
-+       */
-+      while ((jh = commit_transaction->t_async_datalist)) {
-+              struct buffer_head *bh = jh2bh(jh);
-+              if (buffer_locked(bh)) {
-+                      spin_unlock(&journal_datalist_lock);
-+                      unlock_journal(journal);
-+                      wait_on_buffer(bh);
-+                      lock_journal(journal);
-+                      spin_lock(&journal_datalist_lock);
-+                      continue;       /* List may have changed */
-+              }
-+              if (jh->b_next_transaction) {
-+                      /*
-+                       * For writepage() buffers in journalled data mode: a
-+                       * later transaction may want the buffer for "metadata"
-+                       */
-+                      __journal_refile_buffer(jh);
-+              } else {
-+                      BUFFER_TRACE(bh, "finished async writeout: unfile");
-+                      __journal_unfile_buffer(jh);
-+                      jh->b_transaction = NULL;
-+                      __journal_remove_journal_head(bh);
-+                      BUFFER_TRACE(bh, "finished async writeout: refile");
-+                      /* It can sometimes be on BUF_LOCKED due to migration
-+                       * from syncdata to asyncdata */
-+                      if (bh->b_list != BUF_CLEAN)
-+                              refile_buffer(bh);
-+                      __brelse(bh);
-+              }
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+
-+      /*
-+       * If we found any dirty or locked buffers, then we should have
-+       * looped back up to the write_out_data label.  If there weren't
-+       * any then journal_clean_data_list should have wiped the list
-+       * clean by now, so check that it is in fact empty.
-+       */
-+      J_ASSERT (commit_transaction->t_sync_datalist == NULL);
-+      J_ASSERT (commit_transaction->t_async_datalist == NULL);
-+
-+      jbd_debug (3, "JBD: commit phase 3\n");
-+
-+      /*
-+       * Way to go: we have now written out all of the data for a
-+       * transaction!  Now comes the tricky part: we need to write out
-+       * metadata.  Loop over the transaction's entire buffer list:
-+       */
-+      commit_transaction->t_state = T_COMMIT;
-+
-+      descriptor = 0;
-+      bufs = 0;
-+      while (commit_transaction->t_buffers) {
-+
-+              /* Find the next buffer to be journaled... */
-+
-+              jh = commit_transaction->t_buffers;
-+
-+              /* If we're in abort mode, we just un-journal the buffer and
-+                 release it for background writing. */
-+
-+              if (is_journal_aborted(journal)) {
-+                      JBUFFER_TRACE(jh, "journal is aborting: refile");
-+                      journal_refile_buffer(jh);
-+                      /* If that was the last one, we need to clean up
-+                       * any descriptor buffers which may have been
-+                       * already allocated, even if we are now
-+                       * aborting. */
-+                      if (!commit_transaction->t_buffers)
-+                              goto start_journal_io;
-+                      continue;
-+              }
-+
-+              /* Make sure we have a descriptor block in which to
-+                 record the metadata buffer. */
-+
-+              if (!descriptor) {
-+                      struct buffer_head *bh;
-+
-+                      J_ASSERT (bufs == 0);
-+
-+                      jbd_debug(4, "JBD: get descriptor\n");
-+
-+                      descriptor = journal_get_descriptor_buffer(journal);
-+                      if (!descriptor) {
-+                              __journal_abort_hard(journal);
-+                              continue;
-+                      }
-+                      
-+                      bh = jh2bh(descriptor);
-+                      jbd_debug(4, "JBD: got buffer %ld (%p)\n",
-+                              bh->b_blocknr, bh->b_data);
-+                      header = (journal_header_t *)&bh->b_data[0];
-+                      header->h_magic     = htonl(JFS_MAGIC_NUMBER);
-+                      header->h_blocktype = htonl(JFS_DESCRIPTOR_BLOCK);
-+                      header->h_sequence  = htonl(commit_transaction->t_tid);
-+
-+                      tagp = &bh->b_data[sizeof(journal_header_t)];
-+                      space_left = bh->b_size - sizeof(journal_header_t);
-+                      first_tag = 1;
-+                      set_bit(BH_JWrite, &bh->b_state);
-+                      wbuf[bufs++] = bh;
-+
-+                      /* Record it so that we can wait for IO
-+                           completion later */
-+                      BUFFER_TRACE(bh, "ph3: file as descriptor");
-+                      journal_file_buffer(descriptor, commit_transaction,
-+                                              BJ_LogCtl);
-+              }
-+
-+              /* Where is the buffer to be written? */
-+
-+              err = journal_next_log_block(journal, &blocknr);
-+              /* If the block mapping failed, just abandon the buffer
-+                 and repeat this loop: we'll fall into the
-+                 refile-on-abort condition above. */
-+              if (err) {
-+                      __journal_abort_hard(journal);
-+                      continue;
-+              }
-+
-+              /* Bump b_count to prevent truncate from stumbling over
-+                   the shadowed buffer!  @@@ This can go if we ever get
-+                   rid of the BJ_IO/BJ_Shadow pairing of buffers. */
-+              atomic_inc(&jh2bh(jh)->b_count);
-+
-+              /* Make a temporary IO buffer with which to write it out
-+                   (this will requeue both the metadata buffer and the
-+                   temporary IO buffer). new_bh goes on BJ_IO*/
-+
-+              set_bit(BH_JWrite, &jh2bh(jh)->b_state);
-+              /*
-+               * akpm: journal_write_metadata_buffer() sets
-+               * new_bh->b_transaction to commit_transaction.
-+               * We need to clean this up before we release new_bh
-+               * (which is of type BJ_IO)
-+               */
-+              JBUFFER_TRACE(jh, "ph3: write metadata");
-+              flags = journal_write_metadata_buffer(commit_transaction,
-+                                                    jh, &new_jh, blocknr);
-+              set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
-+              set_bit(BH_Lock, &jh2bh(new_jh)->b_state);
-+              wbuf[bufs++] = jh2bh(new_jh);
-+
-+              /* Record the new block's tag in the current descriptor
-+                   buffer */
-+
-+              tag_flag = 0;
-+              if (flags & 1)
-+                      tag_flag |= JFS_FLAG_ESCAPE;
-+              if (!first_tag)
-+                      tag_flag |= JFS_FLAG_SAME_UUID;
-+
-+              tag = (journal_block_tag_t *) tagp;
-+              tag->t_blocknr = htonl(jh2bh(jh)->b_blocknr);
-+              tag->t_flags = htonl(tag_flag);
-+              tagp += sizeof(journal_block_tag_t);
-+              space_left -= sizeof(journal_block_tag_t);
-+
-+              if (first_tag) {
-+                      memcpy (tagp, journal->j_uuid, 16);
-+                      tagp += 16;
-+                      space_left -= 16;
-+                      first_tag = 0;
-+              }
-+
-+              /* If there's no more to do, or if the descriptor is full,
-+                 let the IO rip! */
-+
-+              if (bufs == ARRAY_SIZE(wbuf) ||
-+                  commit_transaction->t_buffers == NULL ||
-+                  space_left < sizeof(journal_block_tag_t) + 16) {
-+
-+                      jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
-+
-+                      /* Write an end-of-descriptor marker before
-+                           submitting the IOs.  "tag" still points to
-+                           the last tag we set up. */
-+
-+                      tag->t_flags |= htonl(JFS_FLAG_LAST_TAG);
-+
-+start_journal_io:
-+                      unlock_journal(journal);
-+                      for (i=0; i<bufs; i++) {
-+                              struct buffer_head *bh = wbuf[i];
-+                              clear_bit(BH_Dirty, &bh->b_state);
-+                              bh->b_end_io = journal_end_buffer_io_sync;
-+                              submit_bh(WRITE, bh);
-+                      }
-+                      if (current->need_resched)
-+                              schedule();
-+                      lock_journal(journal);
-+
-+                      /* Force a new descriptor to be generated next
-+                           time round the loop. */
-+                      descriptor = NULL;
-+                      bufs = 0;
-+              }
-+      }
-+
-+      /* Lo and behold: we have just managed to send a transaction to
-+           the log.  Before we can commit it, wait for the IO so far to
-+           complete.  Control buffers being written are on the
-+           transaction's t_log_list queue, and metadata buffers are on
-+           the t_iobuf_list queue.
-+
-+         Wait for the transactions in reverse order.  That way we are
-+         less likely to be woken up until all IOs have completed, and
-+         so we incur less scheduling load.
-+      */
-+
-+      jbd_debug(3, "JBD: commit phase 4\n");
-+
-+      /* akpm: these are BJ_IO, and journal_datalist_lock is not needed */
-+ wait_for_iobuf:
-+      while (commit_transaction->t_iobuf_list != NULL) {
-+              struct buffer_head *bh;
-+              jh = commit_transaction->t_iobuf_list->b_tprev;
-+              bh = jh2bh(jh);
-+              if (buffer_locked(bh)) {
-+                      unlock_journal(journal);
-+                      wait_on_buffer(bh);
-+                      lock_journal(journal);
-+                      goto wait_for_iobuf;
-+              }
-+
-+              clear_bit(BH_JWrite, &jh2bh(jh)->b_state);
-+
-+              JBUFFER_TRACE(jh, "ph4: unfile after journal write");
-+              journal_unfile_buffer(jh);
-+
-+              /*
-+               * akpm: don't put back a buffer_head with stale pointers
-+               * dangling around.
-+               */
-+              J_ASSERT_JH(jh, jh->b_transaction != NULL);
-+              jh->b_transaction = NULL;
-+
-+              /*
-+               * ->t_iobuf_list should contain only dummy buffer_heads
-+               * which were created by journal_write_metadata_buffer().
-+               */
-+              bh = jh2bh(jh);
-+              BUFFER_TRACE(bh, "dumping temporary bh");
-+              journal_unlock_journal_head(jh);
-+              __brelse(bh);
-+              J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
-+              put_unused_buffer_head(bh);
-+
-+              /* We also have to unlock and free the corresponding
-+                   shadowed buffer */
-+              jh = commit_transaction->t_shadow_list->b_tprev;
-+              bh = jh2bh(jh);
-+              clear_bit(BH_JWrite, &bh->b_state);
-+              J_ASSERT_BH(bh, buffer_jdirty(bh));
-+
-+              /* The metadata is now released for reuse, but we need
-+                   to remember it against this transaction so that when
-+                   we finally commit, we can do any checkpointing
-+                   required. */
-+              JBUFFER_TRACE(jh, "file as BJ_Forget");
-+              journal_file_buffer(jh, commit_transaction, BJ_Forget);
-+              /* Wake up any transactions which were waiting for this
-+                 IO to complete */
-+              wake_up(&bh->b_wait);
-+              JBUFFER_TRACE(jh, "brelse shadowed buffer");
-+              __brelse(bh);
-+      }
-+
-+      J_ASSERT (commit_transaction->t_shadow_list == NULL);
-+
-+      jbd_debug(3, "JBD: commit phase 5\n");
-+
-+      /* Here we wait for the revoke record and descriptor record buffers */
-+ wait_for_ctlbuf:
-+      while (commit_transaction->t_log_list != NULL) {
-+              struct buffer_head *bh;
-+
-+              jh = commit_transaction->t_log_list->b_tprev;
-+              bh = jh2bh(jh);
-+              if (buffer_locked(bh)) {
-+                      unlock_journal(journal);
-+                      wait_on_buffer(bh);
-+                      lock_journal(journal);
-+                      goto wait_for_ctlbuf;
-+              }
-+
-+              BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
-+              clear_bit(BH_JWrite, &bh->b_state);
-+              journal_unfile_buffer(jh);
-+              jh->b_transaction = NULL;
-+              journal_unlock_journal_head(jh);
-+              put_bh(bh);                     /* One for getblk */
-+      }
-+
-+      jbd_debug(3, "JBD: commit phase 6\n");
-+
-+      if (is_journal_aborted(journal))
-+              goto skip_commit;
-+
-+      /* Done it all: now write the commit record.  We should have
-+       * cleaned up our previous buffers by now, so if we are in abort
-+       * mode we can now just skip the rest of the journal write
-+       * entirely. */
-+
-+      descriptor = journal_get_descriptor_buffer(journal);
-+      if (!descriptor) {
-+              __journal_abort_hard(journal);
-+              goto skip_commit;
-+      }
-+      
-+      /* AKPM: buglet - add `i' to tmp! */
-+      for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
-+              journal_header_t *tmp =
-+                      (journal_header_t*)jh2bh(descriptor)->b_data;
-+              tmp->h_magic = htonl(JFS_MAGIC_NUMBER);
-+              tmp->h_blocktype = htonl(JFS_COMMIT_BLOCK);
-+              tmp->h_sequence = htonl(commit_transaction->t_tid);
-+      }
-+
-+      unlock_journal(journal);
-+      JBUFFER_TRACE(descriptor, "write commit block");
-+      {
-+              struct buffer_head *bh = jh2bh(descriptor);
-+              clear_bit(BH_Dirty, &bh->b_state);
-+              bh->b_end_io = journal_end_buffer_io_sync;
-+              submit_bh(WRITE, bh);
-+              wait_on_buffer(bh);
-+              put_bh(bh);             /* One for getblk() */
-+              journal_unlock_journal_head(descriptor);
-+      }
-+      lock_journal(journal);
-+
-+      /* End of a transaction!  Finally, we can do checkpoint
-+           processing: any buffers committed as a result of this
-+           transaction can be removed from any checkpoint list it was on
-+           before. */
-+
-+skip_commit:
-+
-+      jbd_debug(3, "JBD: commit phase 7\n");
-+
-+      J_ASSERT(commit_transaction->t_sync_datalist == NULL);
-+      J_ASSERT(commit_transaction->t_async_datalist == NULL);
-+      J_ASSERT(commit_transaction->t_buffers == NULL);
-+      J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
-+      J_ASSERT(commit_transaction->t_iobuf_list == NULL);
-+      J_ASSERT(commit_transaction->t_shadow_list == NULL);
-+      J_ASSERT(commit_transaction->t_log_list == NULL);
-+
-+      while (commit_transaction->t_forget) {
-+              transaction_t *cp_transaction;
-+              struct buffer_head *bh;
-+
-+              jh = commit_transaction->t_forget;
-+              J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
-+                      jh->b_transaction == journal->j_running_transaction);
-+
-+              /*
-+               * If there is undo-protected committed data against
-+               * this buffer, then we can remove it now.  If it is a
-+               * buffer needing such protection, the old frozen_data
-+               * field now points to a committed version of the
-+               * buffer, so rotate that field to the new committed
-+               * data.
-+               *
-+               * Otherwise, we can just throw away the frozen data now.
-+               */
-+              if (jh->b_committed_data) {
-+                      kfree(jh->b_committed_data);
-+                      jh->b_committed_data = NULL;
-+                      if (jh->b_frozen_data) {
-+                              jh->b_committed_data = jh->b_frozen_data;
-+                              jh->b_frozen_data = NULL;
-+                      }
-+              } else if (jh->b_frozen_data) {
-+                      kfree(jh->b_frozen_data);
-+                      jh->b_frozen_data = NULL;
-+              }
-+
-+              spin_lock(&journal_datalist_lock);
-+              cp_transaction = jh->b_cp_transaction;
-+              if (cp_transaction) {
-+                      JBUFFER_TRACE(jh, "remove from old cp transaction");
-+                      J_ASSERT_JH(jh, commit_transaction != cp_transaction);
-+                      __journal_remove_checkpoint(jh);
-+              }
-+
-+              /* Only re-checkpoint the buffer_head if it is marked
-+               * dirty.  If the buffer was added to the BJ_Forget list
-+               * by journal_forget, it may no longer be dirty and
-+               * there's no point in keeping a checkpoint record for
-+               * it. */
-+              bh = jh2bh(jh);
-+              if (buffer_jdirty(bh)) {
-+                      JBUFFER_TRACE(jh, "add to new checkpointing trans");
-+                      __journal_insert_checkpoint(jh, commit_transaction);
-+                      JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-+                      __journal_refile_buffer(jh);
-+              } else {
-+                      J_ASSERT_BH(bh, !buffer_dirty(bh));
-+                      J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+                      __journal_unfile_buffer(jh);
-+                      jh->b_transaction = 0;
-+                      __journal_remove_journal_head(bh);
-+                      __brelse(bh);
-+              }
-+              spin_unlock(&journal_datalist_lock);
-+      }
-+
-+      /* Done with this transaction! */
-+
-+      jbd_debug(3, "JBD: commit phase 8\n");
-+
-+      J_ASSERT (commit_transaction->t_state == T_COMMIT);
-+      commit_transaction->t_state = T_FINISHED;
-+
-+      J_ASSERT (commit_transaction == journal->j_committing_transaction);
-+      journal->j_commit_sequence = commit_transaction->t_tid;
-+      journal->j_committing_transaction = NULL;
-+
-+      spin_lock(&journal_datalist_lock);
-+      if (commit_transaction->t_checkpoint_list == NULL) {
-+              __journal_drop_transaction(journal, commit_transaction);
-+      } else {
-+              if (journal->j_checkpoint_transactions == NULL) {
-+                      journal->j_checkpoint_transactions = commit_transaction;
-+                      commit_transaction->t_cpnext = commit_transaction;
-+                      commit_transaction->t_cpprev = commit_transaction;
-+              } else {
-+                      commit_transaction->t_cpnext =
-+                              journal->j_checkpoint_transactions;
-+                      commit_transaction->t_cpprev =
-+                              commit_transaction->t_cpnext->t_cpprev;
-+                      commit_transaction->t_cpnext->t_cpprev =
-+                              commit_transaction;
-+                      commit_transaction->t_cpprev->t_cpnext =
-+                              commit_transaction;
-+              }
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+
-+      jbd_debug(1, "JBD: commit %d complete, head %d\n",
-+                journal->j_commit_sequence, journal->j_tail_sequence);
-+
-+      unlock_journal(journal);
-+      wake_up(&journal->j_wait_done_commit);
-+}
-diff -ruP linux.mcp2/fs/jbd/journal.c linuxppc_2.4.19_final/fs/jbd/journal.c
---- linux.mcp2/fs/jbd/journal.c        1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/journal.c     2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,1877 @@
-+/*
-+ * linux/fs/journal.c
-+ *
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
-+ *
-+ * Copyright 1998 Red Hat corp --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Generic filesystem journal-writing code; part of the ext2fs
-+ * journaling system.
-+ *
-+ * This file manages journals: areas of disk reserved for logging
-+ * transactional updates.  This includes the kernel journaling thread
-+ * which is responsible for scheduling updates to the log.
-+ *
-+ * We do not actually manage the physical storage of the journal in this
-+ * file: that is left to a per-journal policy function, which allows us
-+ * to store the journal within a filesystem-specified area for ext2
-+ * journaling (ext2 can use a reserved inode for storing the log).
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+#include <linux/smp_lock.h>
-+#include <linux/sched.h>
-+#include <linux/init.h>
-+#include <linux/mm.h>
-+#include <linux/slab.h>
-+#include <asm/uaccess.h>
-+#include <linux/proc_fs.h>
-+
-+EXPORT_SYMBOL(journal_start);
-+EXPORT_SYMBOL(journal_try_start);
-+EXPORT_SYMBOL(journal_restart);
-+EXPORT_SYMBOL(journal_extend);
-+EXPORT_SYMBOL(journal_stop);
-+EXPORT_SYMBOL(journal_lock_updates);
-+EXPORT_SYMBOL(journal_unlock_updates);
-+EXPORT_SYMBOL(journal_get_write_access);
-+EXPORT_SYMBOL(journal_get_create_access);
-+EXPORT_SYMBOL(journal_get_undo_access);
-+EXPORT_SYMBOL(journal_dirty_data);
-+EXPORT_SYMBOL(journal_dirty_metadata);
-+#if 0
-+EXPORT_SYMBOL(journal_release_buffer);
-+#endif
-+EXPORT_SYMBOL(journal_forget);
-+#if 0
-+EXPORT_SYMBOL(journal_sync_buffer);
-+#endif
-+EXPORT_SYMBOL(journal_flush);
-+EXPORT_SYMBOL(journal_revoke);
-+
-+EXPORT_SYMBOL(journal_init_dev);
-+EXPORT_SYMBOL(journal_init_inode);
-+EXPORT_SYMBOL(journal_update_format);
-+EXPORT_SYMBOL(journal_check_used_features);
-+EXPORT_SYMBOL(journal_check_available_features);
-+EXPORT_SYMBOL(journal_set_features);
-+EXPORT_SYMBOL(journal_create);
-+EXPORT_SYMBOL(journal_load);
-+EXPORT_SYMBOL(journal_destroy);
-+EXPORT_SYMBOL(journal_recover);
-+EXPORT_SYMBOL(journal_update_superblock);
-+EXPORT_SYMBOL(journal_abort);
-+EXPORT_SYMBOL(journal_errno);
-+EXPORT_SYMBOL(journal_ack_err);
-+EXPORT_SYMBOL(journal_clear_err);
-+EXPORT_SYMBOL(log_wait_commit);
-+EXPORT_SYMBOL(log_start_commit);
-+EXPORT_SYMBOL(journal_wipe);
-+EXPORT_SYMBOL(journal_blocks_per_page);
-+EXPORT_SYMBOL(journal_flushpage);
-+EXPORT_SYMBOL(journal_try_to_free_buffers);
-+EXPORT_SYMBOL(journal_bmap);
-+EXPORT_SYMBOL(journal_force_commit);
-+
-+static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
-+
-+/*
-+ * journal_datalist_lock is used to protect data buffers:
-+ *
-+ *    bh->b_transaction
-+ *    bh->b_tprev
-+ *    bh->b_tnext
-+ *
-+ * journal_free_buffer() is called from journal_try_to_free_buffer(), and is
-+ * async wrt everything else.
-+ *
-+ * It is also used for checkpoint data, also to protect against
-+ * journal_try_to_free_buffer():
-+ *
-+ *    bh->b_cp_transaction
-+ *    bh->b_cpnext
-+ *    bh->b_cpprev
-+ *    transaction->t_checkpoint_list
-+ *    transaction->t_cpnext
-+ *    transaction->t_cpprev
-+ *    journal->j_checkpoint_transactions
-+ *
-+ * It is global at this time rather than per-journal because it's
-+ * impossible for __journal_free_buffer to go from a buffer_head
-+ * back to a journal_t unracily (well, not true.  Fix later)
-+ *
-+ *
-+ * The `datalist' and `checkpoint list' functions are quite
-+ * separate and we could use two spinlocks here.
-+ *
-+ * lru_list_lock nests inside journal_datalist_lock.
-+ */
-+spinlock_t journal_datalist_lock = SPIN_LOCK_UNLOCKED;
-+
-+/*
-+ * jh_splice_lock needs explanation.
-+ *
-+ * In a number of places we want to do things like:
-+ *
-+ *    if (buffer_jbd(bh) && bh2jh(bh)->foo)
-+ *
-+ * This is racy on SMP, because another CPU could remove the journal_head
-+ * in the middle of this expression.  We need locking.
-+ *
-+ * But we can greatly optimise the locking cost by testing BH_JBD
-+ * outside the lock.  So, effectively:
-+ *
-+ *    ret = 0;
-+ *    if (buffer_jbd(bh)) {
-+ *            spin_lock(&jh_splice_lock);
-+ *            if (buffer_jbd(bh)) {    (* Still there? *)
-+ *                    ret = bh2jh(bh)->foo;
-+ *            }
-+ *            spin_unlock(&jh_splice_lock);
-+ *    }
-+ *    return ret;
-+ *
-+ * Now, that protects us from races where another CPU can remove the
-+ * journal_head.  But it doesn't defend us from the situation where another
-+ * CPU can *add* a journal_head.  This is a correctness issue.  But it's not
-+ * a problem because a) the calling code was *already* racy and b) it often
-+ * can't happen at the call site and c) the places where we add journal_heads
-+ * tend to be under external locking.
-+ */
-+spinlock_t jh_splice_lock = SPIN_LOCK_UNLOCKED;
-+
-+/*
-+ * List of all journals in the system.  Protected by the BKL.
-+ */
-+static LIST_HEAD(all_journals);
-+
-+/*
-+ * Timer callback for the commit interval timer.  The timer's data word
-+ * holds the task_struct pointer of the thread that should be woken.
-+ */
-+
-+static void commit_timeout(unsigned long __data)
-+{
-+      wake_up_process((struct task_struct *) __data);
-+}
-+
-+/*
-+ * Build-time consistency check on the on-disk superblock structure.
-+ * Nothing here is meant to execute: if struct journal_superblock_s is not
-+ * exactly 1024 bytes, the reference to journal_bad_superblock_size()
-+ * survives and we get a linker failure.
-+ */
-+void __journal_internal_check(void)
-+{
-+      extern void journal_bad_superblock_size(void);
-+
-+      if (sizeof(struct journal_superblock_s) == 1024)
-+              return;
-+      journal_bad_superblock_size();
-+}
-+
-+/*
-+ * kjournald: The main thread function used to manage a logging device
-+ * journal.
-+ *
-+ * This kernel thread is responsible for two things:
-+ *
-+ * 1) COMMIT:  Every so often we need to commit the current state of the
-+ *    filesystem to disk.  The journal thread is responsible for writing
-+ *    all of the metadata buffers to disk.
-+ *
-+ * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
-+ *    of the data in that part of the log has been rewritten elsewhere on
-+ *    the disk.  Flushing these old buffers to reclaim space in the log is
-+ *    known as checkpointing, and this thread is responsible for that job.
-+ *
-+ * arg is the journal_t this thread serves.  The thread blocks every
-+ * signal, publishes itself in journal->j_task, and then loops until
-+ * JFS_UNMOUNT is set in journal->j_flags.  A commit is run whenever
-+ * j_commit_sequence differs from j_commit_request; otherwise the thread
-+ * sleeps on j_wait_commit.  On the way out it clears j_task and wakes
-+ * j_wait_done_commit.  Always returns 0.
-+ *
-+ * NOTE(review): only the commit path is visible in the loop below; any
-+ * checkpointing presumably happens inside journal_commit_transaction()
-+ * or elsewhere -- confirm.
-+ */
-+
-+journal_t *current_journal;           // AKPM: debug
-+
-+int kjournald(void *arg)
-+{
-+      journal_t *journal = (journal_t *) arg;
-+      transaction_t *transaction;
-+      struct timer_list timer;
-+
-+      current_journal = journal;
-+
-+      lock_kernel(); /* paired with unlock_kernel() just before we return */
-+      daemonize();
-+      reparent_to_init();
-+      spin_lock_irq(&current->sigmask_lock);
-+      sigfillset(&current->blocked); /* block every signal */
-+      recalc_sigpending(current);
-+      spin_unlock_irq(&current->sigmask_lock);
-+
-+      sprintf(current->comm, "kjournald");
-+
-+      /* Set up an interval timer which can be used to trigger a
-+           commit wakeup after the commit interval expires.  The timer
-+           lives on this thread's stack and, when it fires,
-+           commit_timeout() wakes this task. */
-+      init_timer(&timer);
-+      timer.data = (unsigned long) current;
-+      timer.function = commit_timeout;
-+      journal->j_commit_timer = &timer;
-+
-+      /* Record that the journal thread is running, and wake anyone
-+           sleeping on j_wait_done_commit (journal_start_thread() waits
-+           there for j_task to become non-NULL). */
-+      journal->j_task = current;
-+      wake_up(&journal->j_wait_done_commit);
-+
-+      printk(KERN_INFO "kjournald starting.  Commit interval %ld seconds\n",
-+                      journal->j_commit_interval / HZ);
-+      list_add(&journal->j_all_journals, &all_journals);
-+
-+      /* And now, wait forever for commit wakeup events. */
-+      while (1) {
-+              if (journal->j_flags & JFS_UNMOUNT)
-+                      break;
-+
-+              jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
-+                      journal->j_commit_sequence, journal->j_commit_request);
-+
-+              if (journal->j_commit_sequence != journal->j_commit_request) {
-+                      jbd_debug(1, "OK, requests differ\n");
-+                      if (journal->j_commit_timer_active) {
-+                              journal->j_commit_timer_active = 0;
-+                              del_timer(journal->j_commit_timer);
-+                      }
-+
-+                      journal_commit_transaction(journal);
-+                      continue; /* re-check flags and requests before sleeping */
-+              }
-+
-+              wake_up(&journal->j_wait_done_commit); /* nothing pending: release log_wait_commit() sleepers */
-+              interruptible_sleep_on(&journal->j_wait_commit);
-+
-+              jbd_debug(1, "kjournald wakes\n");
-+
-+              /* Did we wake because the running transaction has reached
-+                 its expiry time?  If so, request a commit of it; the
-+                 next loop iteration will notice the new request. */
-+              if ((transaction = journal->j_running_transaction) != NULL &&
-+                  time_after_eq(jiffies, transaction->t_expires)) {
-+                      journal->j_commit_request = transaction->t_tid;
-+                      jbd_debug(1, "woke because of timeout\n");
-+              }
-+      }
-+
-+      if (journal->j_commit_timer_active) {
-+              journal->j_commit_timer_active = 0;
-+              del_timer_sync(journal->j_commit_timer); /* timer is on our stack: make sure it is gone */
-+      }
-+
-+      list_del(&journal->j_all_journals);
-+
-+      journal->j_task = NULL; /* journal_kill_thread() polls this */
-+      wake_up(&journal->j_wait_done_commit);
-+      unlock_kernel();
-+      jbd_debug(1, "Journal thread exiting.\n");
-+      return 0;
-+}
-+
-+/*
-+ * Spawn kjournald for this journal, then sleep on j_wait_done_commit
-+ * until the new thread has recorded itself in journal->j_task.
-+ */
-+static void journal_start_thread(journal_t *journal)
-+{
-+      kernel_thread(kjournald, (void *) journal,
-+                    CLONE_VM | CLONE_FS | CLONE_FILES);
-+
-+      for (;;) {
-+              if (journal->j_task)
-+                      break;
-+              sleep_on(&journal->j_wait_done_commit);
-+      }
-+}
-+
-+/*
-+ * Ask the journal thread to exit: flag the journal with JFS_UNMOUNT, then
-+ * keep waking j_wait_commit and sleeping on j_wait_done_commit until
-+ * journal->j_task has been cleared.
-+ */
-+static void journal_kill_thread(journal_t *journal)
-+{
-+      journal->j_flags |= JFS_UNMOUNT;
-+
-+      for (;;) {
-+              if (!journal->j_task)
-+                      return;
-+              wake_up(&journal->j_wait_commit);
-+              sleep_on(&journal->j_wait_done_commit);
-+      }
-+}
-+
-+#if 0
-+
-+This is no longer needed - we do it in commit quite efficiently.
-+Note that if this function is resurrected, the loop needs to
-+be reorganised into the next_jh/last_jh algorithm.
-+
-+/*
-+ * journal_clean_data_list: cleanup after data IO.
-+ *
-+ * Once the IO system has finished writing the buffers on the transaction's
-+ * data list, we can remove those buffers from the list.  This function
-+ * scans the list for such buffers and removes them cleanly.
-+ *
-+ * We assume that the journal is already locked.
-+ * We are called with journal_datalist_lock held.
-+ *
-+ * AKPM: This function looks inefficient.  Approximately O(n^2)
-+ * for potentially thousands of buffers.  It no longer shows on profiles
-+ * because these buffers are mainly dropped in journal_commit_transaction().
-+ *
-+ * NOTE: the whole function sits inside an #if 0 region and is therefore
-+ * not compiled.  It walks transaction->t_sync_datalist and drops every
-+ * buffer that is neither locked nor dirty.
-+ */
-+
-+void __journal_clean_data_list(transaction_t *transaction)
-+{
-+      struct journal_head *jh, *next;
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+
-+restart:
-+      jh = transaction->t_sync_datalist;
-+      if (!jh)
-+              goto out;
-+      do {
-+              next = jh->b_tnext;
-+              if (!buffer_locked(jh2bh(jh)) && !buffer_dirty(jh2bh(jh))) {
-+                      struct buffer_head *bh = jh2bh(jh);
-+                      BUFFER_TRACE(bh, "data writeout complete: unfile");
-+                      __journal_unfile_buffer(jh);
-+                      jh->b_transaction = NULL;
-+                      __journal_remove_journal_head(bh);
-+                      refile_buffer(bh);
-+                      __brelse(bh);
-+                      goto restart; /* list was modified: rescan from its head */
-+              }
-+              jh = next;
-+      } while (transaction->t_sync_datalist &&
-+                      jh != transaction->t_sync_datalist);
-+out:
-+      return;
-+}
-+#endif
-+
-+/*
-+ * journal_write_metadata_buffer: write a metadata buffer to the journal.
-+ *
-+ * Writes a metadata buffer to a given disk block.  The actual IO is not
-+ * performed but a new buffer_head is constructed which labels the data
-+ * to be written with the correct destination disk block.
-+ *
-+ * Any magic-number escaping which needs to be done will cause a
-+ * copy-out here.  If the buffer happens to start with the
-+ * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
-+ * magic number is only written to the log for descriptor blocks.  In
-+ * this case, we copy the data and replace the first word with 0, and we
-+ * return a result code which indicates that this buffer needs to be
-+ * marked as an escaped buffer in the corresponding log descriptor
-+ * block.  The missing word can then be restored when the block is read
-+ * during recovery.
-+ *
-+ * If the source buffer has already been modified by a new transaction
-+ * since we took the last commit snapshot, we use the frozen copy of
-+ * that data for IO.  If we end up using the existing buffer_head's data
-+ * for the write, then we *have* to lock the buffer to prevent anyone
-+ * else from using and possibly modifying it while the IO is in
-+ * progress.
-+ *
-+ * The function returns a pointer to the buffer_heads to be used for IO.
-+ *
-+ * We assume that the journal has already been locked in this function.
-+ *
-+ * Return value:
-+ *  <0: Error
-+ * >=0: Finished OK
-+ *
-+ * On success:
-+ * Bit 0 set == escape performed on the data
-+ * Bit 1 set == buffer copy-out performed (kfree the data after IO)
-+ */
-+
-+/* Offset of the address p within its page (the bits outside PAGE_MASK). */
-+static inline unsigned long virt_to_offset(void *p)
-+{
-+      unsigned long addr = (unsigned long) p;
-+
-+      return addr & ~PAGE_MASK;
-+}
-+                                             
-+int journal_write_metadata_buffer(transaction_t *transaction,
-+                                struct journal_head  *jh_in,
-+                                struct journal_head **jh_out,
-+                                int blocknr)
-+{
-+      int need_copy_out = 0; /* data must be copied before we may alter it */
-+      int done_copy_out = 0; /* IO will use b_frozen_data, not the live buffer */
-+      int do_escape = 0; /* block begins with JFS_MAGIC_NUMBER */
-+      char *mapped_data;
-+      struct buffer_head *new_bh;
-+      struct journal_head * new_jh;
-+      struct page *new_page;
-+      unsigned int new_offset;
-+
-+      /*
-+       * The buffer really shouldn't be locked: only the current committing
-+       * transaction is allowed to write it, so nobody else is allowed
-+       * to do any IO.
-+       *
-+       * akpm: except if we're journalling data, and write() output is
-+       * also part of a shared mapping, and another thread has
-+       * decided to launch a writepage() against this buffer.
-+       */
-+      J_ASSERT_JH(jh_in, buffer_jdirty(jh2bh(jh_in)));
-+
-+      /*
-+       * If a new transaction has already done a buffer copy-out, then
-+       * we use that version of the data for the commit.  Either way,
-+       * new_page/new_offset end up describing where the bytes to be
-+       * written live.
-+       */
-+
-+      if (jh_in->b_frozen_data) {
-+              done_copy_out = 1;
-+              new_page = virt_to_page(jh_in->b_frozen_data);
-+              new_offset = virt_to_offset(jh_in->b_frozen_data);
-+      } else {
-+              new_page = jh2bh(jh_in)->b_page;
-+              new_offset = virt_to_offset(jh2bh(jh_in)->b_data);
-+      }
-+
-+      mapped_data = ((char *) kmap(new_page)) + new_offset;
-+
-+      /*
-+       * Check for escaping: a block whose first word equals the journal
-+       * magic number must be copied and have that word zeroed.
-+       */
-+      if (* ((unsigned int *) mapped_data) == htonl(JFS_MAGIC_NUMBER)) {
-+              need_copy_out = 1;
-+              do_escape = 1;
-+      }
-+
-+      /*
-+       * Do we need to do a data copy?  Only when escaping is required
-+       * and no frozen copy exists yet; the copy becomes b_frozen_data.
-+       */
-+
-+      if (need_copy_out && !done_copy_out) {
-+              char *tmp;
-+              tmp = jbd_rep_kmalloc(jh2bh(jh_in)->b_size, GFP_NOFS);
-+
-+              jh_in->b_frozen_data = tmp;
-+              memcpy (tmp, mapped_data, jh2bh(jh_in)->b_size);
-+              
-+              /* If we get to this path, we'll always need the new
-+                 address kmapped so that we can clear the escaped
-+                 magic number below. */
-+              kunmap(new_page);
-+              new_page = virt_to_page(tmp);
-+              new_offset = virt_to_offset(tmp);
-+              mapped_data = ((char *) kmap(new_page)) + new_offset;
-+              
-+              done_copy_out = 1;
-+      }
-+
-+      /*
-+       * Right, time to make up the new buffer_head.  Allocation is
-+       * retried, yielding the CPU in between, until it succeeds.
-+       */
-+      do {
-+              new_bh = get_unused_buffer_head(0);
-+              if (!new_bh) {
-+                      printk (KERN_NOTICE __FUNCTION__
-+                              ": ENOMEM at get_unused_buffer_head, "
-+                              "trying again.\n");
-+                      current->policy |= SCHED_YIELD;
-+                      schedule();
-+              }
-+      } while (!new_bh);
-+      /* keep subsequent assertions sane */
-+      new_bh->b_prev_free = 0;
-+      new_bh->b_next_free = 0;
-+      new_bh->b_state = 0;
-+      init_buffer(new_bh, NULL, NULL);
-+      atomic_set(&new_bh->b_count, 1);
-+      new_jh = journal_add_journal_head(new_bh);
-+
-+      set_bh_page(new_bh, new_page, new_offset);
-+
-+      new_jh->b_transaction = NULL;
-+      new_bh->b_size = jh2bh(jh_in)->b_size;
-+      new_bh->b_dev = transaction->t_journal->j_dev;
-+      new_bh->b_blocknr = blocknr; /* destination block in the journal */
-+      new_bh->b_state |= (1 << BH_Mapped) | (1 << BH_Dirty);
-+
-+      *jh_out = new_jh;
-+
-+      /*
-+       * Did we need to do an escaping?  Now we've done all the
-+       * copying, we can finally do so.
-+       */
-+
-+      if (do_escape)
-+              * ((unsigned int *) mapped_data) = 0;
-+      kunmap(new_page);
-+      
-+      /*
-+       * The to-be-written buffer needs to get moved to the io queue,
-+       * and the original buffer whose contents we are shadowing or
-+       * copying is moved to the transaction's shadow queue.
-+       */
-+      JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-+      journal_file_buffer(jh_in, transaction, BJ_Shadow);
-+      JBUFFER_TRACE(new_jh, "file as BJ_IO");
-+      journal_file_buffer(new_jh, transaction, BJ_IO);
-+
-+      return do_escape | (done_copy_out << 1); /* bit 0: escaped, bit 1: frozen copy used */
-+}
-+
-+/*
-+ * Allocation code for the journal file.  Manage the space left in the
-+ * journal, so that we can begin checkpointing when appropriate.
-+ */
-+
-+/*
-+ * log_space_left: how many free journal blocks callers may count on.
-+ *
-+ * Starts from journal->j_free, holds back MIN_LOG_RESERVED_BLOCKS, and
-+ * then discounts a further eighth of what remains as a pessimistic
-+ * allowance for log descriptor control blocks.  Returns 0 rather than a
-+ * negative value when nothing is left after the reserve.
-+ *
-+ * Called with the journal already locked.
-+ */
-+
-+#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
-+
-+int log_space_left (journal_t *journal)
-+{
-+      int usable = journal->j_free;
-+
-+      usable -= MIN_LOG_RESERVED_BLOCKS;
-+      if (usable > 0)
-+              return usable - (usable >> 3);
-+      return 0;
-+}
-+
-+/*
-+ * Request a commit and return the tid the caller should wait for.
-+ *
-+ * A non-NULL transaction names the commit wanted.  NULL means "whatever
-+ * is currently running"; if nothing is running, the j_commit_request
-+ * value sampled on entry is returned and nothing else happens.  We do
-+ * _not_ do the commit ourselves: the commit thread is woken through
-+ * j_wait_commit.
-+ *
-+ * This function must be non-allocating for PF_MEMALLOC tasks
-+ */
-+tid_t log_start_commit (journal_t *journal, transaction_t *transaction)
-+{
-+      tid_t target = journal->j_commit_request;
-+
-+      lock_kernel(); /* Protect journal->j_running_transaction */
-+
-+      /* NULL: fall back to the running transaction, if there is one. */
-+      if (!transaction)
-+              transaction = journal->j_running_transaction;
-+
-+      if (transaction) {
-+              target = transaction->t_tid;
-+
-+              /* Raise the request only if no recent enough commit is
-+               * already being asked for. */
-+              if (!tid_geq(journal->j_commit_request, target)) {
-+                      journal->j_commit_request = target;
-+                      jbd_debug(1, "JBD: requesting commit %d/%d\n",
-+                                journal->j_commit_request,
-+                                journal->j_commit_sequence);
-+                      wake_up(&journal->j_wait_commit);
-+              }
-+      }
-+
-+      unlock_kernel();
-+      return target;
-+}
-+
-+/*
-+ * Sleep until the journal's commit sequence has caught up with tid.
-+ * Each pass wakes j_wait_commit and then sleeps on j_wait_done_commit.
-+ * The caller may not hold the journal lock.
-+ */
-+void log_wait_commit (journal_t *journal, tid_t tid)
-+{
-+      lock_kernel();
-+
-+#ifdef CONFIG_JBD_DEBUG
-+      /* Debug builds complain if nobody has requested this commit. */
-+      lock_journal(journal);
-+      if (!tid_geq(journal->j_commit_request, tid)) {
-+              printk(KERN_EMERG __FUNCTION__
-+                      ": error: j_commit_request=%d, tid=%d\n",
-+                      journal->j_commit_request, tid);
-+      }
-+      unlock_journal(journal);
-+#endif
-+
-+      for (;;) {
-+              if (!tid_gt(tid, journal->j_commit_sequence))
-+                      break;
-+              jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
-+                                tid, journal->j_commit_sequence);
-+              wake_up(&journal->j_wait_commit);
-+              sleep_on(&journal->j_wait_done_commit);
-+      }
-+
-+      unlock_kernel();
-+}
-+
-+/*
-+ * Log buffer allocation routines:
-+ */
-+
-+/*
-+ * Hand out the next block of the log: take the block at j_head, advance
-+ * j_head (wrapping from j_last back to j_first), use up one unit of
-+ * j_free, and translate the block through journal_bmap().  The mapped
-+ * block number is stored in *retp; the return value is journal_bmap()'s.
-+ */
-+int journal_next_log_block(journal_t *journal, unsigned long *retp)
-+{
-+      unsigned long logical;
-+
-+      J_ASSERT(journal->j_free > 1);
-+
-+      logical = journal->j_head++;
-+      journal->j_free--;
-+      if (journal->j_head == journal->j_last)
-+              journal->j_head = journal->j_first;
-+
-+      return journal_bmap(journal, logical, retp);
-+}
-+
-+/*
-+ * Conversion of logical to physical block numbers for the journal.
-+ * The result is stored in *retp.  Returns 0 on success, or -EIO (after
-+ * a soft abort of the journal) when bmap() finds no block.
-+ *
-+ * On external journals the journal blocks are identity-mapped, so
-+ * this is a no-op.  If needed, we can use j_blk_offset - everything is
-+ * ready.
-+ */
-+int journal_bmap(journal_t *journal, unsigned long blocknr, 
-+               unsigned long *retp)
-+{
-+      unsigned long phys;
-+
-+      /* No journal inode: identity mapping. */
-+      if (!journal->j_inode) {
-+              *retp = blocknr; /* +journal->j_blk_offset */
-+              return 0;
-+      }
-+
-+      phys = bmap(journal->j_inode, blocknr);
-+      if (phys) {
-+              *retp = phys;
-+              return 0;
-+      }
-+
-+      printk (KERN_ALERT __FUNCTION__ 
-+              ": journal block not found "
-+              "at offset %lu on %s\n",
-+              blocknr, bdevname(journal->j_dev));
-+      __journal_abort_soft(journal, -EIO);
-+      return -EIO;
-+}
-+
-+/*
-+ * We play buffer_head aliasing tricks to write data/metadata blocks to
-+ * the journal without copying their contents, but for journal
-+ * descriptor blocks we do need to generate bona fide buffers.
-+ *
-+ * Claims the next log block and returns a jh whose bh is locked and
-+ * ready to be populated, or NULL if the log block could not be mapped.
-+ */
-+
-+struct journal_head * journal_get_descriptor_buffer(journal_t *journal)
-+{
-+      unsigned long blocknr;
-+      struct buffer_head *bh;
-+
-+      if (journal_next_log_block(journal, &blocknr) != 0)
-+              return NULL;
-+
-+      bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
-+      lock_buffer(bh);
-+      BUFFER_TRACE(bh, "return this buffer");
-+
-+      return journal_add_journal_head(bh);
-+}
-+
-+/*
-+ * Management for journal control blocks: functions to create and
-+ * destroy journal_t structures, and to initialise and read existing
-+ * journal blocks from disk.  */
-+
-+/* First: create and setup a journal_t object in memory.  We initialise
-+ * very few fields yet: that has to wait until we have created the
-+ * journal structures from scratch, or loaded them from disk.
-+ *
-+ * Returns the new journal, or NULL if either the allocation or the
-+ * revoke table setup fails.  The module use count is raised on entry
-+ * and dropped again on either failure. */
-+
-+static journal_t * journal_init_common (void)
-+{
-+      journal_t *journal;
-+
-+      MOD_INC_USE_COUNT;
-+
-+      journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
-+      if (journal == NULL) {
-+              MOD_DEC_USE_COUNT;
-+              return NULL;
-+      }
-+      memset(journal, 0, sizeof(*journal));
-+
-+      /* Wait queues. */
-+      init_waitqueue_head(&journal->j_wait_transaction_locked);
-+      init_waitqueue_head(&journal->j_wait_logspace);
-+      init_waitqueue_head(&journal->j_wait_done_commit);
-+      init_waitqueue_head(&journal->j_wait_checkpoint);
-+      init_waitqueue_head(&journal->j_wait_commit);
-+      init_waitqueue_head(&journal->j_wait_updates);
-+
-+      /* Semaphores. */
-+      init_MUTEX(&journal->j_barrier);
-+      init_MUTEX(&journal->j_checkpoint_sem);
-+      init_MUTEX(&journal->j_sem);
-+
-+      journal->j_commit_interval = (HZ * 5);
-+
-+      /* The journal is marked for error until we succeed with recovery! */
-+      journal->j_flags = JFS_ABORT;
-+
-+      /* Set up a default-sized revoke table for the new mount. */
-+      if (journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH) != 0) {
-+              kfree(journal);
-+              MOD_DEC_USE_COUNT;
-+              return NULL;
-+      }
-+
-+      return journal;
-+}
-+
-+/* journal_init_dev and journal_init_inode:
-+ *
-+ * Create a journal structure assigned some fixed set of disk blocks to
-+ * the journal.  We don't actually touch those disk blocks yet, but we
-+ * need to set up all of the mapping information to tell the journaling
-+ * system where the journal blocks are.
-+ *
-+ * journal_init_dev creates a journal which maps a fixed contiguous
-+ * range of blocks on an arbitrary block device.
-+ *
-+ * journal_init_inode creates a journal which maps an on-disk inode as
-+ * the journal.  The inode must exist already, must support bmap() and
-+ * must have all data blocks preallocated.
-+ */
-+
-+journal_t * journal_init_dev(kdev_t dev, kdev_t fs_dev,
-+                      int start, int len, int blocksize)
-+{
-+      struct buffer_head *bh;
-+      journal_t *journal;
-+
-+      journal = journal_init_common();
-+      if (journal == NULL)
-+              return NULL;
-+
-+      /* Describe the fixed run of blocks that makes up the journal. */
-+      journal->j_dev        = dev;
-+      journal->j_fs_dev     = fs_dev;
-+      journal->j_blk_offset = start;
-+      journal->j_maxlen     = len;
-+      journal->j_blocksize  = blocksize;
-+
-+      /* The block at 'start' is used as the journal superblock; keep
-+       * both the buffer and a typed pointer to its data. */
-+      bh = getblk(journal->j_dev, start, journal->j_blocksize);
-+      J_ASSERT(bh != NULL);
-+      journal->j_superblock = (journal_superblock_t *)bh->b_data;
-+      journal->j_sb_buffer = bh;
-+
-+      return journal;
-+}
-+
-+/*
-+ * journal_init_inode: create a journal_t whose log is the file 'inode'.
-+ *
-+ * Returns the new journal, or NULL if journal_init_common() fails or
-+ * the first journal block (the superblock) cannot be mapped.  On the
-+ * mapping failure everything journal_init_common() set up is undone:
-+ * the revoke table is destroyed, the journal_t is freed and the module
-+ * use count is dropped.  No iput() is done on the inode here.
-+ */
-+journal_t * journal_init_inode (struct inode *inode)
-+{
-+      struct buffer_head *bh;
-+      journal_t *journal = journal_init_common();
-+      int err;
-+      unsigned long blocknr;
-+
-+      if (!journal)
-+              return NULL;
-+
-+      journal->j_dev = inode->i_dev;
-+      journal->j_fs_dev = inode->i_dev;
-+      journal->j_inode = inode;
-+      jbd_debug(1,
-+                "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
-+                journal, bdevname(inode->i_dev), inode->i_ino,
-+                (long long) inode->i_size,
-+                inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
-+
-+      /* Journal length in blocks is the file size in filesystem blocks. */
-+      journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
-+      journal->j_blocksize = inode->i_sb->s_blocksize;
-+
-+      /* Logical block 0 of the file holds the journal superblock. */
-+      err = journal_bmap(journal, 0, &blocknr);
-+      /* If that failed, give up */
-+      if (err) {
-+              printk(KERN_ERR __FUNCTION__ ": Cannnot locate journal "
-+                     "superblock\n");
-+              /*
-+               * Undo journal_init_common(): a bare kfree() would leak
-+               * the revoke table and leave the module use count
-+               * elevated.  Mirrors the teardown in journal_destroy().
-+               */
-+              if (journal->j_revoke)
-+                      journal_destroy_revoke(journal);
-+              kfree(journal);
-+              MOD_DEC_USE_COUNT;
-+              return NULL;
-+      }
-+
-+      bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
-+      J_ASSERT(bh != NULL);
-+      journal->j_sb_buffer = bh;
-+      journal->j_superblock = (journal_superblock_t *)bh->b_data;
-+
-+      return journal;
-+}
-+
-+/* 
-+ * If the journal init or create aborts, we need to mark the journal
-+ * superblock as being NULL to prevent the journal destroy from writing
-+ * back a bogus superblock. 
-+ */
-+static void journal_fail_superblock (journal_t *journal)
-+{
-+      /* Drop our reference, then forget the buffer: journal_destroy()
-+       * only writes the superblock back when j_sb_buffer is non-NULL. */
-+      brelse(journal->j_sb_buffer);
-+      journal->j_sb_buffer = NULL;
-+}
-+
-+/*
-+ * Given a journal_t structure, initialise the various fields for
-+ * startup of a new journaling session.  We use this both when creating
-+ * a journal, and after recovering an old journal to reset it for
-+ * subsequent use.
-+ */
-+
-+static int journal_reset (journal_t *journal)
-+{
-+      journal_superblock_t *sb = journal->j_superblock;
-+      unsigned int log_start = ntohl(sb->s_first);
-+      unsigned int log_end = ntohl(sb->s_maxlen);
-+
-+      /* Log boundaries are taken from the superblock. */
-+      journal->j_first = log_start;
-+      journal->j_last = log_end;
-+
-+      /* Head and tail both start on the first log block. */
-+      journal->j_head = log_start;
-+      journal->j_tail = log_start;
-+      journal->j_free = log_end - log_start;
-+
-+      /* Sequence numbers are derived from j_transaction_sequence. */
-+      journal->j_tail_sequence = journal->j_transaction_sequence;
-+      journal->j_commit_sequence = journal->j_transaction_sequence - 1;
-+      journal->j_commit_request = journal->j_commit_sequence;
-+
-+      journal->j_max_transaction_buffers = journal->j_maxlen / 4;
-+
-+      /* Fill in the dynamic superblock fields and wait for the write. */
-+      journal_update_superblock(journal, 1);
-+
-+      /* The commit thread is started with the journal locked. */
-+      lock_journal(journal);
-+      journal_start_thread(journal);
-+      unlock_journal(journal);
-+
-+      return 0;
-+}
-+
-+/*
-+ * Given a journal_t structure which tells us which disk blocks we can
-+ * use, create a new journal superblock and initialise all of the
-+ * journal fields from scratch.
-+ *
-+ * Returns -EINVAL if the journal is shorter than JFS_MIN_JOURNAL_BLOCKS,
-+ * the journal_bmap() error if a journal block cannot be mapped, and
-+ * otherwise the result of journal_reset().  Only inode-backed journals
-+ * can be created: a NULL j_inode ends in BUG().
-+ */
-+
-+int journal_create (journal_t *journal)
-+{
-+      unsigned long blocknr;
-+      struct buffer_head *bh;
-+      journal_superblock_t *sb;
-+      int i, err;
-+
-+      if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
-+              printk (KERN_ERR "Journal length (%d blocks) too short.\n",
-+                      journal->j_maxlen);
-+              journal_fail_superblock(journal);
-+              return -EINVAL;
-+      }
-+
-+      if (journal->j_inode == NULL) {
-+              /*
-+               * We don't know what block to start at!
-+               */
-+              printk(KERN_EMERG __FUNCTION__
-+                      ": creation of journal on external device!\n");
-+              BUG();
-+      }
-+
-+      /* Zero out the entire journal on disk.  We cannot afford to
-+       * have any blocks on disk beginning with JFS_MAGIC_NUMBER.
-+       * Each logical journal block is mapped, zeroed in its buffer,
-+       * marked dirty and uptodate, and released; sync_dev() after the
-+       * loop is what pushes the zeroed blocks out. */
-+      jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
-+      for (i = 0; i < journal->j_maxlen; i++) {
-+              err = journal_bmap(journal, i, &blocknr);
-+              if (err)
-+                      return err;
-+              bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
-+              wait_on_buffer(bh);
-+              memset (bh->b_data, 0, journal->j_blocksize);
-+              BUFFER_TRACE(bh, "marking dirty");
-+              mark_buffer_dirty(bh);
-+              BUFFER_TRACE(bh, "marking uptodate");
-+              mark_buffer_uptodate(bh, 1);
-+              __brelse(bh);
-+      }
-+
-+      sync_dev(journal->j_dev);
-+      jbd_debug(1, "JBD: journal cleared.\n");
-+
-+      /* OK, fill in the initial static fields in the new superblock.
-+       * All on-disk fields are stored via htonl(); the format is V2
-+       * and the log starts at block 1. */
-+      sb = journal->j_superblock;
-+
-+      sb->s_header.h_magic     = htonl(JFS_MAGIC_NUMBER);
-+      sb->s_header.h_blocktype = htonl(JFS_SUPERBLOCK_V2);
-+
-+      sb->s_blocksize = htonl(journal->j_blocksize);
-+      sb->s_maxlen    = htonl(journal->j_maxlen);
-+      sb->s_first     = htonl(1);
-+
-+      journal->j_transaction_sequence = 1;
-+
-+      journal->j_flags &= ~JFS_ABORT; /* set by journal_init_common() */
-+      journal->j_format_version = 2;
-+
-+      return journal_reset(journal);
-+}
-+
-+/*
-+ * Update a journal's dynamic superblock fields and write it to disk,
-+ * optionally waiting for the IO to complete.
-+*/
-+
-+void journal_update_superblock(journal_t *journal, int wait)
-+{
-+      struct buffer_head *bh = journal->j_sb_buffer;
-+      journal_superblock_t *sb = journal->j_superblock;
-+
-+      jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
-+                journal->j_tail, journal->j_tail_sequence, journal->j_errno);
-+
-+      /* Copy the dynamic fields into the on-disk superblock image. */
-+      sb->s_start    = htonl(journal->j_tail);
-+      sb->s_sequence = htonl(journal->j_tail_sequence);
-+      sb->s_errno    = htonl(journal->j_errno);
-+
-+      /* Submit the write; only block on it if the caller asked. */
-+      BUFFER_TRACE(bh, "marking dirty");
-+      mark_buffer_dirty(bh);
-+      ll_rw_block(WRITE, 1, &bh);
-+      if (wait)
-+              wait_on_buffer(bh);
-+
-+      /* s_start == 0 means the log has just been flushed.  Remember
-+       * that, so a later commit knows it must rewrite the superblock
-+       * to record the true start of the log again. */
-+      if (sb->s_start == 0)
-+              journal->j_flags |= JFS_FLUSHED;
-+      else
-+              journal->j_flags &= ~JFS_FLUSHED;
-+}
-+
-+
-+/*
-+ * Read the superblock for a given journal, performing initial
-+ * validation of the format.
-+ */
-+
-+static int journal_get_superblock(journal_t *journal)
-+{
-+      struct buffer_head *bh = journal->j_sb_buffer;
-+      journal_superblock_t *sb;
-+      int err;
-+
-+      J_ASSERT(bh != NULL);
-+
-+      /* Read the superblock from disk unless the buffer is uptodate. */
-+      if (!buffer_uptodate(bh)) {
-+              ll_rw_block(READ, 1, &bh);
-+              wait_on_buffer(bh);
-+              if (!buffer_uptodate(bh)) {
-+                      printk (KERN_ERR
-+                              "JBD: IO error reading journal superblock\n");
-+                      err = -EIO;
-+                      goto out;
-+              }
-+      }
-+
-+      /* Every validation failure below is reported as -EINVAL. */
-+      err = -EINVAL;
-+      sb = journal->j_superblock;
-+
-+      if (sb->s_header.h_magic != htonl(JFS_MAGIC_NUMBER) ||
-+          sb->s_blocksize != htonl(journal->j_blocksize)) {
-+              printk(KERN_WARNING "JBD: no valid journal superblock found\n");
-+              goto out;
-+      }
-+
-+      switch(ntohl(sb->s_header.h_blocktype)) {
-+      case JFS_SUPERBLOCK_V2:
-+              journal->j_format_version = 2;
-+              break;
-+      case JFS_SUPERBLOCK_V1:
-+              journal->j_format_version = 1;
-+              break;
-+      default:
-+              printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
-+              goto out;
-+      }
-+
-+      /* The superblock may describe a shorter journal than the space
-+       * we were given, but never a longer one. */
-+      if (ntohl(sb->s_maxlen) > journal->j_maxlen) {
-+              printk (KERN_WARNING "JBD: journal file too short\n");
-+              goto out;
-+      }
-+      if (ntohl(sb->s_maxlen) < journal->j_maxlen)
-+              journal->j_maxlen = ntohl(sb->s_maxlen);
-+
-+      return 0;
-+
-+out:
-+      /* Release the buffer so a bogus superblock is never written back. */
-+      journal_fail_superblock(journal);
-+      return err;
-+}
-+
-+/*
-+ * Load the on-disk journal superblock and read the key fields into the
-+ * journal_t.
-+ */
-+
-+static int load_superblock(journal_t *journal)
-+{
-+      journal_superblock_t *sb;
-+      int err = journal_get_superblock(journal);
-+
-+      if (err != 0)
-+              return err;
-+
-+      /* Mirror the on-disk fields into the journal_t, converting each
-+       * with ntohl(). */
-+      sb = journal->j_superblock;
-+      journal->j_first         = ntohl(sb->s_first);
-+      journal->j_last          = ntohl(sb->s_maxlen);
-+      journal->j_tail          = ntohl(sb->s_start);
-+      journal->j_tail_sequence = ntohl(sb->s_sequence);
-+      journal->j_errno         = ntohl(sb->s_errno);
-+
-+      return 0;
-+}
-+
-+
-+/*
-+ * Given a journal_t structure which tells us which disk blocks contain
-+ * a journal, read the journal from disk to initialise the in-memory
-+ * structures.
-+ */
-+
-+int journal_load(journal_t *journal)
-+{
-+      int err = load_superblock(journal);
-+
-+      if (err)
-+              return err;
-+
-+      /* Only V2 superblocks carry feature flags.  Refuse to load if
-+       * any ro-compat or incompat bit outside the known sets is on. */
-+      if (journal->j_format_version >= 2) {
-+              journal_superblock_t *sb = journal->j_superblock;
-+              int unknown_ro = (sb->s_feature_ro_compat &
-+                      ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) != 0;
-+              int unknown_incompat = (sb->s_feature_incompat &
-+                      ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES)) != 0;
-+
-+              if (unknown_ro || unknown_incompat) {
-+                      printk (KERN_WARNING
-+                              "JBD: Unrecognised features on journal\n");
-+                      return -EINVAL;
-+              }
-+      }
-+
-+      /* Run recovery first, then reinitialise the dynamic superblock
-+       * contents in memory and on disk.  Failure of either step is
-+       * reported as -EIO. */
-+      if (journal_recover(journal) || journal_reset(journal)) {
-+              printk (KERN_WARNING "JBD: recovery failed\n");
-+              return -EIO;
-+      }
-+
-+      journal->j_flags &= ~JFS_ABORT;
-+      journal->j_flags |= JFS_LOADED;
-+      return 0;
-+}
-+
-+/*
-+ * Release a journal_t structure once it is no longer in use by the
-+ * journaled object.
-+ *
-+ * In order: stop the commit thread, commit any running transaction,
-+ * checkpoint until no checkpoint transactions remain, write an "empty
-+ * log" superblock (if the superblock buffer is still attached), drop
-+ * the inode and revoke table, free the journal_t and release the
-+ * module use count.  The journal pointer is invalid on return.
-+ */
-+
-+void journal_destroy (journal_t *journal)
-+{
-+      /* Wait for the commit thread to wake up and die. */
-+      journal_kill_thread(journal);
-+
-+      /* Force a final log commit */
-+      if (journal->j_running_transaction)
-+              journal_commit_transaction(journal);
-+
-+      /* Force any old transactions to disk */
-+      lock_journal(journal);
-+      while (journal->j_checkpoint_transactions != NULL)
-+              log_do_checkpoint(journal, 1);
-+
-+      J_ASSERT(journal->j_running_transaction == NULL);
-+      J_ASSERT(journal->j_committing_transaction == NULL);
-+      J_ASSERT(journal->j_checkpoint_transactions == NULL);
-+
-+      /* We can now mark the journal as empty.  j_tail == 0 is written
-+       * out as s_start == 0 by journal_update_superblock().  The
-+       * buffer is NULL if journal_fail_superblock() ran earlier, in
-+       * which case nothing is written. */
-+      journal->j_tail = 0;
-+      journal->j_tail_sequence = ++journal->j_transaction_sequence;
-+      if (journal->j_sb_buffer) {
-+              journal_update_superblock(journal, 1);
-+              brelse(journal->j_sb_buffer);
-+      }
-+
-+      if (journal->j_inode)
-+              iput(journal->j_inode);
-+      if (journal->j_revoke)
-+              journal_destroy_revoke(journal);
-+
-+      unlock_journal(journal);
-+      kfree(journal);
-+      MOD_DEC_USE_COUNT;      /* pairs with journal_init_common() */
-+}
-+
-+
-+/* Published API: Check whether the journal uses all of a given set of
-+ * features.  Return true (non-zero) if it does. */
-+
-+int journal_check_used_features (journal_t *journal, unsigned long compat,
-+                               unsigned long ro, unsigned long incompat)
-+{
-+      journal_superblock_t *sb;
-+
-+      /* Asking about no features at all is trivially true. */
-+      if (!compat && !ro && !incompat)
-+              return 1;
-+
-+      /* A V1 journal is treated as having no features set. */
-+      if (journal->j_format_version == 1)
-+              return 0;
-+
-+      /* Every requested bit must be present in its own feature word. */
-+      sb = journal->j_superblock;
-+      if ((be32_to_cpu(sb->s_feature_compat) & compat) != compat)
-+              return 0;
-+      if ((be32_to_cpu(sb->s_feature_ro_compat) & ro) != ro)
-+              return 0;
-+      if ((be32_to_cpu(sb->s_feature_incompat) & incompat) != incompat)
-+              return 0;
-+
-+      return 1;
-+}
-+
-+/* Published API: Check whether the journaling code supports the use of
-+ * all of a given set of features on this journal.  Return true
-+ * (non-zero) if it can. */
-+
-+int journal_check_available_features (journal_t *journal, unsigned long compat,
-+                                    unsigned long ro, unsigned long incompat)
-+{
-+      /* An empty request is always supportable. */
-+      if (!compat && !ro && !incompat)
-+              return 1;
-+
-+      /* We can support any known requested features iff the
-+       * superblock is in version 2.  Otherwise we fail to support any
-+       * extended sb features. */
-+
-+      if (journal->j_format_version != 2)
-+              return 0;
-+
-+      /* Supported only if no requested bit falls outside the sets this
-+       * code knows about.  The superblock contents are not consulted,
-+       * so the old, never-read 'sb' local has been dropped. */
-+      if ((compat   & JFS_KNOWN_COMPAT_FEATURES) == compat &&
-+          (ro       & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
-+          (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
-+              return 1;
-+
-+      return 0;
-+}
-+
-+/* Published API: Mark a given journal feature as present on the
-+ * superblock.  Returns true if the requested features could be set. */
-+
-+int journal_set_features (journal_t *journal, unsigned long compat,
-+                        unsigned long ro, unsigned long incompat)
-+{
-+      journal_superblock_t *sb = journal->j_superblock;
-+
-+      /* Already set: nothing to do. */
-+      if (journal_check_used_features(journal, compat, ro, incompat))
-+              return 1;
-+
-+      /* Not something this journal can support: refuse. */
-+      if (!journal_check_available_features(journal, compat, ro, incompat))
-+              return 0;
-+
-+      jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
-+                compat, ro, incompat);
-+
-+      /* OR the new bits into the in-memory superblock image.  Nothing
-+       * is written to disk here. */
-+      sb->s_feature_incompat  |= cpu_to_be32(incompat);
-+      sb->s_feature_ro_compat |= cpu_to_be32(ro);
-+      sb->s_feature_compat    |= cpu_to_be32(compat);
-+
-+      return 1;
-+}
-+
-+
-+/*
-+ * Published API:
-+ * Given an initialised but unloaded journal struct, poke about in the
-+ * on-disk structure to update it to the most recent supported version.
-+ */
-+
-+int journal_update_format (journal_t *journal)
-+{
-+      journal_superblock_t *sb;
-+      int err = journal_get_superblock(journal);
-+
-+      if (err != 0)
-+              return err;
-+
-+      sb = journal->j_superblock;
-+
-+      /* V2 is already current; V1 is upgraded in place; anything
-+       * else is rejected. */
-+      if (ntohl(sb->s_header.h_blocktype) == JFS_SUPERBLOCK_V2)
-+              return 0;
-+      if (ntohl(sb->s_header.h_blocktype) == JFS_SUPERBLOCK_V1)
-+              return journal_convert_superblock_v1(journal, sb);
-+
-+      return -EINVAL;
-+}
-+
-+static int journal_convert_superblock_v1(journal_t *journal,
-+                                       journal_superblock_t *sb)
-+{
-+      struct buffer_head *bh = journal->j_sb_buffer;
-+      int v2_offset, sb_blocksize;
-+
-+      printk(KERN_WARNING
-+              "JBD: Converting superblock from version 1 to 2.\n");
-+
-+      /* Zero everything from s_feature_compat to the end of the block. */
-+      v2_offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
-+      sb_blocksize = ntohl(sb->s_blocksize);
-+      memset(&sb->s_feature_compat, 0, sb_blocksize-v2_offset);
-+
-+      /* Stamp the block as V2 with one user. */
-+      sb->s_nr_users = cpu_to_be32(1);
-+      sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
-+      journal->j_format_version = 2;
-+
-+      /* Write the converted superblock and wait for the I/O. */
-+      BUFFER_TRACE(bh, "marking dirty");
-+      mark_buffer_dirty(bh);
-+      ll_rw_block(WRITE, 1, &bh);
-+      wait_on_buffer(bh);
-+
-+      return 0;
-+}
-+
-+
-+/*
-+ * Flush all data for a given journal to disk and empty the journal.
-+ * Filesystems can use this when remounting readonly to ensure that
-+ * recovery does not need to happen on remount.
-+ *
-+ * Runs under lock_kernel() from start to finish.  Returns 0, or the
-+ * first non-zero value returned by log_do_checkpoint().
-+ */
-+
-+int journal_flush (journal_t *journal)
-+{
-+      int err = 0;
-+      transaction_t *transaction = NULL;
-+      unsigned long old_tail;
-+
-+      lock_kernel();
-+      
-+      /* Force everything buffered to the log.  A running transaction
-+       * has its commit started here; if there is none, fall back to
-+       * whatever transaction is already committing. */
-+      if (journal->j_running_transaction) {
-+              transaction = journal->j_running_transaction;
-+              log_start_commit(journal, transaction);
-+      } else if (journal->j_committing_transaction)
-+              transaction = journal->j_committing_transaction;
-+
-+      /* Wait for the log commit to complete... */
-+      if (transaction)
-+              log_wait_commit(journal, transaction->t_tid);
-+
-+      /* ...and flush everything in the log out to disk.  The loop
-+       * stops at the first checkpoint error. */
-+      lock_journal(journal);
-+      while (!err && journal->j_checkpoint_transactions != NULL)
-+              err = log_do_checkpoint(journal, journal->j_maxlen);
-+      cleanup_journal_tail(journal);
-+
-+      /* Finally, mark the journal as really needing no recovery.
-+       * This sets s_start==0 in the underlying superblock, which is
-+       * the magic code for a fully-recovered superblock.  Any future
-+       * commits of data to the journal will restore the current
-+       * s_start value.  j_tail is zeroed only for the duration of the
-+       * superblock write and then put back. */
-+      old_tail = journal->j_tail;
-+      journal->j_tail = 0;
-+      journal_update_superblock(journal, 1);
-+      journal->j_tail = old_tail;
-+
-+      unlock_journal(journal);
-+
-+      J_ASSERT(!journal->j_running_transaction);
-+      J_ASSERT(!journal->j_committing_transaction);
-+      J_ASSERT(!journal->j_checkpoint_transactions);
-+      J_ASSERT(journal->j_head == journal->j_tail);
-+      J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
-+
-+      unlock_kernel();
-+      
-+      return err;
-+}
-+
-+/*
-+ * Wipe out all of the contents of a journal, safely.  This will produce
-+ * a warning if the journal contains any valid recovery information.
-+ * Must be called between journal_init_*() and journal_load().
-+ *
-+ * If (write) is non-zero, then we wipe out the journal on disk; otherwise
-+ * we merely suppress recovery.
-+ */
-+
-+int journal_wipe (journal_t *journal, int write)
-+{
-+      int err;
-+
-+      /* Only legal before journal_load() has marked the journal loaded. */
-+      J_ASSERT (!(journal->j_flags & JFS_LOADED));
-+
-+      err = load_superblock(journal);
-+      if (err)
-+              return err;
-+
-+      /* load_superblock() copies s_start into j_tail.  A zero tail
-+       * means the log holds no recovery information, so there is
-+       * nothing to wipe. */
-+      if (!journal->j_tail)
-+              return 0;
-+
-+      printk (KERN_WARNING "JBD: %s recovery information on journal\n",
-+              write ? "Clearing" : "Ignoring");
-+
-+      /* Skip over the old log contents; with 'write' set the updated
-+       * superblock is also written to disk.  The superblock write
-+       * happens regardless of the skip result, whose status is what
-+       * the caller gets back. */
-+      err = journal_skip_recovery(journal);
-+      if (write)
-+              journal_update_superblock(journal, 1);
-+
-+      return err;
-+}
-+
-+/*
-+ * journal_dev_name: format a character string to describe on what
-+ * device this journal is present.
-+ */
-+
-+const char * journal_dev_name(journal_t *journal)
-+{
-+      /* An inode-backed journal is named after the inode's device;
-+       * otherwise use the journal's own device. */
-+      kdev_t dev = journal->j_inode ? journal->j_inode->i_dev
-+                                    : journal->j_dev;
-+
-+      return bdevname(dev);
-+}
-+
-+/*
-+ * journal_abort: perform a complete, immediate shutdown of the ENTIRE
-+ * journal (not of a single transaction).  This operation cannot be
-+ * undone without closing and reopening the journal.
-+ *
-+ * The journal_abort function is intended to support higher level error
-+ * recovery mechanisms such as the ext2/ext3 remount-readonly error
-+ * mode.
-+ *
-+ * Journal abort has very specific semantics.  Any existing dirty,
-+ * unjournaled buffers in the main filesystem will still be written to
-+ * disk by bdflush, but the journaling mechanism will be suspended
-+ * immediately and no further transaction commits will be honoured.
-+ *
-+ * Any dirty, journaled buffers will be written back to disk without
-+ * hitting the journal.  Atomicity cannot be guaranteed on an aborted
-+ * filesystem, but we _do_ attempt to leave as much data as possible
-+ * behind for fsck to use for cleanup.
-+ *
-+ * Any attempt to get a new transaction handle on a journal which is in
-+ * ABORT state will just result in an -EROFS error return.  A
-+ * journal_stop on an existing handle will return -EIO if we have
-+ * entered abort state during the update.
-+ *
-+ * Recursive transactions are not disturbed by journal abort until the
-+ * final journal_stop, which will receive the -EIO error.
-+ *
-+ * Finally, the journal_abort call allows the caller to supply an errno
-+ * which will be recorded (if possible) in the journal superblock.  This
-+ * allows a client to record failure conditions in the middle of a
-+ * transaction without having to complete the transaction to record the
-+ * failure to disk.  ext3_error, for example, now uses this
-+ * functionality.
-+ *
-+ * Errors which originate from within the journaling layer will NOT
-+ * supply an errno; a null errno implies that absolutely no further
-+ * writes are done to the journal (unless there are any already in
-+ * progress).
-+ */
-+
-+/* Quick version for internal journal use (doesn't lock the journal).
-+ * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
-+ * and don't attempt to make any other journal updates. */
-+void __journal_abort_hard (journal_t *journal)
-+{
-+      transaction_t *running;
-+
-+      /* Aborting twice is a no-op. */
-+      if (journal->j_flags & JFS_ABORT)
-+              return;
-+
-+      printk (KERN_ERR "Aborting journal on device %s.\n",
-+              journal_dev_name(journal));
-+
-+      journal->j_flags |= JFS_ABORT;
-+
-+      /* If a transaction is running, start its commit. */
-+      running = journal->j_running_transaction;
-+      if (running != NULL)
-+              log_start_commit(journal, running);
-+}
-+
-+/* Soft abort: record the abort error status in the journal superblock,
-+ * but don't do any other IO. */
-+void __journal_abort_soft (journal_t *journal, int errno)
-+{
-+      /* Already aborted: leave the recorded state alone. */
-+      if (journal->j_flags & JFS_ABORT)
-+              return;
-+
-+      /* The first error recorded wins; never overwrite it. */
-+      if (journal->j_errno == 0)
-+              journal->j_errno = errno;
-+
-+      __journal_abort_hard(journal);
-+
-+      /* Only a caller-supplied errno is worth a superblock write. */
-+      if (errno != 0)
-+              journal_update_superblock(journal, 1);
-+}
-+
-+/* Full version for external use */
-+void journal_abort (journal_t *journal, int errno)
-+{
-+      /* Locked wrapper around the internal soft abort. */
-+      lock_journal(journal);
-+      __journal_abort_soft(journal, errno);
-+      unlock_journal(journal);
-+}
-+
-+int journal_errno (journal_t *journal)
-+{
-+      int result;
-+
-+      lock_journal(journal);
-+      /* An aborted journal always reports -EROFS; otherwise report
-+       * the recorded errno. */
-+      result = (journal->j_flags & JFS_ABORT) ? -EROFS : journal->j_errno;
-+      unlock_journal(journal);
-+
-+      return result;
-+}
-+
-+int journal_clear_err (journal_t *journal)
-+{
-+      int result = 0;
-+
-+      lock_journal(journal);
-+      /* The recorded errno is cleared only while the journal is not
-+       * aborted; an aborted journal yields -EROFS instead. */
-+      if (!(journal->j_flags & JFS_ABORT))
-+              journal->j_errno = 0;
-+      else
-+              result = -EROFS;
-+      unlock_journal(journal);
-+
-+      return result;
-+}
-+
-+void journal_ack_err (journal_t *journal)
-+{
-+      lock_journal(journal);
-+      /* Flag the error as acknowledged, but only if one is recorded. */
-+      if (journal->j_errno != 0)
-+              journal->j_flags |= JFS_ACK_ERR;
-+      unlock_journal(journal);
-+}
-+
-+int journal_blocks_per_page(struct inode *inode)
-+{
-+      /* Page size over block size, computed by shifting. */
-+      int shift = PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits;
-+
-+      return 1 << shift;
-+}
-+
-+/*
-+ * shrink_journal_memory().
-+ * Called when we're under memory pressure.  Free up all the written-back
-+ * checkpointed metadata buffers.
-+ */
-+void shrink_journal_memory(void)
-+{
-+      struct list_head *pos;
-+      journal_t *journal;
-+
-+      lock_kernel();
-+      /* Visit every journal on all_journals, cleaning its checkpoint
-+       * list under journal_datalist_lock. */
-+      list_for_each(pos, &all_journals) {
-+              journal = list_entry(pos, journal_t, j_all_journals);
-+
-+              spin_lock(&journal_datalist_lock);
-+              __journal_clean_checkpoint_list(journal);
-+              spin_unlock(&journal_datalist_lock);
-+      }
-+      unlock_kernel();
-+}
-+
-+/*
-+ * Simple support for retrying memory allocations.  Introduced to help to
-+ * debug different VM deadlock avoidance strategies.
-+ */
-+void * __jbd_kmalloc (char *where, size_t size, int flags, int retry)
-+{
-+      static unsigned long last_warning;
-+      void *mem;
-+
-+      for (;;) {
-+              mem = kmalloc(size, flags);
-+
-+              /* Done on success, or on failure when the caller does
-+               * not want retries (mem is NULL in that case). */
-+              if (mem != NULL || !retry)
-+                      return mem;
-+
-+              /* Every retry goes to the debug log; the syslog copy is
-+               * rate-limited to one message per five seconds. */
-+              jbd_debug(1, "ENOMEM in %s, retrying.\n", where);
-+
-+              if (time_after(jiffies, last_warning + 5*HZ)) {
-+                      printk(KERN_NOTICE
-+                             "ENOMEM in %s, retrying.\n", where);
-+                      last_warning = jiffies;
-+              }
-+
-+              /* Yield the CPU before trying again. */
-+              current->policy |= SCHED_YIELD;
-+              schedule();
-+      }
-+}
-+
-+/*
-+ * Journal_head storage management
-+ *
-+ * journal_head_cache is the slab cache that journal_head structures are
-+ * allocated from.  nr_journal_heads exists only in CONFIG_JBD_DEBUG
-+ * builds; it is incremented on allocation and decremented on free.
-+ */
-+static kmem_cache_t *journal_head_cache;
-+#ifdef CONFIG_JBD_DEBUG
-+static atomic_t nr_journal_heads = ATOMIC_INIT(0);
-+#endif
-+
-+static int journal_init_journal_head_cache(void)
-+{
-+      /* The cache must not have been created already. */
-+      J_ASSERT(journal_head_cache == 0);
-+
-+      journal_head_cache = kmem_cache_create("journal_head",
-+                              sizeof(struct journal_head),
-+                              0,              /* offset */
-+                              0,              /* flags */
-+                              NULL,           /* ctor */
-+                              NULL);          /* dtor */
-+      if (journal_head_cache != 0)
-+              return 0;
-+
-+      printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
-+      return -ENOMEM;
-+}
-+
-+static void journal_destroy_journal_head_cache(void)
-+{
-+      /* Tear the slab cache down and clear the pointer so a later
-+       * journal_init_journal_head_cache() passes its assertion. */
-+      J_ASSERT(journal_head_cache != NULL);
-+      kmem_cache_destroy(journal_head_cache);
-+      journal_head_cache = NULL;
-+}
-+
-+/*
-+ * journal_head splicing and dicing
-+ */
-+static struct journal_head *journal_alloc_journal_head(void)
-+{
-+      static unsigned long last_warning;
-+      struct journal_head *jh;
-+
-+#ifdef CONFIG_JBD_DEBUG
-+      atomic_inc(&nr_journal_heads);
-+#endif
-+      jh = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
-+      if (jh != NULL)
-+              return jh;
-+
-+      /* Out of memory: complain (at most once per five seconds in the
-+       * syslog), then yield and retry until an allocation succeeds.
-+       * This function never returns NULL. */
-+      jbd_debug(1, "out of memory for journal_head\n");
-+      if (time_after(jiffies, last_warning + 5*HZ)) {
-+              printk(KERN_NOTICE "ENOMEM in " __FUNCTION__
-+                     ", retrying.\n");
-+              last_warning = jiffies;
-+      }
-+      do {
-+              current->policy |= SCHED_YIELD;
-+              schedule();
-+              jh = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
-+      } while (jh == NULL);
-+
-+      return jh;
-+}
-+
-+static void journal_free_journal_head(struct journal_head *jh)
-+{
-+#ifdef CONFIG_JBD_DEBUG
-+      /* Debug builds overwrite the structure with 0x5b before freeing
-+       * it and keep the live-object counter in step. */
-+      memset(jh, 0x5b, sizeof(*jh));
-+      atomic_dec(&nr_journal_heads);
-+#endif
-+      kmem_cache_free(journal_head_cache, jh);
-+}
-+
-+/*
-+ * A journal_head is attached to a buffer_head whenever JBD has an
-+ * interest in the buffer.
-+ *
-+ * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
-+ * is set.  This bit is tested in core kernel code where we need to take
-+ * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
-+ * there.
-+ *
-+ * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
-+ *
-+ * When a buffer has its BH_JBD bit set it is immune from being released by
-+ * core kernel code, mainly via ->b_count.
-+ *
-+ * A journal_head may be detached from its buffer_head when the journal_head's
-+ * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
-+ * Various places in JBD call journal_remove_journal_head() to indicate that the
-+ * journal_head can be dropped if needed.
-+ *
-+ * Various places in the kernel want to attach a journal_head to a buffer_head
-+ * _before_ attaching the journal_head to a transaction.  To protect the
-+ * journal_head in this situation, journal_add_journal_head elevates the
-+ * journal_head's b_jcount refcount by one.  The caller must call
-+ * journal_unlock_journal_head() to undo this.
-+ *
-+ * So the typical usage would be:
-+ *
-+ *    (Attach a journal_head if needed.  Increments b_jcount)
-+ *    struct journal_head *jh = journal_add_journal_head(bh);
-+ *    ...
-+ *    jh->b_transaction = xxx;
-+ *    journal_unlock_journal_head(jh);
-+ *
-+ * Now, the journal_head's b_jcount is zero, but it is safe from being released
-+ * because it has a non-zero b_transaction.
-+ */
-+
-+/*
-+ * Give a buffer_head a journal_head.
-+ *
-+ * Doesn't need the journal lock.
-+ * May sleep.
-+ * Cannot be called with journal_datalist_lock held.
-+ *
-+ * If the buffer already has a journal_head it is reused; otherwise a
-+ * new one is allocated and attached.  Either way b_jcount is raised by
-+ * one before returning, and the buffer's journal_head is returned.
-+ */
-+struct journal_head *journal_add_journal_head(struct buffer_head *bh)
-+{
-+      struct journal_head *jh;
-+
-+      spin_lock(&journal_datalist_lock);
-+      if (buffer_jbd(bh)) {
-+              jh = bh2jh(bh);
-+      } else {
-+              J_ASSERT_BH(bh,
-+                      (atomic_read(&bh->b_count) > 0) ||
-+                      (bh->b_page && bh->b_page->mapping));
-+              /* The allocation happens with the spinlock dropped, so
-+               * the BH_JBD state has to be re-checked once it is
-+               * retaken. */
-+              spin_unlock(&journal_datalist_lock);
-+              jh = journal_alloc_journal_head();
-+              memset(jh, 0, sizeof(*jh));
-+              spin_lock(&journal_datalist_lock);
-+
-+              if (buffer_jbd(bh)) {
-+                      /* Someone did it for us!  Throw our fresh
-+                       * journal_head away and use theirs. */
-+                      J_ASSERT_BH(bh, bh->b_private != NULL);
-+                      journal_free_journal_head(jh);
-+                      jh = bh->b_private;
-+              } else {
-+                      /*
-+                       * We actually don't need jh_splice_lock when
-+                       * adding a journal_head - only on removal.
-+                       *
-+                       * Attaching sets BH_JBD, links bh and jh to
-+                       * each other, and takes a b_count reference on
-+                       * the buffer.
-+                       */
-+                      spin_lock(&jh_splice_lock);
-+                      set_bit(BH_JBD, &bh->b_state);
-+                      bh->b_private = jh;
-+                      jh->b_bh = bh;
-+                      atomic_inc(&bh->b_count);
-+                      spin_unlock(&jh_splice_lock);
-+                      BUFFER_TRACE(bh, "added journal_head");
-+              }
-+      }
-+      jh->b_jcount++;
-+      spin_unlock(&journal_datalist_lock);
-+      return bh->b_private;
-+}
-+
-+/*
-+ * journal_remove_journal_head(): if the buffer isn't attached to a transaction
-+ * and has a zero b_jcount then remove and release its journal_head.   If we did
-+ * see that the buffer is not used by any transaction we also "logically"
-+ * decrement ->b_count.
-+ *
-+ * We in fact take an additional increment on ->b_count as a convenience,
-+ * because the caller usually wants to do additional things with the bh
-+ * after calling here.
-+ * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
-+ * time.  Once the caller has run __brelse(), the buffer is eligible for
-+ * reaping by try_to_free_buffers().
-+ *
-+ * If b_jcount is non-zero, or any of the three transaction pointers is set,
-+ * the journal_head is left attached and only the extra ->b_count reference
-+ * is taken.
-+ *
-+ * Requires journal_datalist_lock.
-+ */
-+void __journal_remove_journal_head(struct buffer_head *bh)
-+{
-+      struct journal_head *jh = bh2jh(bh);
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+      J_ASSERT_JH(jh, jh->b_jcount >= 0);
-+      /* Convenience reference for the caller; the caller's __brelse() drops it. */
-+      atomic_inc(&bh->b_count);
-+      if (jh->b_jcount == 0) {
-+              if (jh->b_transaction == NULL &&
-+                              jh->b_next_transaction == NULL &&
-+                              jh->b_cp_transaction == NULL) {
-+                      J_ASSERT_BH(bh, buffer_jbd(bh));
-+                      J_ASSERT_BH(bh, jh2bh(jh) == bh);
-+                      BUFFER_TRACE(bh, "remove journal_head");
-+                      spin_lock(&jh_splice_lock);
-+                      bh->b_private = NULL;
-+                      jh->b_bh = NULL;        /* debug, really */
-+                      clear_bit(BH_JBD, &bh->b_state);
-+                      /*
-+                       * Drops the ->b_count reference that was taken when
-+                       * the journal_head was attached.
-+                       */
-+                      __brelse(bh);
-+                      spin_unlock(&jh_splice_lock);
-+                      journal_free_journal_head(jh);
-+              } else {
-+                      BUFFER_TRACE(bh, "journal_head was locked");
-+              }
-+      }
-+}
-+
-+/*
-+ * Drop one b_jcount reference on jh (the one journal_add_journal_head()
-+ * hands out).  When the count reaches zero and jh has no b_transaction, try
-+ * to detach the journal_head from its buffer.
-+ *
-+ * Takes journal_datalist_lock itself.
-+ */
-+void journal_unlock_journal_head(struct journal_head *jh)
-+{
-+      spin_lock(&journal_datalist_lock);
-+      J_ASSERT_JH(jh, jh->b_jcount > 0);
-+      --jh->b_jcount;
-+      if (!jh->b_jcount && !jh->b_transaction) {
-+              struct buffer_head *bh;
-+              /* Fetch bh first: jh may be freed by the call below. */
-+              bh = jh2bh(jh);
-+              __journal_remove_journal_head(bh);
-+              /* Balance the extra ->b_count reference taken by the callee. */
-+              __brelse(bh);
-+      }
-+      
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/*
-+ * Locked wrapper: run __journal_remove_journal_head() on bh with
-+ * journal_datalist_lock held for the duration of the call.
-+ */
-+void journal_remove_journal_head(struct buffer_head *bh)
-+{
-+      spin_lock(&journal_datalist_lock);
-+      __journal_remove_journal_head(bh);
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/*
-+ * /proc tunables
-+ */
-+#if defined(CONFIG_JBD_DEBUG)
-+int journal_enable_debug;
-+EXPORT_SYMBOL(journal_enable_debug);
-+#endif
-+
-+#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
-+
-+static struct proc_dir_entry *proc_jbd_debug;
-+
-+/*
-+ * Format journal_enable_debug as a decimal number followed by a newline at
-+ * page + off, flag end-of-file, and return the number of characters written.
-+ * start, count and data are not used.
-+ */
-+int read_jbd_debug(char *page, char **start, off_t off,
-+                        int count, int *eof, void *data)
-+{
-+      char *out = page + off;
-+
-+      *eof = 1;
-+      return sprintf(out, "%d\n", journal_enable_debug);
-+}
-+
-+/*
-+ * Parse a decimal number written by the user and store it in
-+ * journal_enable_debug.
-+ *
-+ * At most sizeof(buf) - 1 bytes are consumed; longer writes are truncated.
-+ * Returns the number of bytes consumed, or -EFAULT if the user buffer cannot
-+ * be read.  file and data are not used.
-+ */
-+int write_jbd_debug(struct file *file, const char *buffer,
-+                         unsigned long count, void *data)
-+{
-+      char buf[32];
-+
-+      if (count > ARRAY_SIZE(buf) - 1)
-+              count = ARRAY_SIZE(buf) - 1;
-+      if (copy_from_user(buf, buffer, count))
-+              return -EFAULT;
-+      /*
-+       * Terminate right after the copied bytes.  Terminating only at the
-+       * last array slot would let simple_strtoul() run on into whatever
-+       * uninitialised stack bytes follow a short write.
-+       */
-+      buf[count] = '\0';
-+      journal_enable_debug = simple_strtoul(buf, NULL, 10);
-+      return count;
-+}
-+
-+#define JBD_PROC_NAME "sys/fs/jbd-debug"
-+
-+/*
-+ * Create the JBD_PROC_NAME proc entry (mode 0644), remember it in
-+ * proc_jbd_debug and, if creation succeeded, hook up the read/write handlers.
-+ */
-+static void __init create_jbd_proc_entry(void)
-+{
-+      struct proc_dir_entry *entry;
-+
-+      entry = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-+      proc_jbd_debug = entry;
-+      if (!entry)
-+              return;
-+
-+      /* Why is this so hard? */
-+      entry->read_proc = read_jbd_debug;
-+      entry->write_proc = write_jbd_debug;
-+}
-+
-+/* Remove the JBD_PROC_NAME proc entry, if one was ever created. */
-+static void __exit remove_jbd_proc_entry(void)
-+{
-+      if (!proc_jbd_debug)
-+              return;
-+      remove_proc_entry(JBD_PROC_NAME, NULL);
-+}
-+
-+#else
-+
-+#define create_jbd_proc_entry() do {} while (0)
-+#define remove_jbd_proc_entry() do {} while (0)
-+
-+#endif
-+
-+/*
-+ * Module startup and shutdown
-+ */
-+
-+/*
-+ * Set up the revoke caches and then the journal_head cache.  Stops at, and
-+ * returns, the first non-zero result; returns 0 when both succeed.
-+ */
-+static int __init journal_init_caches(void)
-+{
-+      int err = journal_init_revoke_caches();
-+
-+      if (err != 0)
-+              return err;
-+      return journal_init_journal_head_cache();
-+}
-+
-+/*
-+ * Tear down the revoke caches and the journal_head cache.  Called from
-+ * journal_exit() and from journal_init() when cache setup fails.
-+ */
-+static void journal_destroy_caches(void)
-+{
-+      journal_destroy_revoke_caches();
-+      journal_destroy_journal_head_cache();
-+}
-+
-+/*
-+ * Module entry point: set up the slab caches and, once they exist, the
-+ * debug proc entry.
-+ *
-+ * Returns 0 on success or the error from journal_init_caches().  On failure
-+ * the caches are torn down again and no proc entry is created, so nothing
-+ * is left behind that journal_exit() would have had to clean up.
-+ */
-+static int __init journal_init(void)
-+{
-+      int ret;
-+
-+      printk(KERN_INFO "Journalled Block Device driver loaded\n");
-+      ret = journal_init_caches();
-+      if (ret != 0) {
-+              journal_destroy_caches();
-+              return ret;
-+      }
-+      create_jbd_proc_entry();
-+      return 0;
-+}
-+
-+/*
-+ * Module exit point: remove the debug proc entry and destroy the caches.
-+ * With CONFIG_JBD_DEBUG, first complain loudly if nr_journal_heads is
-+ * non-zero.
-+ */
-+static void __exit journal_exit(void)
-+{
-+#ifdef CONFIG_JBD_DEBUG
-+      int n = atomic_read(&nr_journal_heads);
-+      if (n)
-+              printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
-+#endif
-+      remove_jbd_proc_entry();
-+      journal_destroy_caches();
-+}
-+
-+MODULE_LICENSE("GPL");
-+module_init(journal_init);
-+module_exit(journal_exit);
-+
-diff -ruP linux.mcp2/fs/jbd/recovery.c linuxppc_2.4.19_final/fs/jbd/recovery.c
---- linux.mcp2/fs/jbd/recovery.c       1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/recovery.c    2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,589 @@
-+/*
-+ * linux/fs/recovery.c
-+ * 
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
-+ *
-+ * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Journal recovery routines for the generic filesystem journaling code;
-+ * part of the ext2fs journaling system.  
-+ */
-+
-+#ifndef __KERNEL__
-+#include "jfs_user.h"
-+#else
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+#endif
-+
-+/*
-+ * Maintain information about the progress of the recovery job, so that
-+ * the different passes can carry information between them. 
-+ */
-+struct recovery_info 
-+{
-+      /* Sequence number the scan started from (superblock s_sequence). */
-+      tid_t           start_transaction;      
-+      /* Sequence number at which PASS_SCAN stopped walking the log. */
-+      tid_t           end_transaction;
-+      
-+      /* Logged blocks copied back to the filesystem in PASS_REPLAY. */
-+      int             nr_replays;
-+      /* Block numbers passed to journal_set_revoke() in PASS_REVOKE. */
-+      int             nr_revokes;
-+      /* Logged blocks skipped in PASS_REPLAY because they were revoked. */
-+      int             nr_revoke_hits;
-+};
-+
-+enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY};
-+static int do_one_pass(journal_t *journal,
-+                              struct recovery_info *info, enum passtype pass);
-+static int scan_revoke_records(journal_t *, struct buffer_head *,
-+                              tid_t, struct recovery_info *);
-+
-+#ifdef __KERNEL__
-+
-+/* Release the first n readahead buffers of b[], from the last one down. */
-+void journal_brelse_array(struct buffer_head *b[], int n)
-+{
-+      int i;
-+
-+      for (i = n - 1; i >= 0; i--)
-+              brelse(b[i]);
-+}
-+
-+
-+/*
-+ * When reading from the journal, we are going through the block device
-+ * layer directly and so there is no readahead being done for us.  We
-+ * need to implement any readahead ourselves if we want it to happen at
-+ * all.  Recovery is basically one long sequential read, so make sure we
-+ * do the IO in reasonably large chunks.
-+ *
-+ * This is not so critical that we need to be enormously clever about
-+ * the readahead size, though.  128K is a purely arbitrary, good-enough
-+ * fixed value.
-+ */
-+
-+#define MAXBUF 8
-+/*
-+ * Start reads for up to 128K worth of journal blocks beginning at log
-+ * offset start, never going past journal->j_maxlen.  Reads are submitted in
-+ * batches of MAXBUF buffers and are not waited for here.
-+ *
-+ * Returns 0 on success, the journal_bmap() error for an unmappable block,
-+ * or -ENOMEM if getblk() fails.
-+ */
-+static int do_readahead(journal_t *journal, unsigned int start)
-+{
-+      int err;
-+      unsigned int max, nbufs, next;
-+      unsigned long blocknr;
-+      struct buffer_head *bh;
-+      
-+      struct buffer_head * bufs[MAXBUF];
-+      
-+      /* Do up to 128K of readahead */
-+      max = start + (128 * 1024 / journal->j_blocksize);
-+      if (max > journal->j_maxlen)
-+              max = journal->j_maxlen;
-+
-+      /* Do the readahead itself.  We'll submit MAXBUF buffer_heads at
-+       * a time to the block device IO layer. */
-+      
-+      nbufs = 0;
-+      
-+      for (next = start; next < max; next++) {
-+              err = journal_bmap(journal, next, &blocknr);
-+
-+              if (err) {
-+                      printk (KERN_ERR "JBD: bad block at offset %u\n",
-+                              next);
-+                      goto failed;
-+              }
-+
-+              bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
-+              if (!bh) {
-+                      err = -ENOMEM;
-+                      goto failed;
-+              }
-+
-+              /* Only queue buffers that are neither valid nor under IO. */
-+              if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
-+                      bufs[nbufs++] = bh;
-+                      if (nbufs == MAXBUF) {
-+                              /* Batch is full: submit it, drop our refs. */
-+                              ll_rw_block(READ, nbufs, bufs);
-+                              journal_brelse_array(bufs, nbufs);
-+                              nbufs = 0;
-+                      }
-+              } else
-+                      brelse(bh);
-+      }
-+
-+      /* Submit the final, partially filled batch. */
-+      if (nbufs)
-+              ll_rw_block(READ, nbufs, bufs);
-+      err = 0;
-+
-+failed:       
-+      /*
-+       * Drop the references on whatever is still queued.  On the error
-+       * paths these buffers were never submitted for reading.
-+       */
-+      if (nbufs) 
-+              journal_brelse_array(bufs, nbufs);
-+      return err;
-+}
-+
-+#endif /* __KERNEL__ */
-+
-+
-+/*
-+ * Read a block from the journal
-+ *
-+ * offset is a log block offset and must be below journal->j_maxlen.
-+ *
-+ * On success returns 0 and stores a referenced, up-to-date buffer in *bhp;
-+ * the caller releases it with brelse().  On failure *bhp is NULL and the
-+ * return value is the journal_bmap() error, -ENOMEM if getblk() fails, or
-+ * -EIO if the block is still not up to date after waiting.
-+ */
-+
-+static int jread(struct buffer_head **bhp, journal_t *journal, 
-+               unsigned int offset)
-+{
-+      int err;
-+      unsigned long blocknr;
-+      struct buffer_head *bh;
-+
-+      *bhp = NULL;
-+
-+      J_ASSERT (offset < journal->j_maxlen);
-+      
-+      err = journal_bmap(journal, offset, &blocknr);
-+
-+      if (err) {
-+              printk (KERN_ERR "JBD: bad block at offset %u\n",
-+                      offset);
-+              return err;
-+      }
-+
-+      bh = getblk(journal->j_dev, blocknr, journal->j_blocksize);
-+      if (!bh)
-+              return -ENOMEM;
-+
-+      if (!buffer_uptodate(bh)) {
-+              /* If this is a brand new buffer, start readahead.
-+                   Otherwise, we assume we are already reading it.  */
-+              /* The result of do_readahead() is deliberately not checked:
-+               * a failed read shows up in the uptodate test below. */
-+              if (!buffer_req(bh))
-+                      do_readahead(journal, offset);
-+              wait_on_buffer(bh);
-+      }
-+
-+      if (!buffer_uptodate(bh)) {
-+              printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
-+                      offset);
-+              brelse(bh);
-+              return -EIO;
-+      }
-+
-+      *bhp = bh;
-+      return 0;
-+}
-+
-+
-+/*
-+ * Count the number of in-use tags in a journal descriptor block.
-+ *
-+ * Tags start right after the journal header.  A tag without
-+ * JFS_FLAG_SAME_UUID is followed by 16 extra bytes.  Counting stops after a
-+ * tag carrying JFS_FLAG_LAST_TAG, or when a whole tag no longer fits within
-+ * size bytes.
-+ */
-+
-+static int count_tags(struct buffer_head *bh, int size)
-+{
-+      char *                  cursor = &bh->b_data[sizeof(journal_header_t)];
-+      int                     count = 0;
-+
-+      for (;;) {
-+              journal_block_tag_t *   entry;
-+
-+              if ((cursor - bh->b_data + sizeof(journal_block_tag_t)) > size)
-+                      break;
-+
-+              entry = (journal_block_tag_t *) cursor;
-+              count++;
-+
-+              cursor += sizeof(journal_block_tag_t);
-+              if (!(entry->t_flags & htonl(JFS_FLAG_SAME_UUID)))
-+                      cursor += 16;
-+
-+              if (entry->t_flags & htonl(JFS_FLAG_LAST_TAG))
-+                      break;
-+      }
-+
-+      return count;
-+}
-+
-+
-+/*
-+ * Make sure we wrap around the log correctly!
-+ *
-+ * If var has reached or passed j_last, pull it back by the length of the
-+ * log area (j_last - j_first).  var must be a modifiable lvalue; it is
-+ * evaluated more than once.
-+ */
-+#define wrap(journal, var)                                            \
-+do {                                                                  \
-+      if ((var) >= (journal)->j_last)                                 \
-+              (var) -= ((journal)->j_last - (journal)->j_first);      \
-+} while (0)
-+
-+/*
-+ * journal_recover
-+ *
-+ * The primary function for recovering the log contents when mounting a
-+ * journaled device.  
-+ * 
-+ * Recovery is done in three passes.  In the first pass, we look for the
-+ * end of the log.  In the second, we assemble the list of revoke
-+ * blocks.  In the third and final pass, we replay any un-revoked blocks
-+ * in the log.  
-+ */
-+
-+int journal_recover(journal_t *journal)
-+{
-+      journal_superblock_t *  jsb = journal->j_superblock;
-+      struct recovery_info    info;
-+      int                     err;
-+
-+      memset(&info, 0, sizeof(info));
-+
-+      /*
-+       * The journal superblock's s_start field (the current log head)
-+       * is zero if, and only if, the journal was cleanly unmounted.
-+       * In that case there is nothing to replay.
-+       */
-+      if (jsb->s_start == 0) {
-+              jbd_debug(1, "No recovery required, last transaction %d\n",
-+                        ntohl(jsb->s_sequence));
-+              journal->j_transaction_sequence = ntohl(jsb->s_sequence) + 1;
-+              return 0;
-+      }
-+
-+      /* Scan, then revoke, then replay; a failed pass stops the chain. */
-+      err = do_one_pass(journal, &info, PASS_SCAN);
-+      if (err == 0) {
-+              err = do_one_pass(journal, &info, PASS_REVOKE);
-+              if (err == 0)
-+                      err = do_one_pass(journal, &info, PASS_REPLAY);
-+      }
-+
-+      jbd_debug(0, "JBD: recovery, exit status %d, "
-+                "recovered transactions %u to %u\n",
-+                err, info.start_transaction, info.end_transaction);
-+      jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", 
-+                info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
-+
-+      /* Restart the log at the next transaction ID, thus invalidating
-+       * any existing commit records in the log. */
-+      info.end_transaction++;
-+      journal->j_transaction_sequence = info.end_transaction;
-+
-+      journal_clear_revoke(journal);
-+      fsync_no_super(journal->j_fs_dev);
-+      return err;
-+}
-+
-+/*
-+ * journal_skip_recovery
-+ *
-+ * Locate any valid recovery information from the journal and set up the
-+ * journal structures in memory to ignore it (presumably because the
-+ * caller has evidence that it is out of date).  
-+ *
-+ * We perform one pass over the journal to allow us to tell the user how
-+ * much recovery information is being erased, and to let us initialise
-+ * the journal transaction sequence numbers to the next unused ID. 
-+ */
-+
-+int journal_skip_recovery(journal_t *journal)
-+{
-+      int                     err;
-+      journal_superblock_t *  sb;
-+
-+      struct recovery_info    info;
-+      
-+      memset (&info, 0, sizeof(info));
-+      sb = journal->j_superblock;
-+      
-+      /* Only the scan pass runs: nothing is revoked or replayed. */
-+      err = do_one_pass(journal, &info, PASS_SCAN);
-+
-+      if (err) {
-+              /* Scan failed: just step past the current sequence number. */
-+              printk(KERN_ERR "JBD: error %d scanning journal\n", err);
-+              ++journal->j_transaction_sequence;
-+      } else {
-+              /*
-+               * "dropped" exists only with CONFIG_JBD_DEBUG and is used
-+               * only as a jbd_debug() argument below.
-+               * NOTE(review): this relies on jbd_debug() discarding its
-+               * arguments when CONFIG_JBD_DEBUG is off - confirm.
-+               */
-+#ifdef CONFIG_JBD_DEBUG
-+              int dropped = info.end_transaction - ntohl(sb->s_sequence);
-+#endif
-+              
-+              jbd_debug(0, 
-+                        "JBD: ignoring %d transaction%s from the journal.\n",
-+                        dropped, (dropped == 1) ? "" : "s");
-+              /* Continue numbering after the last transaction found. */
-+              journal->j_transaction_sequence = ++info.end_transaction;
-+      }
-+
-+      /* Applied on both the success and the error path. */
-+      journal->j_tail = 0;
-+      
-+      return err;
-+}
-+
-+/*
-+ * do_one_pass(): walk the log once, starting at the block (s_start) and
-+ * sequence number (s_sequence) recorded in the journal superblock.
-+ *
-+ *   PASS_SCAN    finds where the log ends and records
-+ *                info->start_transaction and info->end_transaction.
-+ *   PASS_REVOKE  hands every revoke block to scan_revoke_records().
-+ *   PASS_REPLAY  copies every logged block that is not revoked into a
-+ *                buffer on journal->j_fs_dev and marks it dirty.
-+ *
-+ * The walk stops at the first block without the journal magic number, with
-+ * an unexpected sequence number or with an unknown block type, and - for
-+ * the passes after PASS_SCAN - on reaching info->end_transaction.
-+ *
-+ * Returns 0 on success.  A log block that cannot be read during replay is
-+ * skipped and its error is returned at the end.  A non-scan pass that ends
-+ * at a different transaction than PASS_SCAN did returns -EIO unless an
-+ * earlier error is already pending.  Any other failure aborts the pass and
-+ * returns its error immediately.
-+ */
-+static int do_one_pass(journal_t *journal,
-+                      struct recovery_info *info, enum passtype pass)
-+{
-+      
-+      unsigned int            first_commit_ID, next_commit_ID;
-+      unsigned long           next_log_block;
-+      int                     err, success = 0;
-+      journal_superblock_t *  sb;
-+      journal_header_t *      tmp;
-+      struct buffer_head *    bh;
-+      unsigned int            sequence;
-+      int                     blocktype;
-+
-+      /* 
-+       * First thing is to establish what we expect to find in the log
-+       * (in terms of transaction IDs), and where (in terms of log
-+       * block offsets): query the superblock.  
-+       */
-+
-+      sb = journal->j_superblock;
-+      next_commit_ID = ntohl(sb->s_sequence);
-+      next_log_block = ntohl(sb->s_start);
-+
-+      first_commit_ID = next_commit_ID;
-+      if (pass == PASS_SCAN)
-+              info->start_transaction = first_commit_ID;
-+
-+      jbd_debug(1, "Starting recovery pass %d\n", pass);
-+
-+      /*
-+       * Now we walk through the log, transaction by transaction,
-+       * making sure that each transaction has a commit block in the
-+       * expected place.  Each complete transaction gets replayed back
-+       * into the main filesystem. 
-+       */
-+
-+      while (1) {
-+              int                     flags;
-+              char *                  tagp;
-+              journal_block_tag_t *   tag;
-+              struct buffer_head *    obh;
-+              struct buffer_head *    nbh;
-+              
-+              /* If we already know where to stop the log traversal,
-+               * check right now that we haven't gone past the end of
-+               * the log. */
-+              
-+              if (pass != PASS_SCAN)
-+                      if (tid_geq(next_commit_ID, info->end_transaction))
-+                              break;
-+
-+              jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
-+                        next_commit_ID, next_log_block, journal->j_last);
-+
-+              /* Skip over each chunk of the transaction looking for
-+               * either the next descriptor block or the final commit
-+               * record. */
-+              
-+              jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
-+              err = jread(&bh, journal, next_log_block);
-+              if (err)
-+                      goto failed;
-+
-+              next_log_block++;
-+              wrap(journal, next_log_block);
-+              
-+              /* What kind of buffer is it? 
-+               * 
-+               * If it is a descriptor block, check that it has the
-+               * expected sequence number.  Otherwise, we're all done
-+               * here. */
-+
-+              tmp = (journal_header_t *)bh->b_data;
-+              
-+              if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) {
-+                      brelse(bh);
-+                      break;
-+              }
-+
-+              blocktype = ntohl(tmp->h_blocktype);
-+              sequence = ntohl(tmp->h_sequence);
-+              jbd_debug(3, "Found magic %d, sequence %d\n", 
-+                        blocktype, sequence);
-+              
-+              if (sequence != next_commit_ID) {
-+                      brelse(bh);
-+                      break;
-+              }
-+              
-+              /* OK, we have a valid descriptor block which matches
-+               * all of the sequence number checks.  What are we going
-+               * to do with it?  That depends on the pass... */
-+
-+              switch(blocktype) {
-+              case JFS_DESCRIPTOR_BLOCK:
-+                      /* If it is a valid descriptor block, replay it
-+                       * in pass REPLAY; otherwise, just skip over the
-+                       * blocks it describes. */
-+                      if (pass != PASS_REPLAY) {
-+                              next_log_block +=
-+                                      count_tags(bh, journal->j_blocksize);
-+                              wrap(journal, next_log_block);
-+                              brelse(bh);
-+                              continue;
-+                      }
-+
-+                      /* A descriptor block: we can now write all of
-+                       * the data blocks.  Yay, useful work is finally
-+                       * getting done here! */
-+
-+                      tagp = &bh->b_data[sizeof(journal_header_t)];
-+                      while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
-+                             <= journal->j_blocksize) {
-+                              unsigned long io_block;
-+
-+                              tag = (journal_block_tag_t *) tagp;
-+                              flags = ntohl(tag->t_flags);
-+                              
-+                              io_block = next_log_block++;
-+                              wrap(journal, next_log_block);
-+                              err = jread(&obh, journal, io_block);
-+                              if (err) {
-+                                      /* Recover what we can, but
-+                                       * report failure at the end. */
-+                                      success = err;
-+                                      printk (KERN_ERR 
-+                                              "JBD: IO error %d recovering "
-+                                              "block %ld in log\n",
-+                                              err, io_block);
-+                              } else {
-+                                      unsigned long blocknr;
-+                                      
-+                                      J_ASSERT(obh != NULL);
-+                                      blocknr = ntohl(tag->t_blocknr);
-+
-+                                      /* If the block has been
-+                                       * revoked, then we're all done
-+                                       * here. */
-+                                      if (journal_test_revoke
-+                                          (journal, blocknr, 
-+                                           next_commit_ID)) {
-+                                              brelse(obh);
-+                                              ++info->nr_revoke_hits;
-+                                              goto skip_write;
-+                                      }
-+                                                              
-+                                      /* Find a buffer for the new
-+                                       * data being restored */
-+                                      nbh = getblk(journal->j_fs_dev, blocknr,
-+                                                   journal->j_blocksize);
-+                                      if (nbh == NULL) {
-+                                              printk(KERN_ERR 
-+                                                     "JBD: Out of memory "
-+                                                     "during recovery.\n");
-+                                              err = -ENOMEM;
-+                                              brelse(bh);
-+                                              brelse(obh);
-+                                              goto failed;
-+                                      }
-+
-+                                      lock_buffer(nbh);
-+                                      memcpy(nbh->b_data, obh->b_data,
-+                                                      journal->j_blocksize);
-+                                      if (flags & JFS_FLAG_ESCAPE) {
-+                                              /* Restore the magic number
-+                                               * in the replayed copy, not
-+                                               * in the descriptor block. */
-+                                              *((unsigned int *)nbh->b_data) =
-+                                                      htonl(JFS_MAGIC_NUMBER);
-+                                      }
-+
-+                                      BUFFER_TRACE(nbh, "marking dirty");
-+                                      mark_buffer_dirty(nbh);
-+                                      BUFFER_TRACE(nbh, "marking uptodate");
-+                                      mark_buffer_uptodate(nbh, 1);
-+                                      unlock_buffer(nbh);
-+                                      ++info->nr_replays;
-+                                      /* ll_rw_block(WRITE, 1, &nbh); */
-+                                      brelse(obh);
-+                                      brelse(nbh);
-+                              }
-+                              
-+                      skip_write:
-+                              tagp += sizeof(journal_block_tag_t);
-+                              if (!(flags & JFS_FLAG_SAME_UUID))
-+                                      tagp += 16;
-+
-+                              if (flags & JFS_FLAG_LAST_TAG)
-+                                      break;
-+                      }
-+                      
-+                      brelse(bh);
-+                      continue;
-+
-+              case JFS_COMMIT_BLOCK:
-+                      /* Found an expected commit block: not much to
-+                       * do other than move on to the next sequence
-+                       * number. */
-+                      brelse(bh);
-+                      next_commit_ID++;
-+                      continue;
-+
-+              case JFS_REVOKE_BLOCK:
-+                      /* If we aren't in the REVOKE pass, then we can
-+                       * just skip over this block. */
-+                      if (pass != PASS_REVOKE) {
-+                              brelse(bh);
-+                              continue;
-+                      }
-+
-+                      err = scan_revoke_records(journal, bh,
-+                                                next_commit_ID, info);
-+                      brelse(bh);
-+                      if (err)
-+                              goto failed;
-+                      continue;
-+
-+              default:
-+                      jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
-+                                blocktype);
-+                      /* Every other exit drops bh; do so here as well. */
-+                      brelse(bh);
-+                      goto done;
-+              }
-+      }
-+
-+ done:
-+      /* 
-+       * We broke out of the log scan loop: either we came to the
-+       * known end of the log or we found an unexpected block in the
-+       * log.  If the latter happened, then we know that the "current"
-+       * transaction marks the end of the valid log.
-+       */
-+      
-+      if (pass == PASS_SCAN)
-+              info->end_transaction = next_commit_ID;
-+      else {
-+              /* It's really bad news if different passes end up at
-+               * different places (but possible due to IO errors). */
-+              if (info->end_transaction != next_commit_ID) {
-+                      printk (KERN_ERR "JBD: recovery pass %d ended at "
-+                              "transaction %u, expected %u\n",
-+                              pass, next_commit_ID, info->end_transaction);
-+                      if (!success)
-+                              success = -EIO;
-+              }
-+      }
-+
-+      return success;
-+
-+ failed:
-+      return err;
-+}
-+
-+
-+/*
-+ * Scan a revoke record, marking all blocks mentioned as revoked.
-+ *
-+ * The header's r_count is used as the byte offset at which the list of
-+ * 4-byte block numbers ends.  It comes straight from disk, so it is checked
-+ * against the journal block size before anything is read, and a trailing
-+ * fragment shorter than one record is ignored.
-+ *
-+ * Returns 0 on success, -EIO if r_count exceeds the block size, or the
-+ * error from journal_set_revoke().
-+ */
-+
-+static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 
-+                             tid_t sequence, struct recovery_info *info)
-+{
-+      journal_revoke_header_t *header;
-+      int offset, max;
-+
-+      header = (journal_revoke_header_t *) bh->b_data;
-+      offset = sizeof(journal_revoke_header_t);
-+      max = ntohl(header->r_count);
-+
-+      if (max > journal->j_blocksize) {
-+              printk(KERN_ERR "JBD: revoke block claims %d bytes, "
-+                     "block size is %d\n",
-+                     max, (int) journal->j_blocksize);
-+              return -EIO;
-+      }
-+
-+      /* Only whole records: never read past the end marked by r_count. */
-+      while (offset + 4 <= max) {
-+              unsigned long blocknr;
-+              int err;
-+              
-+              blocknr = ntohl(* ((unsigned int *) (bh->b_data+offset)));
-+              offset += 4;
-+              err = journal_set_revoke(journal, blocknr, sequence);
-+              if (err)
-+                      return err;
-+              ++info->nr_revokes;
-+      }
-+      return 0;
-+}
-diff -ruP linux.mcp2/fs/jbd/revoke.c linuxppc_2.4.19_final/fs/jbd/revoke.c
---- linux.mcp2/fs/jbd/revoke.c 1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/revoke.c      2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,636 @@
-+/*
-+ * linux/fs/revoke.c
-+ * 
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 2000
-+ *
-+ * Copyright 2000 Red Hat corp --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Journal revoke routines for the generic filesystem journaling code;
-+ * part of the ext2fs journaling system.
-+ *
-+ * Revoke is the mechanism used to prevent old log records for deleted
-+ * metadata from being replayed on top of newer data using the same
-+ * blocks.  The revoke mechanism is used in two separate places:
-+ * 
-+ * + Commit: during commit we write the entire list of the current
-+ *   transaction's revoked blocks to the journal
-+ * 
-+ * + Recovery: during recovery we record the transaction ID of all
-+ *   revoked blocks.  If there are multiple revoke records in the log
-+ *   for a single block, only the last one counts, and if there is a log
-+ *   entry for a block beyond the last revoke, then that log entry still
-+ *   gets replayed.
-+ *
-+ * We can get interactions between revokes and new log data within a
-+ * single transaction:
-+ *
-+ * Block is revoked and then journaled:
-+ *   The desired end result is the journaling of the new block, so we 
-+ *   cancel the revoke before the transaction commits.
-+ *
-+ * Block is journaled and then revoked:
-+ *   The revoke must take precedence over the write of the block, so we
-+ *   need either to cancel the journal entry or to write the revoke
-+ *   later in the log than the log block.  In this case, we choose the
-+ *   latter: journaling a block cancels any revoke record for that block
-+ *   in the current transaction, so any revoke for that block in the
-+ *   transaction must have happened after the block was journaled and so
-+ *   the revoke must take precedence.
-+ *
-+ * Block is revoked and then written as data: 
-+ *   The data write is allowed to succeed, but the revoke is _not_
-+ *   cancelled.  We still need to prevent old log records from
-+ *   overwriting the new data.  We don't even need to clear the revoke
-+ *   bit here.
-+ *
-+ * Revoke information on buffers is a tri-state value:
-+ *
-+ * RevokeValid clear: no cached revoke status, need to look it up
-+ * RevokeValid set, Revoked clear:
-+ *                    buffer has not been revoked, and cancel_revoke
-+ *                    need do nothing.
-+ * RevokeValid set, Revoked set:
-+ *                    buffer has been revoked.  
-+ */
-+
-+#ifndef __KERNEL__
-+#include "jfs_user.h"
-+#else
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+#include <linux/list.h>
-+#include <linux/smp_lock.h>
-+#include <linux/init.h>
-+#endif
-+
-+static kmem_cache_t *revoke_record_cache;
-+static kmem_cache_t *revoke_table_cache;
-+
-+/* Each revoke record represents one single revoked block.  During
-+   journal replay, this involves recording the transaction ID of the
-+   last transaction to revoke this block. */
-+
-+struct jbd_revoke_record_s 
-+{
-+      struct list_head  hash;         /* Link in one hash_table[] chain */
-+      tid_t             sequence;     /* Used for recovery only */
-+      unsigned long     blocknr;      /* The revoked block; also the hash key */
-+};
-+
-+
-+/* The revoke table is just a simple hash table of revoke records. */
-+struct jbd_revoke_table_s
-+{
-+      /* It is conceivable that we might want a larger hash table
-+       * for recovery.  Must be a power of two. */
-+      int               hash_size; 
-+      /* Shift amount used by hash(); presumably log2(hash_size) - confirm. */
-+      int               hash_shift; 
-+      /* Array of list heads, indexed by hash(). */
-+      struct list_head *hash_table;
-+};
-+
-+
-+#ifdef __KERNEL__
-+static void write_one_revoke_record(journal_t *, transaction_t *,
-+                                  struct journal_head **, int *,
-+                                  struct jbd_revoke_record_s *);
-+static void flush_descriptor(journal_t *, struct journal_head *, int);
-+#endif
-+
-+/* Utility functions to maintain the revoke table */
-+
-+/* Borrowed from buffer.c: this is a tried and tested block hash function */
-+static inline int hash(journal_t *journal, unsigned long block)
-+{
-+      struct jbd_revoke_table_s *table = journal->j_revoke;
-+      int hash_shift = table->hash_shift;
-+      
-+      return ((block << (hash_shift - 6)) ^
-+              (block >> 13) ^
-+              (block << (hash_shift - 12))) & (table->hash_size - 1);
-+}
-+
-+int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq)
-+{
-+      struct list_head *hash_list;
-+      struct jbd_revoke_record_s *record;
-+
-+repeat:
-+      record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS);
-+      if (!record)
-+              goto oom;
-+
-+      record->sequence = seq;
-+      record->blocknr = blocknr;
-+      hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
-+      list_add(&record->hash, hash_list);
-+      return 0;
-+
-+oom:
-+      if (!journal_oom_retry)
-+              return -ENOMEM;
-+      jbd_debug(1, "ENOMEM in " __FUNCTION__ ", retrying.\n");
-+      current->policy |= SCHED_YIELD;
-+      schedule();
-+      goto repeat;
-+}
-+
-+/* Find a revoke record in the journal's hash table. */
-+
-+static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
-+                                                    unsigned long blocknr)
-+{
-+      struct list_head *hash_list;
-+      struct jbd_revoke_record_s *record;
-+      
-+      hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
-+
-+      record = (struct jbd_revoke_record_s *) hash_list->next;
-+      while (&(record->hash) != hash_list) {
-+              if (record->blocknr == blocknr)
-+                      return record;
-+              record = (struct jbd_revoke_record_s *) record->hash.next;
-+      }
-+      return NULL;
-+}
-+
-+int __init journal_init_revoke_caches(void)
-+{
-+      revoke_record_cache = kmem_cache_create("revoke_record",
-+                                         sizeof(struct jbd_revoke_record_s),
-+                                         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
-+      if (revoke_record_cache == 0)
-+              return -ENOMEM;
-+
-+      revoke_table_cache = kmem_cache_create("revoke_table",
-+                                         sizeof(struct jbd_revoke_table_s),
-+                                         0, 0, NULL, NULL);
-+      if (revoke_table_cache == 0) {
-+              kmem_cache_destroy(revoke_record_cache);
-+              revoke_record_cache = NULL;
-+              return -ENOMEM;
-+      }
-+      return 0;
-+}     
-+
-+void journal_destroy_revoke_caches(void)
-+{
-+      kmem_cache_destroy(revoke_record_cache);
-+      revoke_record_cache = 0;
-+      kmem_cache_destroy(revoke_table_cache);
-+      revoke_table_cache = 0;
-+}
-+
-+/* Initialise the revoke table for a given journal to a given size. */
-+
-+int journal_init_revoke(journal_t *journal, int hash_size)
-+{
-+      int shift, tmp;
-+      
-+      J_ASSERT (journal->j_revoke == NULL);
-+      
-+      journal->j_revoke = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-+      if (!journal->j_revoke)
-+              return -ENOMEM;
-+      
-+      /* Check that the hash_size is a power of two */
-+      J_ASSERT ((hash_size & (hash_size-1)) == 0);
-+
-+      journal->j_revoke->hash_size = hash_size;
-+
-+      shift = 0;
-+      tmp = hash_size;
-+      while((tmp >>= 1UL) != 0UL)
-+              shift++;
-+      journal->j_revoke->hash_shift = shift;
-+
-+      journal->j_revoke->hash_table =
-+              kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-+      if (!journal->j_revoke->hash_table) {
-+              kmem_cache_free(revoke_table_cache, journal->j_revoke);
-+              journal->j_revoke = NULL;
-+              return -ENOMEM;
-+      }
-+      
-+      for (tmp = 0; tmp < hash_size; tmp++)
-+              INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
-+      
-+      return 0;
-+}
-+
-+/* Destroy a journal's revoke table.  The table must already be empty! */
-+
-+void journal_destroy_revoke(journal_t *journal)
-+{
-+      struct jbd_revoke_table_s *table;
-+      struct list_head *hash_list;
-+      int i;
-+      
-+      table = journal->j_revoke;
-+      if (!table)
-+              return;
-+      
-+      for (i=0; i<table->hash_size; i++) {
-+              hash_list = &table->hash_table[i];
-+              J_ASSERT (list_empty(hash_list));
-+      }
-+      
-+      kfree(table->hash_table);
-+      kmem_cache_free(revoke_table_cache, table);
-+      journal->j_revoke = NULL;
-+}
-+
-+
-+#ifdef __KERNEL__
-+
-+/* 
-+ * journal_revoke: revoke a given buffer_head from the journal.  This
-+ * prevents the block from being replayed during recovery if we take a
-+ * crash after this current transaction commits.  Any subsequent
-+ * metadata writes of the buffer in this transaction cancel the
-+ * revoke.  
-+ *
-+ * Note that this call may block --- it is up to the caller to make
-+ * sure that there are no further calls to journal_write_metadata
-+ * before the revoke is complete.  In ext3, this implies calling the
-+ * revoke before clearing the block bitmap when we are deleting
-+ * metadata. 
-+ *
-+ * Revoke performs a journal_forget on any buffer_head passed in as a
-+ * parameter, but does _not_ forget the buffer_head if the bh was only
-+ * found implicitly. 
-+ *
-+ * bh_in may not be a journalled buffer - it may have come off
-+ * the hash tables without an attached journal_head.
-+ *
-+ * If bh_in is non-zero, journal_revoke() will decrement its b_count
-+ * by one.
-+ */
-+
-+int journal_revoke(handle_t *handle, unsigned long blocknr, 
-+                 struct buffer_head *bh_in)
-+{
-+      struct buffer_head *bh = NULL;
-+      journal_t *journal;
-+      kdev_t dev;
-+      int err;
-+
-+      if (bh_in)
-+              BUFFER_TRACE(bh_in, "enter");
-+
-+      journal = handle->h_transaction->t_journal;
-+      if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){
-+              J_ASSERT (!"Cannot set revoke feature!");
-+              return -EINVAL;
-+      }
-+
-+      dev = journal->j_fs_dev;
-+      bh = bh_in;
-+
-+      if (!bh) {
-+              bh = get_hash_table(dev, blocknr, journal->j_blocksize);
-+              if (bh)
-+                      BUFFER_TRACE(bh, "found on hash");
-+      }
-+#ifdef JBD_EXPENSIVE_CHECKING
-+      else {
-+              struct buffer_head *bh2;
-+
-+              /* If there is a different buffer_head lying around in
-+               * memory anywhere... */
-+              bh2 = get_hash_table(dev, blocknr, journal->j_blocksize);
-+              if (bh2) {
-+                      /* ... and it has RevokeValid status... */
-+                      if ((bh2 != bh) &&
-+                          test_bit(BH_RevokeValid, &bh2->b_state))
-+                              /* ...then it better be revoked too,
-+                               * since it's illegal to create a revoke
-+                               * record against a buffer_head which is
-+                               * not marked revoked --- that would
-+                               * risk missing a subsequent revoke
-+                               * cancel. */
-+                              J_ASSERT_BH(bh2, test_bit(BH_Revoked, &
-+                                                        bh2->b_state));
-+                      __brelse(bh2);
-+              }
-+      }
-+#endif
-+
-+      /* We really ought not ever to revoke twice in a row without
-+           first having the revoke cancelled: it's illegal to free a
-+           block twice without allocating it in between! */
-+      if (bh) {
-+              J_ASSERT_BH(bh, !test_bit(BH_Revoked, &bh->b_state));
-+              set_bit(BH_Revoked, &bh->b_state);
-+              set_bit(BH_RevokeValid, &bh->b_state);
-+              if (bh_in) {
-+                      BUFFER_TRACE(bh_in, "call journal_forget");
-+                      journal_forget(handle, bh_in);
-+              } else {
-+                      BUFFER_TRACE(bh, "call brelse");
-+                      __brelse(bh);
-+              }
-+      }
-+
-+      lock_journal(journal);
-+      jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
-+      err = insert_revoke_hash(journal, blocknr,
-+                              handle->h_transaction->t_tid);
-+      unlock_journal(journal);
-+      BUFFER_TRACE(bh_in, "exit");
-+      return err;
-+}
-+
-+/*
-+ * Cancel an outstanding revoke.  For use only internally by the
-+ * journaling code (called from journal_get_write_access).
-+ *
-+ * We trust the BH_Revoked bit on the buffer if the buffer is already
-+ * being journaled: if there is no revoke pending on the buffer, then we
-+ * don't do anything here.
-+ *
-+ * This would break if it were possible for a buffer to be revoked and
-+ * discarded, and then reallocated within the same transaction.  In such
-+ * a case we would have lost the revoked bit, but when we arrived here
-+ * the second time we would still have a pending revoke to cancel.  So,
-+ * do not trust the Revoked bit on buffers unless RevokeValid is also
-+ * set.
-+ *
-+ * The caller must have the journal locked.
-+ */
-+int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
-+{
-+      struct jbd_revoke_record_s *record;
-+      journal_t *journal = handle->h_transaction->t_journal;
-+      int need_cancel;
-+      int did_revoke = 0;     /* akpm: debug */
-+      struct buffer_head *bh = jh2bh(jh);
-+      
-+      jbd_debug(4, "journal_head %p, cancelling revoke\n", jh);
-+
-+      /* Is the existing Revoke bit valid?  If so, we trust it, and
-+       * only perform the full cancel if the revoke bit is set.  If
-+       * not, we can't trust the revoke bit, and we need to do the
-+       * full search for a revoke record. */
-+      if (test_and_set_bit(BH_RevokeValid, &bh->b_state))
-+              need_cancel = (test_and_clear_bit(BH_Revoked, &bh->b_state));
-+      else {
-+              need_cancel = 1;
-+              clear_bit(BH_Revoked, &bh->b_state);
-+      }
-+
-+      if (need_cancel) {
-+              record = find_revoke_record(journal, bh->b_blocknr);
-+              if (record) {
-+                      jbd_debug(4, "cancelled existing revoke on "
-+                                "blocknr %lu\n", bh->b_blocknr);
-+                      list_del(&record->hash);
-+                      kmem_cache_free(revoke_record_cache, record);
-+                      did_revoke = 1;
-+              }
-+      }
-+
-+#ifdef JBD_EXPENSIVE_CHECKING
-+      /* There better not be one left behind by now! */
-+      record = find_revoke_record(journal, bh->b_blocknr);
-+      J_ASSERT_JH(jh, record == NULL);
-+#endif
-+
-+      /* Finally, have we just cleared revoke on an unhashed
-+       * buffer_head?  If so, we'd better make sure we clear the
-+       * revoked status on any hashed alias too, otherwise the revoke
-+       * state machine will get very upset later on. */
-+      if (need_cancel && !bh->b_pprev) {
-+              struct buffer_head *bh2;
-+              bh2 = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
-+              if (bh2) {
-+                      clear_bit(BH_Revoked, &bh2->b_state);
-+                      __brelse(bh2);
-+              }
-+      }
-+      
-+      return did_revoke;
-+}
-+
-+
-+/*
-+ * Write revoke records to the journal for all entries in the current
-+ * revoke hash, deleting the entries as we go.
-+ *
-+ * Called with the journal lock held.
-+ */
-+
-+void journal_write_revoke_records(journal_t *journal, 
-+                                transaction_t *transaction)
-+{
-+      struct journal_head *descriptor;
-+      struct jbd_revoke_record_s *record;
-+      struct jbd_revoke_table_s *revoke;
-+      struct list_head *hash_list;
-+      int i, offset, count;
-+
-+      descriptor = NULL; 
-+      offset = 0;
-+      count = 0;
-+      revoke = journal->j_revoke;
-+      
-+      for (i = 0; i < revoke->hash_size; i++) {
-+              hash_list = &revoke->hash_table[i];
-+
-+              while (!list_empty(hash_list)) {
-+                      record = (struct jbd_revoke_record_s *) 
-+                              hash_list->next;
-+                      write_one_revoke_record(journal, transaction,
-+                                              &descriptor, &offset, 
-+                                              record);
-+                      count++;
-+                      list_del(&record->hash);
-+                      kmem_cache_free(revoke_record_cache, record);
-+              }
-+      }
-+      if (descriptor) 
-+              flush_descriptor(journal, descriptor, offset);
-+      jbd_debug(1, "Wrote %d revoke records\n", count);
-+}
-+
-+/* 
-+ * Write out one revoke record.  We need to create a new descriptor
-+ * block if the old one is full or if we have not already created one.  
-+ */
-+
-+static void write_one_revoke_record(journal_t *journal, 
-+                                  transaction_t *transaction,
-+                                  struct journal_head **descriptorp, 
-+                                  int *offsetp,
-+                                  struct jbd_revoke_record_s *record)
-+{
-+      struct journal_head *descriptor;
-+      int offset;
-+      journal_header_t *header;
-+
-+      /* If we are already aborting, this all becomes a noop.  We
-+           still need to go round the loop in
-+           journal_write_revoke_records in order to free all of the
-+           revoke records: only the IO to the journal is omitted. */
-+      if (is_journal_aborted(journal))
-+              return;
-+
-+      descriptor = *descriptorp;
-+      offset = *offsetp;
-+
-+      /* Make sure we have a descriptor with space left for the record */
-+      if (descriptor) {
-+              if (offset == journal->j_blocksize) {
-+                      flush_descriptor(journal, descriptor, offset);
-+                      descriptor = NULL;
-+              }
-+      }
-+      
-+      if (!descriptor) {
-+              descriptor = journal_get_descriptor_buffer(journal);
-+              if (!descriptor)
-+                      return;
-+              header = (journal_header_t *) &jh2bh(descriptor)->b_data[0];
-+              header->h_magic     = htonl(JFS_MAGIC_NUMBER);
-+              header->h_blocktype = htonl(JFS_REVOKE_BLOCK);
-+              header->h_sequence  = htonl(transaction->t_tid);
-+
-+              /* Record it so that we can wait for IO completion later */
-+              JBUFFER_TRACE(descriptor, "file as BJ_LogCtl");
-+              journal_file_buffer(descriptor, transaction, BJ_LogCtl);
-+
-+              offset = sizeof(journal_revoke_header_t);
-+              *descriptorp = descriptor;
-+      }
-+      
-+      * ((unsigned int *)(&jh2bh(descriptor)->b_data[offset])) = 
-+              htonl(record->blocknr);
-+      offset += 4;
-+      *offsetp = offset;
-+}
-+
-+/* 
-+ * Flush a revoke descriptor out to the journal.  If we are aborting,
-+ * this is a noop; otherwise we are generating a buffer which needs to
-+ * be waited for during commit, so it has to go onto the appropriate
-+ * journal buffer list.
-+ */
-+
-+static void flush_descriptor(journal_t *journal, 
-+                           struct journal_head *descriptor, 
-+                           int offset)
-+{
-+      journal_revoke_header_t *header;
-+
-+      if (is_journal_aborted(journal)) {
-+              JBUFFER_TRACE(descriptor, "brelse");
-+              unlock_buffer(jh2bh(descriptor));
-+              __brelse(jh2bh(descriptor));
-+              return;
-+      }
-+      
-+      header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data;
-+      header->r_count = htonl(offset);
-+      set_bit(BH_JWrite, &jh2bh(descriptor)->b_state);
-+      {
-+              struct buffer_head *bh = jh2bh(descriptor);
-+              BUFFER_TRACE(bh, "write");
-+              clear_bit(BH_Dirty, &bh->b_state);
-+              bh->b_end_io = journal_end_buffer_io_sync;
-+              submit_bh(WRITE, bh);
-+      }
-+}
-+
-+#endif
-+
-+/* 
-+ * Revoke support for recovery.
-+ *
-+ * Recovery needs to be able to:
-+ *
-+ *  record all revoke records, including the tid of the latest instance
-+ *  of each revoke in the journal
-+ *
-+ *  check whether a given block in a given transaction should be replayed
-+ *  (ie. has not been revoked by a revoke record in that or a subsequent
-+ *  transaction)
-+ * 
-+ *  empty the revoke table after recovery.
-+ */
-+
-+/*
-+ * First, setting revoke records.  We create a new revoke record for
-+ * every block ever revoked in the log as we scan it for recovery, and
-+ * we update the existing records if we find multiple revokes for a
-+ * single block. 
-+ */
-+
-+int journal_set_revoke(journal_t *journal, 
-+                     unsigned long blocknr, 
-+                     tid_t sequence)
-+{
-+      struct jbd_revoke_record_s *record;
-+      
-+      record = find_revoke_record(journal, blocknr);
-+      if (record) {
-+              /* If we have multiple occurrences, only record the
-+               * latest sequence number in the hashed record */
-+              if (tid_gt(sequence, record->sequence))
-+                      record->sequence = sequence;
-+              return 0;
-+      } 
-+      return insert_revoke_hash(journal, blocknr, sequence);
-+}
-+
-+/* 
-+ * Test revoke records.  For a given block referenced in the log, has
-+ * that block been revoked?  A revoke record with a given transaction
-+ * sequence number revokes all blocks in that transaction and earlier
-+ * ones, but later transactions still need to be replayed.
-+ */
-+
-+int journal_test_revoke(journal_t *journal, 
-+                      unsigned long blocknr,
-+                      tid_t sequence)
-+{
-+      struct jbd_revoke_record_s *record;
-+      
-+      record = find_revoke_record(journal, blocknr);
-+      if (!record)
-+              return 0;
-+      if (tid_gt(sequence, record->sequence))
-+              return 0;
-+      return 1;
-+}
-+
-+/*
-+ * Finally, once recovery is over, we need to clear the revoke table so
-+ * that it can be reused by the running filesystem.
-+ */
-+
-+void journal_clear_revoke(journal_t *journal)
-+{
-+      int i;
-+      struct list_head *hash_list;
-+      struct jbd_revoke_record_s *record;
-+      struct jbd_revoke_table_s *revoke;
-+      
-+      revoke = journal->j_revoke;
-+      
-+      for (i = 0; i < revoke->hash_size; i++) {
-+              hash_list = &revoke->hash_table[i];
-+              while (!list_empty(hash_list)) {
-+                      record = (struct jbd_revoke_record_s*) hash_list->next;
-+                      list_del(&record->hash);
-+                      kmem_cache_free(revoke_record_cache, record);
-+              }
-+      }
-+}
-+
-diff -ruP linux.mcp2/fs/jbd/transaction.c linuxppc_2.4.19_final/fs/jbd/transaction.c
---- linux.mcp2/fs/jbd/transaction.c    1969-12-31 16:00:00.000000000 -0800
-+++ linuxppc_2.4.19_final/fs/jbd/transaction.c 2004-05-17 13:56:17.000000000 -0700
-@@ -0,0 +1,2055 @@
-+/*
-+ * linux/fs/jbd/transaction.c
-+ * 
-+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
-+ *
-+ * Copyright 1998 Red Hat corp --- All Rights Reserved
-+ *
-+ * This file is part of the Linux kernel and is made available under
-+ * the terms of the GNU General Public License, version 2, or at your
-+ * option, any later version, incorporated herein by reference.
-+ *
-+ * Generic filesystem transaction handling code; part of the ext2fs
-+ * journaling system.  
-+ *
-+ * This file manages transactions (compound commits managed by the
-+ * journaling code) and handles (individual atomic operations by the
-+ * filesystem).
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/errno.h>
-+#include <linux/slab.h>
-+#include <linux/locks.h>
-+#include <linux/timer.h>
-+#include <linux/smp_lock.h>
-+#include <linux/mm.h>
-+
-+extern spinlock_t journal_datalist_lock;
-+
-+/*
-+ * get_transaction: obtain a new transaction_t object.
-+ *
-+ * Simply allocate and initialise a new transaction.  Create it in
-+ * RUNNING state and add it to the current journal (which should not
-+ * have an existing running transaction: we only make a new transaction
-+ * once we have started to commit the old one).
-+ *
-+ * Preconditions:
-+ *    The journal MUST be locked.  We don't perform atomic mallocs on the
-+ *    new transaction and we can't block without protecting against other
-+ *    processes trying to touch the journal while it is in transition.
-+ */
-+
-+static transaction_t * get_transaction (journal_t * journal, int is_try)
-+{
-+      transaction_t * transaction;
-+
-+      transaction = jbd_kmalloc (sizeof (transaction_t), GFP_NOFS);
-+      if (!transaction)
-+              return NULL;
-+      
-+      memset (transaction, 0, sizeof (transaction_t));
-+      
-+      transaction->t_journal = journal;
-+      transaction->t_state = T_RUNNING;
-+      transaction->t_tid = journal->j_transaction_sequence++;
-+      transaction->t_expires = jiffies + journal->j_commit_interval;
-+
-+      /* Set up the commit timer for the new transaction. */
-+      J_ASSERT (!journal->j_commit_timer_active);
-+      journal->j_commit_timer_active = 1;
-+      journal->j_commit_timer->expires = transaction->t_expires;
-+      add_timer(journal->j_commit_timer);
-+      
-+      J_ASSERT (journal->j_running_transaction == NULL);
-+      journal->j_running_transaction = transaction;
-+
-+      return transaction;
-+}
-+
-+/*
-+ * Handle management.
-+ *
-+ * A handle_t is an object which represents a single atomic update to a
-+ * filesystem, and which tracks all of the modifications which form part
-+ * of that one update.
-+ */
-+
-+/*
-+ * start_this_handle: Given a handle, deal with any locking or stalling
-+ * needed to make sure that there is enough journal space for the handle
-+ * to begin.  Attach the handle to a transaction and set up the
-+ * transaction's buffer credits.  
-+ */
-+
-+static int start_this_handle(journal_t *journal, handle_t *handle)
-+{
-+      transaction_t *transaction;
-+      int needed;
-+      int nblocks = handle->h_buffer_credits;
-+      
-+      jbd_debug(3, "New handle %p going live.\n", handle);
-+
-+repeat:
-+
-+      lock_journal(journal);
-+
-+repeat_locked:
-+
-+      if (is_journal_aborted(journal) ||
-+          (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
-+              unlock_journal(journal);
-+              return -EROFS; 
-+      }
-+
-+      /* Wait on the journal's transaction barrier if necessary */
-+      if (journal->j_barrier_count) {
-+              unlock_journal(journal);
-+              sleep_on(&journal->j_wait_transaction_locked);
-+              goto repeat;
-+      }
-+      
-+      if (!journal->j_running_transaction)
-+              get_transaction(journal, 0);
-+      /* @@@ Error? */
-+      J_ASSERT(journal->j_running_transaction);
-+      
-+      transaction = journal->j_running_transaction;
-+
-+      /* If the current transaction is locked down for commit, wait
-+       * for the lock to be released. */
-+
-+      if (transaction->t_state == T_LOCKED) {
-+              unlock_journal(journal);
-+              jbd_debug(3, "Handle %p stalling...\n", handle);
-+              sleep_on(&journal->j_wait_transaction_locked);
-+              goto repeat;
-+      }
-+      
-+      /* If there is not enough space left in the log to write all
-+       * potential buffers requested by this operation, we need to
-+       * stall pending a log checkpoint to free some more log
-+       * space. */
-+
-+      needed = transaction->t_outstanding_credits + nblocks;
-+
-+      if (needed > journal->j_max_transaction_buffers) {
-+              /* If the current transaction is already too large, then
-+               * start to commit it: we can then go back and attach
-+               * this handle to a new transaction. */
-+              
-+              jbd_debug(2, "Handle %p starting new commit...\n", handle);
-+              log_start_commit(journal, transaction);
-+              unlock_journal(journal);
-+              sleep_on(&journal->j_wait_transaction_locked);
-+              lock_journal(journal);
-+              goto repeat_locked;
-+      }
-+
-+      /* 
-+       * The commit code assumes that it can get enough log space
-+       * without forcing a checkpoint.  This is *critical* for
-+       * correctness: a checkpoint of a buffer which is also
-+       * associated with a committing transaction creates a deadlock,
-+       * so commit simply cannot force through checkpoints.
-+       *
-+       * We must therefore ensure the necessary space in the journal
-+       * *before* starting to dirty potentially checkpointed buffers
-+       * in the new transaction. 
-+       *
-+       * The worst part is, any transaction currently committing can
-+       * reduce the free space arbitrarily.  Be careful to account for
-+       * those buffers when checkpointing.
-+       */
-+
-+      /*
-+       * @@@ AKPM: This seems rather over-defensive.  We're giving commit
-+       * a _lot_ of headroom: 1/4 of the journal plus the size of
-+       * the committing transaction.  Really, we only need to give it
-+       * committing_transaction->t_outstanding_credits plus "enough" for
-+       * the log control blocks.
-+       * Also, this test is inconsistent with the matching one in
-+       * journal_extend().
-+       */
-+      needed = journal->j_max_transaction_buffers;
-+      if (journal->j_committing_transaction) 
-+              needed += journal->j_committing_transaction->
-+                                      t_outstanding_credits;
-+      
-+      if (log_space_left(journal) < needed) {
-+              jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
-+              log_wait_for_space(journal, needed);
-+              goto repeat_locked;
-+      }
-+
-+      /* OK, account for the buffers that this operation expects to
-+       * use and add the handle to the running transaction. */
-+
-+      handle->h_transaction = transaction;
-+      transaction->t_outstanding_credits += nblocks;
-+      transaction->t_updates++;
-+      transaction->t_handle_count++;
-+      jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-+                handle, nblocks, transaction->t_outstanding_credits,
-+                log_space_left(journal));
-+
-+      unlock_journal(journal);
-+      
-+      return 0;
-+}
-+
-+/*
-+ * Obtain a new handle.  
-+ *
-+ * We make sure that the transaction can guarantee at least nblocks of
-+ * modified buffers in the log.  We block until the log can guarantee
-+ * that much space.  
-+ *
-+ * This function is visible to journal users (like ext2fs), so is not
-+ * called with the journal already locked.
-+ *
-+ * Return a pointer to the handle, or an ERR_PTR() value on failure
-+ */
-+
-+handle_t *journal_start(journal_t *journal, int nblocks)
-+{
-+      handle_t *handle = journal_current_handle();
-+      int err;
-+      
-+      if (!journal)
-+              return ERR_PTR(-EROFS);
-+
-+      if (handle) {
-+              J_ASSERT(handle->h_transaction->t_journal == journal);
-+              handle->h_ref++;
-+              return handle;
-+      }
-+      
-+      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+      if (!handle)
-+              return ERR_PTR(-ENOMEM);
-+      memset (handle, 0, sizeof (handle_t));
-+
-+      handle->h_buffer_credits = nblocks;
-+      handle->h_ref = 1;
-+      current->journal_info = handle;
-+
-+      err = start_this_handle(journal, handle);
-+      if (err < 0) {
-+              kfree(handle);
-+              current->journal_info = NULL;
-+              return ERR_PTR(err);
-+      }
-+
-+      return handle;
-+}
-+
-+/*
-+ * Return zero on success
-+ */
-+static int try_start_this_handle(journal_t *journal, handle_t *handle)
-+{
-+      transaction_t *transaction;
-+      int needed;
-+      int nblocks = handle->h_buffer_credits;
-+      int ret = 0;
-+
-+      jbd_debug(3, "New handle %p maybe going live.\n", handle);
-+
-+      lock_journal(journal);
-+
-+      if (is_journal_aborted(journal) ||
-+          (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
-+              ret = -EROFS;
-+              goto fail_unlock;
-+      }
-+
-+      if (journal->j_barrier_count)
-+              goto fail_unlock;
-+
-+      if (!journal->j_running_transaction && get_transaction(journal, 1) == 0)
-+              goto fail_unlock;
-+      
-+      transaction = journal->j_running_transaction;
-+      if (transaction->t_state == T_LOCKED)
-+              goto fail_unlock;
-+      
-+      needed = transaction->t_outstanding_credits + nblocks;
-+      /* We could run log_start_commit here */
-+      if (needed > journal->j_max_transaction_buffers)
-+              goto fail_unlock;
-+
-+      needed = journal->j_max_transaction_buffers;
-+      if (journal->j_committing_transaction) 
-+              needed += journal->j_committing_transaction->
-+                                              t_outstanding_credits;
-+      
-+      if (log_space_left(journal) < needed)
-+              goto fail_unlock;
-+
-+      handle->h_transaction = transaction;
-+      transaction->t_outstanding_credits += nblocks;
-+      transaction->t_updates++;
-+      jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-+                handle, nblocks, transaction->t_outstanding_credits,
-+                log_space_left(journal));
-+      unlock_journal(journal);
-+      return 0;
-+
-+fail_unlock:
-+      unlock_journal(journal);
-+      if (ret >= 0)
-+              ret = -1;
-+      return ret;
-+}
-+
-+/*
-+ * Try to start a handle, but non-blockingly.  If we weren't able
-+ * to, return an ERR_PTR value.
-+ */
-+handle_t *journal_try_start(journal_t *journal, int nblocks)
-+{
-+      handle_t *handle = journal_current_handle();
-+      int err;
-+      
-+      if (!journal)
-+              return ERR_PTR(-EROFS);
-+
-+      if (handle) {
-+              jbd_debug(4, "h_ref %d -> %d\n",
-+                              handle->h_ref,
-+                              handle->h_ref + 1);
-+              J_ASSERT(handle->h_transaction->t_journal == journal);
-+              if (is_handle_aborted(handle))
-+                      return ERR_PTR(-EIO);
-+              handle->h_ref++;
-+              return handle;
-+      } else {
-+              jbd_debug(4, "no current transaction\n");
-+      }
-+      
-+      if (is_journal_aborted(journal))
-+              return ERR_PTR(-EIO);
-+      
-+      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+      if (!handle)
-+              return ERR_PTR(-ENOMEM);
-+      memset (handle, 0, sizeof (handle_t));
-+
-+      handle->h_buffer_credits = nblocks;
-+      handle->h_ref = 1;
-+      current->journal_info = handle;
-+
-+      err = try_start_this_handle(journal, handle);
-+      if (err < 0) {
-+              kfree(handle);
-+              current->journal_info = NULL;
-+              return ERR_PTR(err);
-+      }
-+
-+      return handle;
-+}
-+
-+/*
-+ * journal_extend: extend buffer credits.
-+ *
-+ * Some transactions, such as large extends and truncates, can be done
-+ * atomically all at once or in several stages.  The operation requests
-+ * a credit for a number of buffer modications in advance, but can
-+ * extend its credit if it needs more.  
-+ *
-+ * journal_extend tries to give the running handle more buffer credits.
-+ * It does not guarantee that allocation: this is a best-effort only.
-+ * The calling process MUST be able to deal cleanly with a failure to
-+ * extend here.
-+ *
-+ * Return 0 on success, non-zero on failure.
-+ *
-+ * return code < 0 implies an error
-+ * return code > 0 implies normal transaction-full status.
-+ */
-+
-+int journal_extend (handle_t *handle, int nblocks)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      int result;
-+      int wanted;
-+
-+      lock_journal (journal);
-+
-+      result = -EIO;
-+      if (is_handle_aborted(handle))
-+              goto error_out;
-+
-+      result = 1;
-+             
-+      /* Don't extend a locked-down transaction! */
-+      if (handle->h_transaction->t_state != T_RUNNING) {
-+              jbd_debug(3, "denied handle %p %d blocks: "
-+                        "transaction not running\n", handle, nblocks);
-+              goto error_out;
-+      }
-+      
-+      wanted = transaction->t_outstanding_credits + nblocks;
-+      
-+      if (wanted > journal->j_max_transaction_buffers) {
-+              jbd_debug(3, "denied handle %p %d blocks: "
-+                        "transaction too large\n", handle, nblocks);
-+              goto error_out;
-+      }
-+
-+      if (wanted > log_space_left(journal)) {
-+              jbd_debug(3, "denied handle %p %d blocks: "
-+                        "insufficient log space\n", handle, nblocks);
-+              goto error_out;
-+      }
-+      
-+      handle->h_buffer_credits += nblocks;
-+      transaction->t_outstanding_credits += nblocks;
-+      result = 0;
-+
-+      jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
-+      
-+error_out:
-+      unlock_journal (journal);
-+      return result;
-+}
-+
-+
-+/*
-+ * journal_restart: restart a handle for a multi-transaction filesystem
-+ * operation.
-+ *
-+ * If the journal_extend() call above fails to grant new buffer credits
-+ * to a running handle, a call to journal_restart will commit the
-+ * handle's transaction so far and reattach the handle to a new
-+ * transaction capabable of guaranteeing the requested number of
-+ * credits.
-+ */
-+
-+int journal_restart(handle_t *handle, int nblocks)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      int ret;
-+
-+      /* If we've had an abort of any type, don't even think about
-+       * actually doing the restart! */
-+      if (is_handle_aborted(handle))
-+              return 0;
-+      
-+      /* First unlink the handle from its current transaction, and
-+       * start the commit on that. */
-+      
-+      J_ASSERT (transaction->t_updates > 0);
-+      J_ASSERT (journal_current_handle() == handle);
-+
-+      transaction->t_outstanding_credits -= handle->h_buffer_credits;
-+      transaction->t_updates--;
-+
-+      if (!transaction->t_updates)
-+              wake_up(&journal->j_wait_updates);
-+
-+      jbd_debug(2, "restarting handle %p\n", handle);
-+      log_start_commit(journal, transaction);
-+
-+      handle->h_buffer_credits = nblocks;
-+      ret = start_this_handle(journal, handle);
-+      return ret;
-+}
-+
-+
-+/* 
-+ * Barrier operation: establish a transaction barrier. 
-+ *
-+ * This locks out any further updates from being started, and blocks
-+ * until all existing updates have completed, returning only once the
-+ * journal is in a quiescent state with no updates running.
-+ *
-+ * The journal lock should not be held on entry.
-+ */
-+
-+void journal_lock_updates (journal_t *journal)
-+{
-+      lock_journal(journal);
-+      ++journal->j_barrier_count;
-+
-+      /* Wait until there are no running updates */
-+      while (1) {
-+              transaction_t *transaction = journal->j_running_transaction;
-+              if (!transaction)
-+                      break;
-+              if (!transaction->t_updates)
-+                      break;
-+              
-+              unlock_journal(journal);
-+              sleep_on(&journal->j_wait_updates);
-+              lock_journal(journal);
-+      }
-+
-+      unlock_journal(journal);
-+
-+      /* We have now established a barrier against other normal
-+       * updates, but we also need to barrier against other
-+       * journal_lock_updates() calls to make sure that we serialise
-+       * special journal-locked operations too. */
-+      down(&journal->j_barrier);
-+}
-+
-+/*
-+ * Release a transaction barrier obtained with journal_lock_updates().
-+ *
-+ * Should be called without the journal lock held.
-+ */
-+
-+void journal_unlock_updates (journal_t *journal)
-+{
-+      lock_journal(journal);
-+
-+      J_ASSERT (journal->j_barrier_count != 0);
-+      
-+      up(&journal->j_barrier);
-+      --journal->j_barrier_count;
-+      wake_up(&journal->j_wait_transaction_locked);
-+      unlock_journal(journal);
-+}
-+
-+/*
-+ * journal_get_write_access: notify intent to modify a buffer for metadata
-+ * (not data) update.
-+ *
-+ * If the buffer is already part of the current transaction, then there
-+ * is nothing we need to do.  If it is already part of a prior
-+ * transaction which we are still committing to disk, then we need to
-+ * make sure that we do not overwrite the old copy: we do copy-out to
-+ * preserve the copy going to disk.  We also account the buffer against
-+ * the handle's metadata buffer credits (unless the buffer is already
-+ * part of the transaction, that is).
-+ *
-+ * Returns an error code or 0 on success.
-+ *
-+ * In full data journalling mode the buffer may be of type BJ_AsyncData,
-+ * because we're write()ing a buffer which is also part of a shared mapping.
-+ */
-+
-+static int
-+do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) 
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      int error;
-+      char *frozen_buffer = NULL;
-+      int need_copy = 0;
-+
-+      jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
-+
-+      JBUFFER_TRACE(jh, "entry");
-+repeat:
-+      /* @@@ Need to check for errors here at some point. */
-+
-+      /*
-+       * AKPM: neither bdflush nor kupdate run with the BKL.   There's
-+       * nothing we can do to prevent them from starting writeout of a
-+       * BUF_DIRTY buffer at any time.  And checkpointing buffers are on
-+       * BUF_DIRTY.  So.  We no longer assert that the buffer is unlocked.
-+       *
-+       * However.  It is very wrong for us to allow ext3 to start directly
-+       * altering the ->b_data of buffers which may at that very time be
-+       * undergoing writeout to the client filesystem.  This can leave
-+       * the filesystem in an inconsistent, transient state if we crash.
-+       * So what we do is to steal the buffer if it is in checkpoint
-+       * mode and dirty.  The journal lock will keep out checkpoint-mode
-+       * state transitions within journal_remove_checkpoint() and the buffer
-+       * is locked to keep bdflush/kupdate/whoever away from it as well.
-+       *
-+       * AKPM: we have replaced all the lock_journal_bh_wait() stuff with a
-+       * simple lock_journal().  This code here will care for locked buffers.
-+       */
-+      /*
-+       * The buffer_locked() || buffer_dirty() tests here are simply an
-+       * optimisation tweak.  If anyone else in the system decides to
-+       * lock this buffer later on, we'll blow up.  There doesn't seem
-+       * to be a good reason why they should do this.
-+       */
-+      if (jh->b_cp_transaction &&
-+          (buffer_locked(jh2bh(jh)) || buffer_dirty(jh2bh(jh)))) {
-+              unlock_journal(journal);
-+              lock_buffer(jh2bh(jh));
-+              spin_lock(&journal_datalist_lock);
-+              if (jh->b_cp_transaction && buffer_dirty(jh2bh(jh))) {
-+                      /* OK, we need to steal it */
-+                      JBUFFER_TRACE(jh, "stealing from checkpoint mode");
-+                      J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+                      J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
-+
-+                      J_ASSERT(handle->h_buffer_credits > 0);
-+                      handle->h_buffer_credits--;
-+
-+                      /* This will clear BH_Dirty and set BH_JBDDirty. */
-+                      JBUFFER_TRACE(jh, "file as BJ_Reserved");
-+                      __journal_file_buffer(jh, transaction, BJ_Reserved);
-+
-+                      /* And pull it off BUF_DIRTY, onto BUF_CLEAN */
-+                      refile_buffer(jh2bh(jh));
-+
-+                      /*
-+                       * The buffer is now hidden from bdflush.   It is
-+                       * metadata against the current transaction.
-+                       */
-+                      JBUFFER_TRACE(jh, "steal from cp mode is complete");
-+              }
-+              spin_unlock(&journal_datalist_lock);
-+              unlock_buffer(jh2bh(jh));
-+              lock_journal(journal);
-+              goto repeat;
-+      }
-+
-+      J_ASSERT_JH(jh, !buffer_locked(jh2bh(jh)));
-+
-+      error = -EROFS;
-+      if (is_handle_aborted(handle)) 
-+              goto out_unlocked;
-+      error = 0;
-+
-+      spin_lock(&journal_datalist_lock);
-+
-+      /* The buffer is already part of this transaction if
-+       * b_transaction or b_next_transaction points to it. */
-+
-+      if (jh->b_transaction == transaction ||
-+          jh->b_next_transaction == transaction)
-+              goto done_locked;
-+
-+      /* If there is already a copy-out version of this buffer, then
-+       * we don't need to make another one. */
-+
-+      if (jh->b_frozen_data) {
-+              JBUFFER_TRACE(jh, "has frozen data");
-+              J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+              jh->b_next_transaction = transaction;
-+
-+              J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-+              handle->h_buffer_credits--;
-+              goto done_locked;
-+      }
-+      
-+      /* Is there data here we need to preserve? */
-+
-+      if (jh->b_transaction && jh->b_transaction != transaction) {
-+              JBUFFER_TRACE(jh, "owned by older transaction");
-+              J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+              J_ASSERT_JH(jh, jh->b_transaction ==
-+                                      journal->j_committing_transaction);
-+
-+              /* There is one case we have to be very careful about.
-+               * If the committing transaction is currently writing
-+               * this buffer out to disk and has NOT made a copy-out,
-+               * then we cannot modify the buffer contents at all
-+               * right now.  The essence of copy-out is that it is the
-+               * extra copy, not the primary copy, which gets
-+               * journaled.  If the primary copy is already going to
-+               * disk then we cannot do copy-out here. */
-+
-+              if (jh->b_jlist == BJ_Shadow) {
-+                      JBUFFER_TRACE(jh, "on shadow: sleep");
-+                      spin_unlock(&journal_datalist_lock);
-+                      unlock_journal(journal);
-+                      /* commit wakes up all shadow buffers after IO */
-+                      sleep_on(&jh2bh(jh)->b_wait);
-+                      lock_journal(journal);
-+                      goto repeat;
-+              }
-+                      
-+              /* Only do the copy if the currently-owning transaction
-+               * still needs it.  If it is on the Forget list, the
-+               * committing transaction is past that stage.  The
-+               * buffer had better remain locked during the kmalloc,
-+               * but that should be true --- we hold the journal lock
-+               * still and the buffer is already on the BUF_JOURNAL
-+               * list so won't be flushed. 
-+               *
-+               * Subtle point, though: if this is a get_undo_access,
-+               * then we will be relying on the frozen_data to contain
-+               * the new value of the committed_data record after the
-+               * transaction, so we HAVE to force the frozen_data copy
-+               * in that case. */
-+
-+              if (jh->b_jlist != BJ_Forget || force_copy) {
-+                      JBUFFER_TRACE(jh, "generate frozen data");
-+                      if (!frozen_buffer) {
-+                              JBUFFER_TRACE(jh, "allocate memory for buffer");
-+                              spin_unlock(&journal_datalist_lock);
-+                              unlock_journal(journal);
-+                              frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
-+                                                          GFP_NOFS);
-+                              lock_journal(journal);
-+                              if (!frozen_buffer) {
-+                                      printk(KERN_EMERG __FUNCTION__
-+                                              "OOM for frozen_buffer\n");
-+                                      JBUFFER_TRACE(jh, "oom!");
-+                                      error = -ENOMEM;
-+                                      spin_lock(&journal_datalist_lock);
-+                                      goto done_locked;
-+                              }
-+                              goto repeat;
-+                      }
-+
-+                      jh->b_frozen_data = frozen_buffer;
-+                      frozen_buffer = NULL;
-+                      need_copy = 1;
-+              }
-+              jh->b_next_transaction = transaction;
-+      }
-+
-+      J_ASSERT(handle->h_buffer_credits > 0);
-+      handle->h_buffer_credits--;
-+
-+      /* Finally, if the buffer is not journaled right now, we need to
-+       * make sure it doesn't get written to disk before the caller
-+       * actually commits the new data. */
-+
-+      if (!jh->b_transaction) {
-+              JBUFFER_TRACE(jh, "no transaction");
-+              J_ASSERT_JH(jh, !jh->b_next_transaction);
-+              jh->b_transaction = transaction;
-+              JBUFFER_TRACE(jh, "file as BJ_Reserved");
-+              __journal_file_buffer(jh, transaction, BJ_Reserved);
-+      }
-+      
-+done_locked:
-+      spin_unlock(&journal_datalist_lock);
-+      if (need_copy) {
-+              struct page *page;
-+              int offset;
-+              char *source;
-+
-+              J_ASSERT_JH(jh, buffer_uptodate(jh2bh(jh)));
-+              page = jh2bh(jh)->b_page;
-+              offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
-+              source = kmap(page);
-+              memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
-+              kunmap(page);
-+      }
-+      
-+
-+      /* If we are about to journal a buffer, then any revoke pending
-+           on it is no longer valid. */
-+      journal_cancel_revoke(handle, jh);
-+
-+out_unlocked:
-+      if (frozen_buffer)
-+              kfree(frozen_buffer);
-+
-+      JBUFFER_TRACE(jh, "exit");
-+      return error;
-+}
-+
-+int journal_get_write_access (handle_t *handle, struct buffer_head *bh) 
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      struct journal_head *jh = journal_add_journal_head(bh);
-+      int rc;
-+
-+      /* We do not want to get caught playing with fields which the
-+       * log thread also manipulates.  Make sure that the buffer
-+       * completes any outstanding IO before proceeding. */
-+      lock_journal(journal);
-+      rc = do_get_write_access(handle, jh, 0);
-+      journal_unlock_journal_head(jh);
-+      unlock_journal(journal);
-+      return rc;
-+}
-+
-+
-+/*
-+ * When the user wants to journal a newly created buffer_head
-+ * (ie. getblk() returned a new buffer and we are going to populate it
-+ * manually rather than reading off disk), then we need to keep the
-+ * buffer_head locked until it has been completely filled with new
-+ * data.  In this case, we should be able to make the assertion that
-+ * the bh is not already part of an existing transaction.  
-+ * 
-+ * The buffer should already be locked by the caller by this point.
-+ * There is no lock ranking violation: it was a newly created,
-+ * unlocked buffer beforehand. */
-+
-+int journal_get_create_access (handle_t *handle, struct buffer_head *bh) 
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      struct journal_head *jh = journal_add_journal_head(bh);
-+      int err;
-+      
-+      jbd_debug(5, "journal_head %p\n", jh);
-+      lock_journal(journal);
-+      err = -EROFS;
-+      if (is_handle_aborted(handle))
-+              goto out;
-+      err = 0;
-+      
-+      JBUFFER_TRACE(jh, "entry");
-+      /* The buffer may already belong to this transaction due to
-+       * pre-zeroing in the filesystem's new_block code.  It may also
-+       * be on the previous, committing transaction's lists, but it
-+       * HAS to be in Forget state in that case: the transaction must
-+       * have deleted the buffer for it to be reused here. */
-+      J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
-+                       jh->b_transaction == NULL ||
-+                       (jh->b_transaction == journal->j_committing_transaction &&
-+                        jh->b_jlist == BJ_Forget)));
-+
-+      J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+      J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
-+
-+      J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
-+      handle->h_buffer_credits--;
-+
-+      spin_lock(&journal_datalist_lock);
-+      if (jh->b_transaction == NULL) {
-+              jh->b_transaction = transaction;
-+              JBUFFER_TRACE(jh, "file as BJ_Reserved");
-+              __journal_file_buffer(jh, transaction, BJ_Reserved);
-+              JBUFFER_TRACE(jh, "refile");
-+              refile_buffer(jh2bh(jh));
-+      } else if (jh->b_transaction == journal->j_committing_transaction) {
-+              JBUFFER_TRACE(jh, "set next transaction");
-+              jh->b_next_transaction = transaction;
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+
-+      /*
-+       * akpm: I added this.  ext3_alloc_branch can pick up new indirect
-+       * blocks which contain freed but then revoked metadata.  We need
-+       * to cancel the revoke in case we end up freeing it yet again
-+       * and the reallocating as data - this would cause a second revoke,
-+       * which hits an assertion error.
-+       */
-+      JBUFFER_TRACE(jh, "cancelling revoke");
-+      journal_cancel_revoke(handle, jh);
-+      journal_unlock_journal_head(jh);
-+out:
-+      unlock_journal(journal);
-+      return err;
-+}
-+
-+
-+
-+/*
-+ * journal_get_undo_access: Notify intent to modify metadata with non-
-+ * rewindable consequences
-+ *
-+ * Sometimes there is a need to distinguish between metadata which has
-+ * been committed to disk and that which has not.  The ext3fs code uses
-+ * this for freeing and allocating space: we have to make sure that we
-+ * do not reuse freed space until the deallocation has been committed,
-+ * since if we overwrote that space we would make the delete
-+ * un-rewindable in case of a crash.
-+ * 
-+ * To deal with that, journal_get_undo_access requests write access to a
-+ * buffer for parts of non-rewindable operations such as delete
-+ * operations on the bitmaps.  The journaling code must keep a copy of
-+ * the buffer's contents prior to the undo_access call until such time
-+ * as we know that the buffer has definitely been committed to disk.
-+ * 
-+ * We never need to know which transaction the committed data is part
-+ * of: buffers touched here are guaranteed to be dirtied later and so
-+ * will be committed to a new transaction in due course, at which point
-+ * we can discard the old committed data pointer.
-+ *
-+ * Returns error number or 0 on success.  
-+ */
-+
-+int journal_get_undo_access (handle_t *handle, struct buffer_head *bh)
-+{
-+      journal_t *journal = handle->h_transaction->t_journal;
-+      int err;
-+      struct journal_head *jh = journal_add_journal_head(bh);
-+
-+      JBUFFER_TRACE(jh, "entry");
-+      lock_journal(journal);
-+
-+      /* Do this first --- it can drop the journal lock, so we want to
-+       * make sure that obtaining the committed_data is done
-+       * atomically wrt. completion of any outstanding commits. */
-+      err = do_get_write_access (handle, jh, 1);
-+      if (err)
-+              goto out;
-+      
-+      if (!jh->b_committed_data) {
-+              /* Copy out the current buffer contents into the
-+               * preserved, committed copy. */
-+              JBUFFER_TRACE(jh, "generate b_committed data");
-+              jh->b_committed_data = jbd_kmalloc(jh2bh(jh)->b_size, 
-+                                                 GFP_NOFS);
-+              if (!jh->b_committed_data) {
-+                      printk(KERN_EMERG __FUNCTION__
-+                              ": No memory for committed data!\n");
-+                      err = -ENOMEM;
-+                      goto out;
-+              }
-+              
-+              memcpy (jh->b_committed_data, jh2bh(jh)->b_data,
-+                              jh2bh(jh)->b_size);
-+      }
-+
-+out:
-+      if (!err)
-+              J_ASSERT_JH(jh, jh->b_committed_data);
-+      journal_unlock_journal_head(jh);
-+      unlock_journal(journal);
-+      return err;
-+}
-+
-+/* 
-+ * journal_dirty_data: mark a buffer as containing dirty data which
-+ * needs to be flushed before we can commit the current transaction.  
-+ *
-+ * The buffer is placed on the transaction's data list and is marked as
-+ * belonging to the transaction.
-+ *
-+ * If `async' is set then the writebask will be initiated by the caller
-+ * using submit_bh -> end_buffer_io_async.  We put the buffer onto
-+ * t_async_datalist.
-+ * 
-+ * Returns error number or 0 on success.  
-+ *
-+ * journal_dirty_data() can be called via page_launder->ext3_writepage
-+ * by kswapd.  So it cannot block.  Happily, there's nothing here
-+ * which needs lock_journal if `async' is set.
-+ *
-+ * When the buffer is on the current transaction we freely move it
-+ * between BJ_AsyncData and BJ_SyncData according to who tried to
-+ * change its state last.
-+ */
-+
-+int journal_dirty_data (handle_t *handle, struct buffer_head *bh, int async)
-+{
-+      journal_t *journal = handle->h_transaction->t_journal;
-+      int need_brelse = 0;
-+      int wanted_jlist = async ? BJ_AsyncData : BJ_SyncData;
-+      struct journal_head *jh;
-+
-+      if (is_handle_aborted(handle))
-+              return 0;
-+      
-+      jh = journal_add_journal_head(bh);
-+      JBUFFER_TRACE(jh, "entry");
-+
-+      /*
-+       * The buffer could *already* be dirty.  Writeout can start
-+       * at any time.
-+       */
-+      jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
-+
-+      /*
-+       * What if the buffer is already part of a running transaction?
-+       * 
-+       * There are two cases:
-+       * 1) It is part of the current running transaction.  Refile it,
-+       *    just in case we have allocated it as metadata, deallocated
-+       *    it, then reallocated it as data. 
-+       * 2) It is part of the previous, still-committing transaction.
-+       *    If all we want to do is to guarantee that the buffer will be
-+       *    written to disk before this new transaction commits, then
-+       *    being sure that the *previous* transaction has this same 
-+       *    property is sufficient for us!  Just leave it on its old
-+       *    transaction.
-+       *
-+       * In case (2), the buffer must not already exist as metadata
-+       * --- that would violate write ordering (a transaction is free
-+       * to write its data at any point, even before the previous
-+       * committing transaction has committed).  The caller must
-+       * never, ever allow this to happen: there's nothing we can do
-+       * about it in this layer.
-+       */
-+      spin_lock(&journal_datalist_lock);
-+      if (jh->b_transaction) {
-+              JBUFFER_TRACE(jh, "has transaction");
-+              if (jh->b_transaction != handle->h_transaction) {
-+                      JBUFFER_TRACE(jh, "belongs to older transaction");
-+                      J_ASSERT_JH(jh, jh->b_transaction ==
-+                                      journal->j_committing_transaction);
-+
-+                      /* @@@ IS THIS TRUE  ? */
-+                      /*
-+                       * Not any more.  Scenario: someone does a write()
-+                       * in data=journal mode.  The buffer's transaction has
-+                       * moved into commit.  Then someone does another
-+                       * write() to the file.  We do the frozen data copyout
-+                       * and set b_next_transaction to point to j_running_t.
-+                       * And while we're in that state, someone does a
-+                       * writepage() in an attempt to pageout the same area
-+                       * of the file via a shared mapping.  At present that
-+                       * calls journal_dirty_data(), and we get right here.
-+                       * It may be too late to journal the data.  Simply
-+                       * falling through to the next test will suffice: the
-+                       * data will be dirty and wil be checkpointed.  The
-+                       * ordering comments in the next comment block still
-+                       * apply.
-+                       */
-+                      //J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-+
-+                      /*
-+                       * If we're journalling data, and this buffer was
-+                       * subject to a write(), it could be metadata, forget
-+                       * or shadow against the committing transaction.  Now,
-+                       * someone has dirtied the same darn page via a mapping
-+                       * and it is being writepage()'d.
-+                       * We *could* just steal the page from commit, with some
-+                       * fancy locking there.  Instead, we just skip it -
-+                       * don't tie the page's buffers to the new transaction
-+                       * at all.
-+                       * Implication: if we crash before the writepage() data
-+                       * is written into the filesystem, recovery will replay
-+                       * the write() data.
-+                       */
-+                      if (jh->b_jlist != BJ_None &&
-+                                      jh->b_jlist != BJ_SyncData &&
-+                                      jh->b_jlist != BJ_AsyncData) {
-+                              JBUFFER_TRACE(jh, "Not stealing");
-+                              goto no_journal;
-+                      }
-+
-+                      /*
-+                       * This buffer may be undergoing writeout in commit.  We
-+                       * can't return from here and let the caller dirty it
-+                       * again because that can cause the write-out loop in
-+                       * commit to never terminate.
-+                       */
-+                      if (!async && buffer_dirty(bh)) {
-+                              atomic_inc(&bh->b_count);
-+                              spin_unlock(&journal_datalist_lock);
-+                              need_brelse = 1;
-+                              ll_rw_block(WRITE, 1, &bh);
-+                              wait_on_buffer(bh);
-+                              spin_lock(&journal_datalist_lock);
-+                              /* The buffer may become locked again at any
-+                                 time if it is redirtied */
-+                      }
-+
-+                      /* journal_clean_data_list() may have got there first */
-+                      if (jh->b_transaction != NULL) {
-+                              JBUFFER_TRACE(jh, "unfile from commit");
-+                              __journal_unfile_buffer(jh);
-+                              jh->b_transaction = NULL;
-+                      }
-+                      /* The buffer will be refiled below */
-+
-+              }
-+              /*
-+               * Special case --- the buffer might actually have been
-+               * allocated and then immediately deallocated in the previous,
-+               * committing transaction, so might still be left on that
-+               * transaction's metadata lists.
-+               */
-+              if (jh->b_jlist != wanted_jlist) {
-+                      JBUFFER_TRACE(jh, "not on correct data list: unfile");
-+                      J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
-+                      __journal_unfile_buffer(jh);
-+                      jh->b_transaction = NULL;
-+                      JBUFFER_TRACE(jh, "file as data");
-+                      __journal_file_buffer(jh, handle->h_transaction,
-+                                              wanted_jlist);
-+              }
-+      } else {
-+              JBUFFER_TRACE(jh, "not on a transaction");
-+              __journal_file_buffer(jh, handle->h_transaction, wanted_jlist);
-+      }
-+no_journal:
-+      spin_unlock(&journal_datalist_lock);
-+      if (need_brelse) {
-+              BUFFER_TRACE(bh, "brelse");
-+              __brelse(bh);
-+      }
-+      JBUFFER_TRACE(jh, "exit");
-+      journal_unlock_journal_head(jh);
-+      return 0;
-+}
-+
-+/* 
-+ * journal_dirty_metadata: mark a buffer as containing dirty metadata
-+ * which needs to be journaled as part of the current transaction.
-+ *
-+ * The buffer is placed on the transaction's metadata list and is marked
-+ * as belonging to the transaction.  
-+ *
-+ * Special care needs to be taken if the buffer already belongs to the
-+ * current committing transaction (in which case we should have frozen
-+ * data present for that commit).  In that case, we don't relink the
-+ * buffer: that only gets done when the old transaction finally
-+ * completes its commit.
-+ * 
-+ * Returns error number or 0 on success.  
-+ */
-+
-+int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      struct journal_head *jh = bh2jh(bh);
-+
-+      jbd_debug(5, "journal_head %p\n", jh);
-+      JBUFFER_TRACE(jh, "entry");
-+      lock_journal(journal);
-+      if (is_handle_aborted(handle))
-+              goto out_unlock;
-+      
-+      spin_lock(&journal_datalist_lock);
-+      set_bit(BH_JBDDirty, &bh->b_state);
-+      set_buffer_flushtime(bh);
-+
-+      J_ASSERT_JH(jh, jh->b_transaction != NULL);
-+      
-+      /* 
-+       * Metadata already on the current transaction list doesn't
-+       * need to be filed.  Metadata on another transaction's list must
-+       * be committing, and will be refiled once the commit completes:
-+       * leave it alone for now. 
-+       */
-+
-+      if (jh->b_transaction != transaction) {
-+              JBUFFER_TRACE(jh, "already on other transaction");
-+              J_ASSERT_JH(jh, jh->b_transaction ==
-+                                      journal->j_committing_transaction);
-+              J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
-+              /* And this case is illegal: we can't reuse another
-+               * transaction's data buffer, ever. */
-+              /* FIXME: writepage() should be journalled */
-+              J_ASSERT_JH(jh, jh->b_jlist != BJ_SyncData);
-+              goto done_locked;
-+      }
-+
-+      /* That test should have eliminated the following case: */
-+      J_ASSERT_JH(jh, jh->b_frozen_data == 0);
-+
-+      JBUFFER_TRACE(jh, "file as BJ_Metadata");
-+      __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
-+
-+done_locked:
-+      spin_unlock(&journal_datalist_lock);
-+      JBUFFER_TRACE(jh, "exit");
-+out_unlock:
-+      unlock_journal(journal);
-+      return 0;
-+}
-+
-+#if 0
-+/* 
-+ * journal_release_buffer: undo a get_write_access without any buffer
-+ * updates, if the update decided in the end that it didn't need access.
-+ *
-+ * journal_get_write_access() can block, so it is quite possible for a
-+ * journaling component to decide after the write access is returned
-+ * that global state has changed and the update is no longer required.  */
-+
-+void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      struct journal_head *jh = bh2jh(bh);
-+
-+      lock_journal(journal);
-+      JBUFFER_TRACE(jh, "entry");
-+
-+      /* If the buffer is reserved but not modified by this
-+       * transaction, then it is safe to release it.  In all other
-+       * cases, just leave the buffer as it is. */
-+
-+      spin_lock(&journal_datalist_lock);
-+      if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction &&
-+          !buffer_jdirty(jh2bh(jh))) {
-+              JBUFFER_TRACE(jh, "unused: refiling it");
-+              handle->h_buffer_credits++;
-+              __journal_refile_buffer(jh);
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+
-+      JBUFFER_TRACE(jh, "exit");
-+      unlock_journal(journal);
-+}
-+#endif
-+
-+/* 
-+ * journal_forget: bforget() for potentially-journaled buffers.  We can
-+ * only do the bforget if there are no commits pending against the
-+ * buffer.  If the buffer is dirty in the current running transaction we
-+ * can safely unlink it. 
-+ *
-+ * bh may not be a journalled buffer at all - it may be a non-JBD
-+ * buffer which came off the hashtable.  Check for this.
-+ *
-+ * Decrements bh->b_count by one.
-+ * 
-+ * Allow this call even if the handle has aborted --- it may be part of
-+ * the caller's cleanup after an abort.
-+ */
-+
-+void journal_forget (handle_t *handle, struct buffer_head *bh)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      struct journal_head *jh;
-+
-+      BUFFER_TRACE(bh, "entry");
-+
-+      lock_journal(journal);
-+      spin_lock(&journal_datalist_lock);
-+
-+      if (!buffer_jbd(bh))
-+              goto not_jbd;
-+      jh = bh2jh(bh);
-+
-+      if (jh->b_transaction == handle->h_transaction) {
-+              J_ASSERT_JH(jh, !jh->b_frozen_data);
-+
-+              /* If we are forgetting a buffer which is already part
-+               * of this transaction, then we can just drop it from
-+               * the transaction immediately. */
-+              clear_bit(BH_Dirty, &bh->b_state);
-+              clear_bit(BH_JBDDirty, &bh->b_state);
-+
-+              JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
-+              J_ASSERT_JH(jh, !jh->b_committed_data);
-+
-+              __journal_unfile_buffer(jh);
-+              jh->b_transaction = 0;
-+
-+              /* 
-+               * We are no longer going to journal this buffer.
-+               * However, the commit of this transaction is still
-+               * important to the buffer: the delete that we are now
-+               * processing might obsolete an old log entry, so by
-+               * committing, we can satisfy the buffer's checkpoint.
-+               *
-+               * So, if we have a checkpoint on the buffer, we should
-+               * now refile the buffer on our BJ_Forget list so that
-+               * we know to remove the checkpoint after we commit. 
-+               */
-+
-+              if (jh->b_cp_transaction) {
-+                      __journal_file_buffer(jh, transaction, BJ_Forget);
-+              } else {
-+                      __journal_remove_journal_head(bh);
-+                      __brelse(bh);
-+                      if (!buffer_jbd(bh)) {
-+                              spin_unlock(&journal_datalist_lock);
-+                              unlock_journal(journal);
-+                              __bforget(bh);
-+                              return;
-+                      }
-+              }
-+              
-+      } else if (jh->b_transaction) {
-+              J_ASSERT_JH(jh, (jh->b_transaction == 
-+                               journal->j_committing_transaction));
-+              /* However, if the buffer is still owned by a prior
-+               * (committing) transaction, we can't drop it yet... */
-+              JBUFFER_TRACE(jh, "belongs to older transaction");
-+              /* ... but we CAN drop it from the new transaction if we
-+               * have also modified it since the original commit. */
-+
-+              if (jh->b_next_transaction) {
-+                      J_ASSERT(jh->b_next_transaction == transaction);
-+                      jh->b_next_transaction = NULL;
-+              }
-+      }
-+
-+not_jbd:
-+      spin_unlock(&journal_datalist_lock);
-+      unlock_journal(journal);
-+      __brelse(bh);
-+      return;
-+}
-+
-+#if 0 /* Unused */
-+/*
-+ * journal_sync_buffer: flush a potentially-journaled buffer to disk.
-+ *
-+ * Used for O_SYNC filesystem operations.  If the buffer is journaled,
-+ * we need to complete the O_SYNC by waiting for the transaction to
-+ * complete.  It is an error to call journal_sync_buffer before
-+ * journal_stop!
-+ */
-+
-+void journal_sync_buffer(struct buffer_head *bh)
-+{
-+      transaction_t *transaction;
-+      journal_t *journal;
-+      long sequence;
-+      struct journal_head *jh;
-+
-+      /* If the buffer isn't journaled, this is easy: just sync it to
-+       * disk.  */
-+      BUFFER_TRACE(bh, "entry");
-+
-+      spin_lock(&journal_datalist_lock);
-+      if (!buffer_jbd(bh)) {
-+              spin_unlock(&journal_datalist_lock);
-+              return;
-+      }
-+      jh = bh2jh(bh);
-+      if (jh->b_transaction == NULL) {
-+              /* If the buffer has already been journaled, then this
-+               * is a noop. */
-+              if (jh->b_cp_transaction == NULL) {
-+                      spin_unlock(&journal_datalist_lock);
-+                      return;
-+              }
-+              atomic_inc(&bh->b_count);
-+              spin_unlock(&journal_datalist_lock);
-+              ll_rw_block (WRITE, 1, &bh);
-+              wait_on_buffer(bh);
-+              __brelse(bh);
-+              goto out;
-+      }
-+      
-+      /* Otherwise, just wait until the transaction is synced to disk. */
-+      transaction = jh->b_transaction;
-+      journal = transaction->t_journal;
-+      sequence = transaction->t_tid;
-+      spin_unlock(&journal_datalist_lock);
-+
-+      jbd_debug(2, "requesting commit for jh %p\n", jh);
-+      log_start_commit (journal, transaction);
-+      
-+      while (tid_gt(sequence, journal->j_commit_sequence)) {
-+              wake_up(&journal->j_wait_done_commit);
-+              sleep_on(&journal->j_wait_done_commit);
-+      }
-+      JBUFFER_TRACE(jh, "exit");
-+out:
-+      return;
-+}
-+#endif
-+
-+/*
-+ * All done for a particular handle.
-+ *
-+ * There is not much action needed here.  We just return any remaining
-+ * buffer credits to the transaction and remove the handle.  The only
-+ * complication is that we need to start a commit operation if the
-+ * filesystem is marked for synchronous update.
-+ *
-+ * journal_stop itself will not usually return an error, but it may
-+ * do so in unusual circumstances.  In particular, expect it to 
-+ * return -EIO if a journal_abort has been executed since the
-+ * transaction began.
-+ */
-+
-+int journal_stop(handle_t *handle)
-+{
-+      transaction_t *transaction = handle->h_transaction;
-+      journal_t *journal = transaction->t_journal;
-+      int old_handle_count, err;
-+      
-+      if (!handle)
-+              return 0;
-+
-+      J_ASSERT (transaction->t_updates > 0);
-+      J_ASSERT (journal_current_handle() == handle);
-+      
-+      if (is_handle_aborted(handle))
-+              err = -EIO;
-+      else
-+              err = 0;
-+      
-+      if (--handle->h_ref > 0) {
-+              jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
-+                        handle->h_ref);
-+              return err;
-+      }
-+
-+      jbd_debug(4, "Handle %p going down\n", handle);
-+
-+      /*
-+       * Implement synchronous transaction batching.  If the handle
-+       * was synchronous, don't force a commit immediately.  Let's
-+       * yield and let another thread piggyback onto this transaction.
-+       * Keep doing that while new threads continue to arrive.
-+       * It doesn't cost much - we're about to run a commit and sleep
-+       * on IO anyway.  Speeds up many-threaded, many-dir operations
-+       * by 30x or more...
-+       */
-+      if (handle->h_sync) {
-+              do {
-+                      old_handle_count = transaction->t_handle_count;
-+                      set_current_state(TASK_RUNNING);
-+                      current->policy |= SCHED_YIELD;
-+                      schedule();
-+              } while (old_handle_count != transaction->t_handle_count);
-+      }
-+
-+      current->journal_info = NULL;
-+      transaction->t_outstanding_credits -= handle->h_buffer_credits;
-+      transaction->t_updates--;
-+      if (!transaction->t_updates) {
-+              wake_up(&journal->j_wait_updates);
-+              if (journal->j_barrier_count)
-+                      wake_up(&journal->j_wait_transaction_locked);
-+      }
-+
-+      /* 
-+       * If the handle is marked SYNC, we need to set another commit
-+       * going!  We also want to force a commit if the current
-+       * transaction is occupying too much of the log, or if the
-+       * transaction is too old now.
-+       */
-+      if (handle->h_sync ||
-+                      transaction->t_outstanding_credits >
-+                              journal->j_max_transaction_buffers ||
-+                      time_after_eq(jiffies, transaction->t_expires)) {
-+              /* Do this even for aborted journals: an abort still
-+               * completes the commit thread, it just doesn't write
-+               * anything to disk. */
-+              tid_t tid = transaction->t_tid;
-+              
-+              jbd_debug(2, "transaction too old, requesting commit for "
-+                                      "handle %p\n", handle);
-+              /* This is non-blocking */
-+              log_start_commit(journal, transaction);
-+              
-+              /*
-+               * Special case: JFS_SYNC synchronous updates require us
-+               * to wait for the commit to complete.  
-+               */
-+              if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-+                      log_wait_commit(journal, tid);
-+      }
-+      kfree(handle);
-+      return err;
-+}
-+
-+/*
-+ * For synchronous operations: force any uncommitted trasnactions
-+ * to disk.  May seem kludgy, but it reuses all the handle batching
-+ * code in a very simple manner.
-+ */
-+int journal_force_commit(journal_t *journal)
-+{
-+      handle_t *handle;
-+      int ret = 0;
-+
-+      lock_kernel();
-+      handle = journal_start(journal, 1);
-+      if (IS_ERR(handle)) {
-+              ret = PTR_ERR(handle);
-+              goto out;
-+      }
-+      handle->h_sync = 1;
-+      journal_stop(handle);
-+out:
-+      unlock_kernel();
-+      return ret;
-+}
-+
-+/*
-+ *
-+ * List management code snippets: various functions for manipulating the
-+ * transaction buffer lists.
-+ *
-+ */
-+
-+/*
-+ * Append a buffer to a transaction list, given the transaction's list head
-+ * pointer.
-+ * journal_datalist_lock is held.
-+ */
-+
-+static inline void 
-+__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
-+{
-+      if (!*list) {
-+              jh->b_tnext = jh->b_tprev = jh;
-+              *list = jh;
-+      } else {
-+              /* Insert at the tail of the list to preserve order */
-+              struct journal_head *first = *list, *last = first->b_tprev;
-+              jh->b_tprev = last;
-+              jh->b_tnext = first;
-+              last->b_tnext = first->b_tprev = jh;
-+      }
-+}
-+
-+/* 
-+ * Remove a buffer from a transaction list, given the transaction's list
-+ * head pointer.
-+ *
-+ * Called with journal_datalist_lock held, and the journal may not
-+ * be locked.
-+ */
-+
-+static inline void
-+__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
-+{
-+      if (*list == jh) {
-+              *list = jh->b_tnext;
-+              if (*list == jh)
-+                      *list = 0;
-+      }
-+      jh->b_tprev->b_tnext = jh->b_tnext;
-+      jh->b_tnext->b_tprev = jh->b_tprev;
-+}
-+
-+/* 
-+ * Remove a buffer from the appropriate transaction list.
-+ *
-+ * Note that this function can *change* the value of
-+ * bh->b_transaction->t_sync_datalist, t_async_datalist, t_buffers, t_forget,
-+ * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
-+ * is holding onto a copy of one of thee pointers, it could go bad.
-+ * Generally the caller needs to re-read the pointer from the transaction_t.
-+ *
-+ * If bh->b_jlist is BJ_SyncData or BJ_AsyncData then we may have been called
-+ * via journal_try_to_free_buffer() or journal_clean_data_list().  In that
-+ * case, journal_datalist_lock will be held, and the journal may not be locked.
-+ */
-+void __journal_unfile_buffer(struct journal_head *jh)
-+{
-+      struct journal_head **list = 0;
-+      transaction_t * transaction;
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+      transaction = jh->b_transaction;
-+
-+#ifdef __SMP__
-+      J_ASSERT (current->lock_depth >= 0);
-+#endif
-+      J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
-+
-+      if (jh->b_jlist != BJ_None)
-+              J_ASSERT_JH(jh, transaction != 0);
-+
-+      switch (jh->b_jlist) {
-+      case BJ_None:
-+              return;
-+      case BJ_SyncData:
-+              list = &transaction->t_sync_datalist;
-+              break;
-+      case BJ_AsyncData:
-+              list = &transaction->t_async_datalist;
-+              break;
-+      case BJ_Metadata:
-+              transaction->t_nr_buffers--;
-+              J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
-+              list = &transaction->t_buffers;
-+              break;
-+      case BJ_Forget:
-+              list = &transaction->t_forget;
-+              break;
-+      case BJ_IO:
-+              list = &transaction->t_iobuf_list;
-+              break;
-+      case BJ_Shadow:
-+              list = &transaction->t_shadow_list;
-+              break;
-+      case BJ_LogCtl:
-+              list = &transaction->t_log_list;
-+              break;
-+      case BJ_Reserved:
-+              list = &transaction->t_reserved_list;
-+              break;
-+      }
-+      
-+      __blist_del_buffer(list, jh);
-+      jh->b_jlist = BJ_None;
-+      if (test_and_clear_bit(BH_JBDDirty, &jh2bh(jh)->b_state)) {
-+              set_bit(BH_Dirty, &jh2bh(jh)->b_state);
-+      }
-+}
-+
-+void journal_unfile_buffer(struct journal_head *jh)
-+{
-+      spin_lock(&journal_datalist_lock);
-+      __journal_unfile_buffer(jh);
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/*
-+ * Called from journal_try_to_free_buffers().  The journal is not
-+ * locked. lru_list_lock is not held.
-+ *
-+ * Here we see why journal_datalist_lock is global and not per-journal.
-+ * We cannot get back to this buffer's journal pointer without locking
-+ * out journal_clean_data_list() in some manner.
-+ *
-+ * One could use journal_datalist_lock to get unracy access to a
-+ * per-journal lock.
-+ *
-+ * Called with journal_datalist_lock held.
-+ *
-+ * Returns non-zero iff we were able to free the journal_head.
-+ */
-+static int __journal_try_to_free_buffer(struct buffer_head *bh,
-+                                      int *locked_or_dirty)
-+{
-+      struct journal_head *jh;
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+
-+      jh = bh2jh(bh);
-+
-+      if (buffer_locked(bh) || buffer_dirty(bh)) {
-+              *locked_or_dirty = 1;
-+              goto out;
-+      }
-+
-+      if (!buffer_uptodate(bh))
-+              goto out;
-+
-+      if (jh->b_next_transaction != 0)
-+              goto out;
-+
-+      if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
-+              if (jh->b_jlist == BJ_SyncData || jh->b_jlist==BJ_AsyncData) {
-+                      /* A written-back ordered data buffer */
-+                      JBUFFER_TRACE(jh, "release data");
-+                      __journal_unfile_buffer(jh);
-+                      jh->b_transaction = 0;
-+                      __journal_remove_journal_head(bh);
-+                      __brelse(bh);
-+              }
-+      }
-+      else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
-+              /* written-back checkpointed metadata buffer */
-+              if (jh->b_jlist == BJ_None) {
-+                      JBUFFER_TRACE(jh, "remove from checkpoint list");
-+                      __journal_remove_checkpoint(jh);
-+                      __journal_remove_journal_head(bh);
-+                      __brelse(bh);
-+              }
-+      }
-+      return !buffer_jbd(bh);
-+
-+out:
-+      return 0;
-+}
-+
-+/*
-+ * journal_try_to_free_buffers().  For all the buffers on this page,
-+ * if they are fully written out ordered data, move them onto BUF_CLEAN
-+ * so try_to_free_buffers() can reap them.  Called with lru_list_lock
-+ * not held.  Does its own locking.
-+ *
-+ * This complicates JBD locking somewhat.  We aren't protected by the
-+ * BKL here.  We wish to remove the buffer from its committing or
-+ * running transaction's ->t_datalist via __journal_unfile_buffer.
-+ *
-+ * This may *change* the value of transaction_t->t_datalist, so anyone
-+ * who looks at t_datalist needs to lock against this function.
-+ *
-+ * Even worse, someone may be doing a journal_dirty_data on this
-+ * buffer.  So we need to lock against that.  journal_dirty_data()
-+ * will come out of the lock with the buffer dirty, which makes it
-+ * ineligible for release here.
-+ *
-+ * Who else is affected by this?  hmm...  Really the only contender
-+ * is do_get_write_access() - it could be looking at the buffer while
-+ * journal_try_to_free_buffer() is changing its state.  But that
-+ * cannot happen because we never reallocate freed data as metadata
-+ * while the data is part of a transaction.  Yes?
-+ *
-+ * This function returns non-zero if we wish try_to_free_buffers()
-+ * to be called. We do this is the page is releasable by try_to_free_buffers().
-+ * We also do it if the page has locked or dirty buffers and the caller wants
-+ * us to perform sync or async writeout.
-+ */
-+int journal_try_to_free_buffers(journal_t *journal, 
-+                              struct page *page, int gfp_mask)
-+{
-+      struct buffer_head *bh;
-+      struct buffer_head *tmp;
-+      int locked_or_dirty = 0;
-+      int call_ttfb = 1;
-+
-+      J_ASSERT(PageLocked(page));
-+
-+      bh = page->buffers;
-+      tmp = bh;
-+      spin_lock(&journal_datalist_lock);
-+      do {
-+              struct buffer_head *p = tmp;
-+
-+              tmp = tmp->b_this_page;
-+              if (buffer_jbd(p))
-+                      if (!__journal_try_to_free_buffer(p, &locked_or_dirty))
-+                              call_ttfb = 0;
-+      } while (tmp != bh);
-+      spin_unlock(&journal_datalist_lock);
-+
-+      if (!(gfp_mask & (__GFP_IO|__GFP_WAIT)))
-+              goto out;
-+      if (!locked_or_dirty)
-+              goto out;
-+      /*
-+       * The VM wants us to do writeout, or to block on IO, or both.
-+       * So we allow try_to_free_buffers to be called even if the page
-+       * still has journalled buffers.
-+       */
-+      call_ttfb = 1;
-+out:
-+      return call_ttfb;
-+}
-+
-+/*
-+ * This buffer is no longer needed.  If it is on an older transaction's
-+ * checkpoint list we need to record it on this transaction's forget list
-+ * to pin this buffer (and hence its checkpointing transaction) down until
-+ * this transaction commits.  If the buffer isn't on a checkpoint list, we
-+ * release it.
-+ * Returns non-zero if JBD no longer has an interest in the buffer.
-+ */
-+static int dispose_buffer(struct journal_head *jh,
-+              transaction_t *transaction)
-+{
-+      int may_free = 1;
-+      struct buffer_head *bh = jh2bh(jh);
-+
-+      spin_lock(&journal_datalist_lock);
-+      __journal_unfile_buffer(jh);
-+      jh->b_transaction = 0;
-+
-+      if (jh->b_cp_transaction) {
-+              JBUFFER_TRACE(jh, "on running+cp transaction");
-+              __journal_file_buffer(jh, transaction, BJ_Forget);
-+              clear_bit(BH_JBDDirty, &bh->b_state);
-+              may_free = 0;
-+      } else {
-+              JBUFFER_TRACE(jh, "on running transaction");
-+              __journal_remove_journal_head(bh);
-+              __brelse(bh);
-+      }
-+      spin_unlock(&journal_datalist_lock);
-+      return may_free;
-+}
-+
-+/*
-+ * journal_flushpage 
-+ *
-+ * This code is tricky.  It has a number of cases to deal with.
-+ *
-+ * There are two invariants which this code relies on:
-+ *
-+ * i_size must be updated on disk before we start calling flushpage on the
-+ * data.
-+ * 
-+ *  This is done in ext3 by defining an ext3_setattr method which
-+ *  updates i_size before truncate gets going.  By maintaining this
-+ *  invariant, we can be sure that it is safe to throw away any buffers
-+ *  attached to the current transaction: once the transaction commits,
-+ *  we know that the data will not be needed.
-+ * 
-+ *  Note however that we can *not* throw away data belonging to the
-+ *  previous, committing transaction!  
-+ *
-+ * Any disk blocks which *are* part of the previous, committing
-+ * transaction (and which therefore cannot be discarded immediately) are
-+ * not going to be reused in the new running transaction
-+ *
-+ *  The bitmap committed_data images guarantee this: any block which is
-+ *  allocated in one transaction and removed in the next will be marked
-+ *  as in-use in the committed_data bitmap, so cannot be reused until
-+ *  the next transaction to delete the block commits.  This means that
-+ *  leaving committing buffers dirty is quite safe: the disk blocks
-+ *  cannot be reallocated to a different file and so buffer aliasing is
-+ *  not possible.
-+ *
-+ *
-+ * The above applies mainly to ordered data mode.  In writeback mode we
-+ * don't make guarantees about the order in which data hits disk --- in
-+ * particular we don't guarantee that new dirty data is flushed before
-+ * transaction commit --- so it is always safe just to discard data
-+ * immediately in that mode.  --sct 
-+ */
-+
-+/*
-+ * The journal_unmap_buffer helper function returns zero if the buffer
-+ * concerned remains pinned as an anonymous buffer belonging to an older
-+ * transaction.
-+ *
-+ * We're outside-transaction here.  Either or both of j_running_transaction
-+ * and j_committing_transaction may be NULL.
-+ */
-+static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
-+{
-+      transaction_t *transaction;
-+      struct journal_head *jh;
-+      int may_free = 1;
-+
-+      BUFFER_TRACE(bh, "entry");
-+
-+      if (!buffer_mapped(bh))
-+              return 1;
-+
-+      /* It is safe to proceed here without the
-+       * journal_datalist_spinlock because the buffers cannot be
-+       * stolen by try_to_free_buffers as long as we are holding the
-+       * page lock. --sct */
-+
-+      if (!buffer_jbd(bh))
-+              goto zap_buffer;
-+
-+      jh = bh2jh(bh);
-+      transaction = jh->b_transaction;
-+      if (transaction == NULL) {
-+              /* First case: not on any transaction.  If it
-+               * has no checkpoint link, then we can zap it:
-+               * it's a writeback-mode buffer so we don't care
-+               * if it hits disk safely. */
-+              if (!jh->b_cp_transaction) {
-+                      JBUFFER_TRACE(jh, "not on any transaction: zap");
-+                      goto zap_buffer;
-+              }
-+              
-+              if (!buffer_dirty(bh)) {
-+                      /* bdflush has written it.  We can drop it now */
-+                      goto zap_buffer;
-+              }
-+
-+              /* OK, it must be in the journal but still not
-+               * written fully to disk: it's metadata or
-+               * journaled data... */
-+
-+              if (journal->j_running_transaction) {
-+                      /* ... and once the current transaction has
-+                       * committed, the buffer won't be needed any
-+                       * longer. */
-+                      JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
-+                      return dispose_buffer(jh,
-+                                      journal->j_running_transaction);
-+              } else {
-+                      /* There is no currently-running transaction. So the
-+                       * orphan record which we wrote for this file must have
-+                       * passed into commit.  We must attach this buffer to
-+                       * the committing transaction, if it exists. */
-+                      if (journal->j_committing_transaction) {
-+                              JBUFFER_TRACE(jh, "give to committing trans");
-+                              return dispose_buffer(jh,
-+                                      journal->j_committing_transaction);
-+                      } else {
-+                              /* The orphan record's transaction has
-+                               * committed.  We can cleanse this buffer */
-+                              clear_bit(BH_JBDDirty, &bh->b_state);
-+                              goto zap_buffer;
-+                      }
-+              }
-+      } else if (transaction == journal->j_committing_transaction) {
-+              /* If it is committing, we simply cannot touch it.  We
-+               * can remove it's next_transaction pointer from the
-+               * running transaction if that is set, but nothing
-+               * else. */
-+              JBUFFER_TRACE(jh, "on committing transaction");
-+              if (jh->b_next_transaction) {
-+                      J_ASSERT(jh->b_next_transaction ==
-+                                      journal->j_running_transaction);
-+                      jh->b_next_transaction = NULL;
-+              }
-+              return 0;
-+      } else {
-+              /* Good, the buffer belongs to the running transaction.
-+               * We are writing our own transaction's data, not any
-+               * previous one's, so it is safe to throw it away
-+               * (remember that we expect the filesystem to have set
-+               * i_size already for this truncate so recovery will not
-+               * expose the disk blocks we are discarding here.) */
-+              J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
-+              may_free = dispose_buffer(jh, transaction);
-+      }
-+
-+zap_buffer:   
-+      if (buffer_dirty(bh))
-+              mark_buffer_clean(bh);
-+      J_ASSERT_BH(bh, !buffer_jdirty(bh));
-+      clear_bit(BH_Uptodate, &bh->b_state);
-+      clear_bit(BH_Mapped, &bh->b_state);
-+      clear_bit(BH_Req, &bh->b_state);
-+      clear_bit(BH_New, &bh->b_state);
-+      return may_free;
-+}
-+
-+/*
-+ * Return non-zero if the page's buffers were successfully reaped
-+ */
-+int journal_flushpage(journal_t *journal, 
-+                    struct page *page, 
-+                    unsigned long offset)
-+{
-+      struct buffer_head *head, *bh, *next;
-+      unsigned int curr_off = 0;
-+      int may_free = 1;
-+              
-+      if (!PageLocked(page))
-+              BUG();
-+      if (!page->buffers)
-+              return 1;
-+
-+      /* We will potentially be playing with lists other than just the
-+       * data lists (especially for journaled data mode), so be
-+       * cautious in our locking. */
-+      lock_journal(journal);
-+
-+      head = bh = page->buffers;
-+      do {
-+              unsigned int next_off = curr_off + bh->b_size;
-+              next = bh->b_this_page;
-+
-+              /* AKPM: doing lock_buffer here may be overly paranoid */
-+              if (offset <= curr_off) {
-+                      /* This block is wholly outside the truncation point */
-+                      lock_buffer(bh);
-+                      may_free &= journal_unmap_buffer(journal, bh);
-+                      unlock_buffer(bh);
-+              }
-+              curr_off = next_off;
-+              bh = next;
-+
-+      } while (bh != head);
-+
-+      unlock_journal(journal);
-+
-+      if (!offset) {
-+              if (!may_free || !try_to_free_buffers(page, 0))
-+                      return 0;
-+              J_ASSERT(page->buffers == NULL);
-+      }
-+      return 1;
-+}
-+
-+/* 
-+ * File a buffer on the given transaction list. 
-+ */
-+void __journal_file_buffer(struct journal_head *jh,
-+                      transaction_t *transaction, int jlist)
-+{
-+      struct journal_head **list = 0;
-+
-+      assert_spin_locked(&journal_datalist_lock);
-+      
-+#ifdef __SMP__
-+      J_ASSERT (current->lock_depth >= 0);
-+#endif
-+      J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
-+      J_ASSERT_JH(jh, jh->b_transaction == transaction ||
-+                              jh->b_transaction == 0);
-+
-+      if (jh->b_transaction) {
-+              if (jh->b_jlist == jlist)
-+                      return;
-+              __journal_unfile_buffer(jh);
-+      } else {
-+              jh->b_transaction = transaction;
-+      }
-+
-+      switch (jlist) {
-+      case BJ_None:
-+              J_ASSERT_JH(jh, !jh->b_committed_data);
-+              J_ASSERT_JH(jh, !jh->b_frozen_data);
-+              return;
-+      case BJ_SyncData:
-+              list = &transaction->t_sync_datalist;
-+              break;
-+      case BJ_AsyncData:
-+              list = &transaction->t_async_datalist;
-+              break;
-+      case BJ_Metadata:
-+              transaction->t_nr_buffers++;
-+              list = &transaction->t_buffers;
-+              break;
-+      case BJ_Forget:
-+              list = &transaction->t_forget;
-+              break;
-+      case BJ_IO:
-+              list = &transaction->t_iobuf_list;
-+              break;
-+      case BJ_Shadow:
-+              list = &transaction->t_shadow_list;
-+              break;
-+      case BJ_LogCtl:
-+              list = &transaction->t_log_list;
-+              break;
-+      case BJ_Reserved:
-+              list = &transaction->t_reserved_list;
-+              break;
-+      }
-+
-+      __blist_add_buffer(list, jh);
-+      jh->b_jlist = jlist;
-+
-+      if (jlist == BJ_Metadata || jlist == BJ_Reserved || 
-+          jlist == BJ_Shadow || jlist == BJ_Forget) {
-+              if (atomic_set_buffer_clean(jh2bh(jh))) {
-+                      set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
-+              }
-+      }
-+}
-+
-+void journal_file_buffer(struct journal_head *jh,
-+                              transaction_t *transaction, int jlist)
-+{
-+      spin_lock(&journal_datalist_lock);
-+      __journal_file_buffer(jh, transaction, jlist);
-+      spin_unlock(&journal_datalist_lock);
-+}
-+
-+/* 
-+ * Remove a buffer from its current buffer list in preparation for
-+ * dropping it from its current transaction entirely.  If the buffer has
-+ * already started to be used by a subsequent transaction, refile the
-+ * buffer on that transaction's metadata list.
-+ */
-+
-+void __journal_refile_buffer(struct journal_head *jh)
-+{
-+      assert_spin_locked(&journal_datalist_lock);
-+#ifdef __SMP__
-+      J_ASSERT_JH(jh, current->lock_depth >= 0);
-+#endif
-+      __journal_unfile_buffer(jh);
-+
-+      /* If the buffer is now unused, just drop it.  If it has been
-+         modified by a later transaction, add it to the new
-+         transaction's metadata list. */
-+
-+      jh->b_transaction = jh->b_next_transaction;
-+      jh->b_next_transaction = NULL;
-+
-+      if (jh->b_transaction != NULL) {
-+              __journal_file_buffer(jh, jh->b_transaction, BJ_Metadata);
-+              J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
-+      } else {
-+              /* Onto BUF_DIRTY for writeback */
-+              refile_buffer(jh2bh(jh));
-+      }
-+}
-+
-+/*
-+ * For the unlocked version of this call, also make sure that any
-+ * hanging journal_head is cleaned up if necessary.
-+ *
-+ * __journal_refile_buffer is usually called as part of a single locked
-+ * operation on a buffer_head, in which the caller is probably going to
-+ * be hooking the journal_head onto other lists.  In that case it is up
-+ * to the caller to remove the journal_head if necessary.  For the
-+ * unlocked journal_refile_buffer call, the caller isn't going to be
-+ * doing anything else to the buffer so we need to do the cleanup
-+ * ourselves to avoid a jh leak. 
-+ *
-+ * *** The journal_head may be freed by this call! ***
-+ */
-+void journal_refile_buffer(struct journal_head *jh)
-+{
-+      struct buffer_head *bh;
-+
-+      spin_lock(&journal_datalist_lock);
-+      bh = jh2bh(jh);
-+
-+      __journal_refile_buffer(jh);
-+      __journal_remove_journal_head(bh);
-+
-+      spin_unlock(&journal_datalist_lock);
-+      __brelse(bh);
-+}
diff --git a/lustre/kernel_patches/patches/add_page_private-2.4.19-bgl.patch b/lustre/kernel_patches/patches/add_page_private-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 9bb754a..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
- include/linux/mm.h |    1 +
- 1 files changed, 1 insertion(+)
-
-Index: linux.mcp2/include/linux/mm.h
-===================================================================
---- linux.mcp2.orig/include/linux/mm.h 2004-05-05 14:32:29.000000000 -0700
-+++ linux.mcp2/include/linux/mm.h      2004-05-05 14:46:54.000000000 -0700
-@@ -162,6 +162,7 @@
-                                          protected by pagemap_lru_lock !! */
-       struct page **pprev_hash;       /* Complement to *next_hash. */
-       struct buffer_head * buffers;   /* Buffer maps us to a disk block. */
-+      unsigned long private;
-       /*
-        * On machines where all RAM is mapped into kernel address space,
diff --git a/lustre/kernel_patches/patches/export-log-2.6-rhel4.patch b/lustre/kernel_patches/patches/export-log-2.6-rhel4.patch
new file mode 100644 (file)
index 0000000..5bfc154
--- /dev/null
@@ -0,0 +1,12 @@
+Index: linux-2.6.3/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.3.orig/fs/jbd/journal.c  2004-01-08 22:59:10.000000000 -0800
++++ linux-2.6.3/fs/jbd/journal.c       2004-02-23 20:09:34.000000000 -0800
+@@ -71,6 +71,7 @@
+ EXPORT_SYMBOL(journal_errno);
+ EXPORT_SYMBOL(journal_ack_err);
+ EXPORT_SYMBOL(journal_clear_err);
++EXPORT_SYMBOL(log_start_commit);
+ EXPORT_SYMBOL(log_wait_commit);
+ EXPORT_SYMBOL(journal_start_commit);
+ EXPORT_SYMBOL(journal_wipe);
diff --git a/lustre/kernel_patches/patches/export-show_task-2.4-bgl.patch b/lustre/kernel_patches/patches/export-show_task-2.4-bgl.patch
deleted file mode 100644 (file)
index a7bdb63..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-Index: linux-bgl/kernel/sched.c
-===================================================================
---- linux-bgl.orig/kernel/sched.c      2003-07-02 08:43:33.000000000 -0700
-+++ linux-bgl/kernel/sched.c   2004-10-26 23:37:44.314193755 -0700
-@@ -1124,7 +1124,7 @@
-       return retval;
- }
--static void show_task(struct task_struct * p)
-+void show_task(struct task_struct * p)
- {
-       unsigned long free = 0;
-       int state;
-Index: linux-bgl/kernel/ksyms.c
-===================================================================
---- linux-bgl.orig/kernel/ksyms.c      2004-10-26 23:23:00.518654978 -0700
-+++ linux-bgl/kernel/ksyms.c   2004-10-26 23:38:29.289071295 -0700
-@@ -76,6 +76,7 @@
- };
- #endif
-+void show_task(struct task_struct *);
- EXPORT_SYMBOL(inter_module_register);
- EXPORT_SYMBOL(inter_module_unregister);
-@@ -595,3 +596,6 @@
- EXPORT_SYMBOL(tasklist_lock);
- EXPORT_SYMBOL(pidhash);
-+
-+/* debug */
-+EXPORT_SYMBOL(show_task);
diff --git a/lustre/kernel_patches/patches/export-truncate-bgl.patch b/lustre/kernel_patches/patches/export-truncate-bgl.patch
deleted file mode 100644 (file)
index 9508215..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
- include/linux/mm.h |    1 +
- mm/filemap.c       |    3 ++-
- 2 files changed, 3 insertions(+), 1 deletion(-)
-
-Index: linux-ion/include/linux/mm.h
-===================================================================
---- linux-ion.orig/include/linux/mm.h  2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/include/linux/mm.h       2004-09-27 15:07:50.000000000 -0700
-@@ -593,6 +593,7 @@
- /* filemap.c */
- extern void remove_inode_page(struct page *);
- extern unsigned long page_unuse(struct page *);
-+extern void truncate_complete_page(struct page *);
- extern void truncate_inode_pages(struct address_space *, loff_t);
- /* generic vm_area_ops exported for stackable file systems */
-Index: linux-ion/mm/filemap.c
-===================================================================
---- linux-ion.orig/mm/filemap.c        2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/mm/filemap.c     2004-09-27 15:08:13.000000000 -0700
-@@ -231,7 +231,7 @@
-               do_flushpage(page, partial);
- }
--static void truncate_complete_page(struct page *page)
-+void truncate_complete_page(struct page *page)
- {
-       /* Leave it on the LRU if it gets converted into anonymous buffers */
-       if (!page->buffers || do_flushpage(page, 0))
-@@ -249,6 +249,7 @@
-       remove_inode_page(page);
-       page_cache_release(page);
- }
-+EXPORT_SYMBOL(truncate_complete_page);
- static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *));
- static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial)
index 8d9ab40..a2b07f8 100644 (file)
@@ -42,15 +42,6 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/ext2_fs_sb.h
  /*
   * second extended-fs super-block data in memory
   */
-Index: linux-2.6.9-5.0.3.EL/kernel/kallsyms.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/kernel/kallsyms.c        2005-02-26 13:24:35.479811840 +0200
-+++ linux-2.6.9-5.0.3.EL/kernel/kallsyms.c     2005-02-26 13:53:13.799587528 +0200
-@@ -310,3 +310,4 @@
- __initcall(kallsyms_init);
- EXPORT_SYMBOL(__print_symbol);
-+EXPORT_SYMBOL(kernel_text_address);
 Index: linux-2.6.9-5.0.3.EL/net/core/sock.c
 ===================================================================
 --- linux-2.6.9-5.0.3.EL.orig/net/core/sock.c  2005-02-26 13:24:35.490810168 +0200
index de1bf20..fbaf63d 100644 (file)
@@ -64,12 +64,3 @@ Index: linux-2.6.5-12.1/kernel/exit.c
  static inline void __put_fs_struct(struct fs_struct *fs)
  {
        /* No need to hold fs->lock if we are killing it */
-Index: linux-2.6.4-51.0/kernel/kallsyms.c
-===================================================================
---- linux-2.6.4-51.0.orig/kernel/kallsyms.c    2004-04-05 12:42:08.000000000 -0400
-+++ linux-2.6.4-51.0/kernel/kallsyms.c 2004-04-15 15:00:56.000000000 -0400
-@@ -329,3 +329,4 @@
- EXPORT_SYMBOL(kallsyms_lookup);
- EXPORT_SYMBOL(__print_symbol);
-+EXPORT_SYMBOL(kernel_text_address);
diff --git a/lustre/kernel_patches/patches/exports_2.4.19-bgl.patch b/lustre/kernel_patches/patches/exports_2.4.19-bgl.patch
deleted file mode 100644 (file)
index 82a0182..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-
-
-
-Index: linux-ion/kernel/ksyms.c
-===================================================================
---- linux-ion.orig/kernel/ksyms.c      2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/kernel/ksyms.c   2004-09-27 15:04:52.000000000 -0700
-@@ -286,6 +286,10 @@
- EXPORT_SYMBOL(dcache_readdir);
- EXPORT_SYMBOL(dcache_dir_ops);
-+/* lustre */
-+EXPORT_SYMBOL(panic_notifier_list);
-+EXPORT_SYMBOL(do_kern_mount);
-+
- /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */
- EXPORT_SYMBOL(default_llseek);
- EXPORT_SYMBOL(dentry_open);
-Index: linux-ion/include/linux/fs.h
-===================================================================
---- linux-ion.orig/include/linux/fs.h  2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/include/linux/fs.h       2004-09-27 15:04:52.000000000 -0700
-@@ -1050,6 +1050,7 @@
- extern struct vfsmount *kern_mount(struct file_system_type *);
- extern int may_umount(struct vfsmount *);
- extern long do_mount(char *, char *, char *, unsigned long, void *);
-+struct vfsmount *do_kern_mount(const char *fstype, int flags, char *name, void *data);
- extern void umount_tree(struct vfsmount *);
- #define kern_umount mntput
-Index: linux-ion/mm/memory.c
-===================================================================
---- linux-ion.orig/mm/memory.c 2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/mm/memory.c      2004-09-27 15:05:56.000000000 -0700
-@@ -401,6 +401,7 @@
-               mm->rss = 0;
-       spin_unlock(&mm->page_table_lock);
- }
-+EXPORT_SYMBOL(zap_page_range);
- /*
-  * Do a quick page-table lookup for a single page. 
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-2.4.19-suse.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-2.4.19-suse.patch
deleted file mode 100644 (file)
index 1cdaa93..0000000
+++ /dev/null
@@ -1,2560 +0,0 @@
- fs/ext3/Makefile           |    2 
- fs/ext3/dir.c              |  299 +++++++++
- fs/ext3/file.c             |    3 
- fs/ext3/hash.c             |  215 ++++++
- fs/ext3/namei.c            | 1388 ++++++++++++++++++++++++++++++++++++++++-----
- fs/ext3/super.c            |    7 
- include/linux/ext3_fs.h    |   85 ++
- include/linux/ext3_fs_sb.h |    2 
- include/linux/ext3_jbd.h   |    2 
- include/linux/rbtree.h     |    2 
- lib/rbtree.c               |   42 +
- 11 files changed, 1887 insertions(+), 160 deletions(-)
-
-Index: linux-2.4.19.SuSE/fs/ext3/Makefile
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/Makefile    2004-05-27 11:07:21.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext3/Makefile 2004-05-27 11:08:28.000000000 -0700
-@@ -12,7 +12,7 @@
- export-objs :=        super.o inode.o
- obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
--              ioctl.o namei.o super.o symlink.o
-+              ioctl.o namei.o super.o symlink.o hash.o
- obj-m    := $(O_TARGET)
- obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
-Index: linux-2.4.19.SuSE/fs/ext3/dir.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/dir.c       2001-11-09 14:25:04.000000000 -0800
-+++ linux-2.4.19.SuSE/fs/ext3/dir.c    2004-05-27 11:08:28.000000000 -0700
-@@ -21,12 +21,16 @@
- #include <linux/fs.h>
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
-+#include <linux/slab.h>
-+#include <linux/rbtree.h>
- static unsigned char ext3_filetype_table[] = {
-       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
- };
- static int ext3_readdir(struct file *, void *, filldir_t);
-+static int ext3_dx_readdir(struct file * filp,
-+                         void * dirent, filldir_t filldir);
- struct file_operations ext3_dir_operations = {
-       read:           generic_read_dir,
-@@ -35,6 +39,17 @@
-       fsync:          ext3_sync_file,         /* BKL held */
- };
-+
-+static unsigned char get_dtype(struct super_block *sb, int filetype)
-+{
-+      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
-+          (filetype >= EXT3_FT_MAX))
-+              return DT_UNKNOWN;
-+
-+      return (ext3_filetype_table[filetype]);
-+}
-+                             
-+
- int ext3_check_dir_entry (const char * function, struct inode * dir,
-                         struct ext3_dir_entry_2 * de,
-                         struct buffer_head * bh,
-@@ -79,6 +94,16 @@
-       sb = inode->i_sb;
-+      if (is_dx(inode)) {
-+              err = ext3_dx_readdir(filp, dirent, filldir);
-+              if (err != ERR_BAD_DX_DIR)
-+                      return err;
-+              /*
-+               * We don't set the inode dirty flag since it's not
-+               * critical that it get flushed back to the disk.
-+               */
-+              EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
-+      }
-       stored = 0;
-       bh = NULL;
-       offset = filp->f_pos & (sb->s_blocksize - 1);
-@@ -162,18 +187,12 @@
-                                * during the copy operation.
-                                */
-                               unsigned long version = filp->f_version;
--                              unsigned char d_type = DT_UNKNOWN;
--                              if (EXT3_HAS_INCOMPAT_FEATURE(sb,
--                                              EXT3_FEATURE_INCOMPAT_FILETYPE)
--                                              && de->file_type < EXT3_FT_MAX)
--                                      d_type =
--                                        ext3_filetype_table[de->file_type];
-                               error = filldir(dirent, de->name,
-                                               de->name_len,
-                                               filp->f_pos,
-                                               le32_to_cpu(de->inode),
--                                              d_type);
-+                                              get_dtype(sb, de->file_type));
-                               if (error)
-                                       break;
-                               if (version != filp->f_version)
-@@ -188,3 +207,269 @@
-       UPDATE_ATIME(inode);
-       return 0;
- }
-+
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * These functions convert from the major/minor hash to an f_pos
-+ * value.
-+ * 
-+ * Currently we only use major hash numer.  This is unfortunate, but
-+ * on 32-bit machines, the same VFS interface is used for lseek and
-+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
-+ * lseek/telldir/seekdir will blow out spectacularly, and from within
-+ * the ext2 low-level routine, we don't know if we're being called by
-+ * a 64-bit version of the system call or the 32-bit version of the
-+ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
-+ * cookie.  Sigh.
-+ */
-+#define hash2pos(major, minor)        (major >> 1)
-+#define pos2maj_hash(pos)     ((pos << 1) & 0xffffffff)
-+#define pos2min_hash(pos)     (0)
-+
-+/*
-+ * This structure holds the nodes of the red-black tree used to store
-+ * the directory entry in hash order.
-+ */
-+struct fname {
-+      __u32           hash;
-+      __u32           minor_hash;
-+      rb_node_t       rb_hash; 
-+      struct fname    *next;
-+      __u32           inode;
-+      __u8            name_len;
-+      __u8            file_type;
-+      char            name[0];
-+};
-+
-+/*
-+ * This functoin implements a non-recursive way of freeing all of the
-+ * nodes in the red-black tree.
-+ */
-+static void free_rb_tree_fname(rb_root_t *root)
-+{
-+      rb_node_t       *n = root->rb_node;
-+      rb_node_t       *parent;
-+      struct fname    *fname;
-+
-+      while (n) {
-+              /* Do the node's children first */
-+              if ((n)->rb_left) {
-+                      n = n->rb_left;
-+                      continue;
-+              }
-+              if (n->rb_right) {
-+                      n = n->rb_right;
-+                      continue;
-+              }
-+              /*
-+               * The node has no children; free it, and then zero
-+               * out parent's link to it.  Finally go to the
-+               * beginning of the loop and try to free the parent
-+               * node.
-+               */
-+              parent = n->rb_parent;
-+              fname = rb_entry(n, struct fname, rb_hash);
-+              kfree(fname);
-+              if (!parent)
-+                      root->rb_node = 0;
-+              else if (parent->rb_left == n)
-+                      parent->rb_left = 0;
-+              else if (parent->rb_right == n)
-+                      parent->rb_right = 0;
-+              n = parent;
-+      }
-+      root->rb_node = 0;
-+}
-+
-+
-+struct dir_private_info *create_dir_info(loff_t pos)
-+{
-+      struct dir_private_info *p;
-+
-+      p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
-+      if (!p)
-+              return NULL;
-+      p->root.rb_node = 0;
-+      p->curr_node = 0;
-+      p->extra_fname = 0;
-+      p->last_pos = 0;
-+      p->curr_hash = pos2maj_hash(pos);
-+      p->curr_minor_hash = pos2min_hash(pos);
-+      p->next_hash = 0;
-+      return p;
-+}
-+
-+void ext3_htree_free_dir_info(struct dir_private_info *p)
-+{
-+      free_rb_tree_fname(&p->root);
-+      kfree(p);
-+}
-+              
-+/*
-+ * Given a directory entry, enter it into the fname rb tree.
-+ */
-+void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
-+                           __u32 minor_hash,
-+                           struct ext3_dir_entry_2 *dirent)
-+{
-+      rb_node_t **p, *parent = NULL;
-+      struct fname * fname, *new_fn;
-+      struct dir_private_info *info;
-+      int len;
-+
-+      info = (struct dir_private_info *) dir_file->private_data;
-+      p = &info->root.rb_node;
-+
-+      /* Create and allocate the fname structure */
-+      len = sizeof(struct fname) + dirent->name_len + 1;
-+      new_fn = kmalloc(len, GFP_KERNEL);
-+      memset(new_fn, 0, len);
-+      new_fn->hash = hash;
-+      new_fn->minor_hash = minor_hash;
-+      new_fn->inode = le32_to_cpu(dirent->inode);
-+      new_fn->name_len = dirent->name_len;
-+      new_fn->file_type = dirent->file_type;
-+      memcpy(new_fn->name, dirent->name, dirent->name_len);
-+      new_fn->name[dirent->name_len] = 0;
-+      
-+      while (*p) {
-+              parent = *p;
-+              fname = rb_entry(parent, struct fname, rb_hash);
-+
-+              /*
-+               * If the hash and minor hash match up, then we put
-+               * them on a linked list.  This rarely happens...
-+               */
-+              if ((new_fn->hash == fname->hash) &&
-+                  (new_fn->minor_hash == fname->minor_hash)) {
-+                      new_fn->next = fname->next;
-+                      fname->next = new_fn;
-+                      return;
-+              }
-+                      
-+              if (new_fn->hash < fname->hash)
-+                      p = &(*p)->rb_left;
-+              else if (new_fn->hash > fname->hash)
-+                      p = &(*p)->rb_right;
-+              else if (new_fn->minor_hash < fname->minor_hash)
-+                      p = &(*p)->rb_left;
-+              else /* if (new_fn->minor_hash > fname->minor_hash) */
-+                      p = &(*p)->rb_right;
-+      }
-+
-+      rb_link_node(&new_fn->rb_hash, parent, p);
-+      rb_insert_color(&new_fn->rb_hash, &info->root);
-+}
-+
-+
-+
-+/*
-+ * This is a helper function for ext3_dx_readdir.  It calls filldir
-+ * for all entres on the fname linked list.  (Normally there is only
-+ * one entry on the linked list, unless there are 62 bit hash collisions.)
-+ */
-+static int call_filldir(struct file * filp, void * dirent,
-+                      filldir_t filldir, struct fname *fname)
-+{
-+      struct dir_private_info *info = filp->private_data;
-+      loff_t  curr_pos;
-+      struct inode *inode = filp->f_dentry->d_inode;
-+      struct super_block * sb;
-+      int error;
-+
-+      sb = inode->i_sb;
-+      
-+      if (!fname) {
-+              printk("call_filldir: called with null fname?!?\n");
-+              return 0;
-+      }
-+      curr_pos = hash2pos(fname->hash, fname->minor_hash);
-+      while (fname) {
-+              error = filldir(dirent, fname->name,
-+                              fname->name_len, curr_pos, 
-+                              fname->inode,
-+                              get_dtype(sb, fname->file_type));
-+              if (error) {
-+                      filp->f_pos = curr_pos;
-+                      info->extra_fname = fname->next;
-+                      return error;
-+              }
-+              fname = fname->next;
-+      }
-+      return 0;
-+}
-+
-+static int ext3_dx_readdir(struct file * filp,
-+                       void * dirent, filldir_t filldir)
-+{
-+      struct dir_private_info *info = filp->private_data;
-+      struct inode *inode = filp->f_dentry->d_inode;
-+      struct fname *fname;
-+      int     ret;
-+
-+      if (!info) {
-+              info = create_dir_info(filp->f_pos);
-+              if (!info)
-+                      return -ENOMEM;
-+              filp->private_data = info;
-+      }
-+
-+      /* Some one has messed with f_pos; reset the world */
-+      if (info->last_pos != filp->f_pos) {
-+              free_rb_tree_fname(&info->root);
-+              info->curr_node = 0;
-+              info->extra_fname = 0;
-+              info->curr_hash = pos2maj_hash(filp->f_pos);
-+              info->curr_minor_hash = pos2min_hash(filp->f_pos);
-+      }
-+
-+      /*
-+       * If there are any leftover names on the hash collision
-+       * chain, return them first.
-+       */
-+      if (info->extra_fname &&
-+          call_filldir(filp, dirent, filldir, info->extra_fname))
-+              goto finished;
-+
-+      if (!info->curr_node)
-+              info->curr_node = rb_get_first(&info->root);
-+
-+      while (1) {
-+              /*
-+               * Fill the rbtree if we have no more entries,
-+               * or the inode has changed since we last read in the
-+               * cached entries. 
-+               */
-+              if ((!info->curr_node) ||
-+                  (filp->f_version != inode->i_version)) {
-+                      info->curr_node = 0;
-+                      free_rb_tree_fname(&info->root);
-+                      filp->f_version = inode->i_version;
-+                      ret = ext3_htree_fill_tree(filp, info->curr_hash,
-+                                                 info->curr_minor_hash,
-+                                                 &info->next_hash);
-+                      if (ret < 0)
-+                              return ret;
-+                      if (ret == 0)
-+                              break;
-+                      info->curr_node = rb_get_first(&info->root);
-+              }
-+
-+              fname = rb_entry(info->curr_node, struct fname, rb_hash);
-+              info->curr_hash = fname->hash;
-+              info->curr_minor_hash = fname->minor_hash;
-+              if (call_filldir(filp, dirent, filldir, fname))
-+                      break;
-+
-+              info->curr_node = rb_get_next(info->curr_node);
-+              if (!info->curr_node) {
-+                      info->curr_hash = info->next_hash;
-+                      info->curr_minor_hash = 0;
-+              }
-+      }
-+finished:
-+      info->last_pos = filp->f_pos;
-+      UPDATE_ATIME(inode);
-+      return 0;
-+}
-+#endif
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     2002-12-04 09:46:03.000000000 -0800
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  2004-05-27 11:08:52.000000000 -0700
-@@ -16,6 +16,12 @@
-  *        David S. Miller (davem@caip.rutgers.edu), 1995
-  *  Directory entry file type support and forward compatibility hooks
-  *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
-+ *  Hash Tree Directory indexing (c)
-+ *    Daniel Phillips, 2001
-+ *  Hash Tree Directory indexing porting
-+ *    Christopher Li, 2002
-+ *  Hash Tree Directory indexing cleanup
-+ *    Theodore Ts'o, 2002
-  */
- #include <linux/fs.h>
-@@ -40,6 +46,630 @@
- #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
-+static struct buffer_head *ext3_append(handle_t *handle,
-+                                      struct inode *inode,
-+                                      u32 *block, int *err)
-+{
-+      struct buffer_head *bh;
-+
-+      *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-+
-+      if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
-+              inode->i_size += inode->i_sb->s_blocksize;
-+              EXT3_I(inode)->i_disksize = inode->i_size;
-+              ext3_journal_get_write_access(handle,bh);
-+      }
-+      return bh;
-+}
-+
-+#ifndef assert
-+#define assert(test) J_ASSERT(test)
-+#endif
-+
-+#ifndef swap
-+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-+#endif
-+
-+typedef struct { u32 v; } le_u32;
-+typedef struct { u16 v; } le_u16;
-+
-+#ifdef DX_DEBUG
-+#define dxtrace(command) command
-+#else
-+#define dxtrace(command) 
-+#endif
-+
-+struct fake_dirent
-+{
-+      /*le*/u32 inode;
-+      /*le*/u16 rec_len;
-+      u8 name_len;
-+      u8 file_type;
-+};
-+
-+struct dx_countlimit
-+{
-+      le_u16 limit;
-+      le_u16 count;
-+};
-+
-+struct dx_entry
-+{
-+      le_u32 hash;
-+      le_u32 block;
-+};
-+
-+/*
-+ * dx_root_info is laid out so that if it should somehow get overlaid by a
-+ * dirent the two low bits of the hash version will be zero.  Therefore, the
-+ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
-+ */
-+
-+struct dx_root
-+{
-+      struct fake_dirent dot;
-+      char dot_name[4];
-+      struct fake_dirent dotdot;
-+      char dotdot_name[4];
-+      struct dx_root_info
-+      {
-+              le_u32 reserved_zero;
-+              u8 hash_version;
-+              u8 info_length; /* 8 */
-+              u8 indirect_levels;
-+              u8 unused_flags;
-+      }
-+      info;
-+      struct dx_entry entries[0];
-+};
-+
-+struct dx_node
-+{
-+      struct fake_dirent fake;
-+      struct dx_entry entries[0];
-+};
-+
-+
-+struct dx_frame
-+{
-+      struct buffer_head *bh;
-+      struct dx_entry *entries;
-+      struct dx_entry *at;
-+};
-+
-+struct dx_map_entry
-+{
-+      u32 hash;
-+      u32 offs;
-+};
-+
-+#ifdef CONFIG_EXT3_INDEX
-+static inline unsigned dx_get_block (struct dx_entry *entry);
-+static void dx_set_block (struct dx_entry *entry, unsigned value);
-+static inline unsigned dx_get_hash (struct dx_entry *entry);
-+static void dx_set_hash (struct dx_entry *entry, unsigned value);
-+static unsigned dx_get_count (struct dx_entry *entries);
-+static unsigned dx_get_limit (struct dx_entry *entries);
-+static void dx_set_count (struct dx_entry *entries, unsigned value);
-+static void dx_set_limit (struct dx_entry *entries, unsigned value);
-+static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
-+static unsigned dx_node_limit (struct inode *dir);
-+static struct dx_frame *dx_probe(struct dentry *dentry,
-+                               struct inode *dir,
-+                               struct dx_hash_info *hinfo,
-+                               struct dx_frame *frame,
-+                               int *err);
-+static void dx_release (struct dx_frame *frames);
-+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
-+                      struct dx_hash_info *hinfo, struct dx_map_entry map[]);
-+static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
-+              struct dx_map_entry *offsets, int count);
-+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
-+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
-+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                               struct dx_frame *frame,
-+                               struct dx_frame *frames, int *err,
-+                               __u32 *start_hash);
-+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-+                     struct ext3_dir_entry_2 **res_dir, int *err);
-+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode);
-+
-+/*
-+ * Future: use high four bits of block for coalesce-on-delete flags
-+ * Mask them off for now.
-+ */
-+
-+static inline unsigned dx_get_block (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->block.v) & 0x00ffffff;
-+}
-+
-+static inline void dx_set_block (struct dx_entry *entry, unsigned value)
-+{
-+      entry->block.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_hash (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->hash.v);
-+}
-+
-+static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
-+{
-+      entry->hash.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_count (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
-+}
-+
-+static inline unsigned dx_get_limit (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
-+}
-+
-+static inline void dx_set_count (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
-+}
-+
-+static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
-+}
-+
-+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
-+{
-+      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
-+              EXT3_DIR_REC_LEN(2) - infosize;
-+      return 0? 20: entry_space / sizeof(struct dx_entry);
-+}
-+
-+static inline unsigned dx_node_limit (struct inode *dir)
-+{
-+      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-+      return 0? 22: entry_space / sizeof(struct dx_entry);
-+}
-+
-+/*
-+ * Debug
-+ */
-+#ifdef DX_DEBUG
-+struct stats
-+{ 
-+      unsigned names;
-+      unsigned space;
-+      unsigned bcount;
-+};
-+
-+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
-+                               int size, int show_names)
-+{
-+      unsigned names = 0, space = 0;
-+      char *base = (char *) de;
-+      struct dx_hash_info h = *hinfo;
-+      
-+      printk("names: ");
-+      while ((char *) de < base + size)
-+      {
-+              if (de->inode)
-+              {
-+                      if (show_names)
-+                      {
-+                              int len = de->name_len;
-+                              char *name = de->name;
-+                              while (len--) printk("%c", *name++);
-+                              ext3fs_dirhash(de->name, de->name_len, &h);
-+                              printk(":%x.%u ", h.hash,
-+                                     ((char *) de - base));
-+                      }
-+                      space += EXT3_DIR_REC_LEN(de->name_len);
-+                      names++;
-+              }
-+              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      printk("(%i)\n", names);
-+      return (struct stats) { names, space, 1 };
-+}
-+
-+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
-+                           struct dx_entry *entries, int levels)
-+{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count = dx_get_count (entries), names = 0, space = 0, i;
-+      unsigned bcount = 0;
-+      struct buffer_head *bh;
-+      int err;
-+      printk("%i indexed blocks...\n", count);
-+      for (i = 0; i < count; i++, entries++)
-+      {
-+              u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
-+              u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
-+              struct stats stats;
-+              printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
-+              stats = levels?
-+                 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
-+                 dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
-+              names += stats.names;
-+              space += stats.space;
-+              bcount += stats.bcount;
-+              brelse (bh);
-+      }
-+      if (bcount)
-+              printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
-+                      names, space/bcount,(space/bcount)*100/blocksize);
-+      return (struct stats) { names, space, bcount};
-+}
-+#endif /* DX_DEBUG */
-+
-+/*
-+ * Probe for a directory leaf block to search.
-+ *
-+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
-+ * error in the directory index, and the caller should fall back to
-+ * searching the directory normally.  The callers of dx_probe **MUST**
-+ * check for this error code, and make sure it never gets reflected
-+ * back to userspace.
-+ *
-+ * If dentry is non-NULL the directory is taken from dentry->d_parent
-+ * and the target hash is computed from the dentry name; otherwise the
-+ * supplied dir is used and the caller must have preset hinfo->hash.
-+ *
-+ * On success the return value points at the last frame filled in
-+ * (frame_in[0] for a single-level index, frame_in[1] when the root has
-+ * one indirect level); each filled frame holds a buffer reference that
-+ * the caller drops with dx_release().  On failure NULL is returned, no
-+ * buffer reference is left held, and *err is either ERR_BAD_DX_DIR or
-+ * whatever ext3_bread stored there.
-+ */
-+static struct dx_frame *
-+dx_probe(struct dentry *dentry, struct inode *dir,
-+       struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
-+{
-+      unsigned count, indirect;
-+      struct dx_entry *at, *entries, *p, *q, *m;
-+      struct dx_root *root;
-+      struct buffer_head *bh;
-+      struct dx_frame *frame = frame_in;
-+      u32 hash;
-+
-+      /* Lets callers tell "nothing to release" apart after a failure. */
-+      frame->bh = NULL;
-+      if (dentry)
-+              dir = dentry->d_parent->d_inode;
-+      /* Block 0 of an indexed directory holds the dx_root. */
-+      if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
-+              goto fail;
-+      root = (struct dx_root *) bh->b_data;
-+      if (root->info.hash_version != DX_HASH_TEA &&
-+          root->info.hash_version != DX_HASH_HALF_MD4 &&
-+          root->info.hash_version != DX_HASH_LEGACY) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unrecognised inode hash code %d",
-+                           root->info.hash_version);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+      hinfo->hash_version = root->info.hash_version;
-+      hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
-+      if (dentry)
-+              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
-+      hash = hinfo->hash;
-+
-+      if (root->info.unused_flags & 1) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unimplemented inode hash flags: %#06x",
-+                           root->info.unused_flags);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+
-+      /* Only a root plus at most one level of interior nodes is handled. */
-+      if ((indirect = root->info.indirect_levels) > 1) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unimplemented inode hash depth: %#06x",
-+                           root->info.indirect_levels);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+
-+      /* The root's entry table starts info_length bytes past &root->info. */
-+      entries = (struct dx_entry *) (((char *)&root->info) +
-+                                     root->info.info_length);
-+      assert(dx_get_limit(entries) == dx_root_limit(dir,
-+                                                    root->info.info_length));
-+      dxtrace (printk("Look up %x", hash));
-+      while (1)
-+      {
-+              count = dx_get_count(entries);
-+              assert (count && count <= dx_get_limit(entries));
-+              /*
-+               * Binary search over entries[1..count-1] for the last entry
-+               * whose hash is <= the target.  entries[0] is never compared;
-+               * it is the result when every other hash is greater.
-+               */
-+              p = entries + 1;
-+              q = entries + count - 1;
-+              while (p <= q)
-+              {
-+                      m = p + (q - p)/2;
-+                      dxtrace(printk("."));
-+                      if (dx_get_hash(m) > hash)
-+                              q = m - 1;
-+                      else
-+                              p = m + 1;
-+              }
-+
-+              /* Disabled debugging aid: verifies the search result linearly. */
-+              if (0) // linear search cross check
-+              {
-+                      unsigned n = count - 1;
-+                      at = entries;
-+                      while (n--)
-+                      {
-+                              dxtrace(printk(","));
-+                              if (dx_get_hash(++at) > hash)
-+                              {
-+                                      at--;
-+                                      break;
-+                              }
-+                      }
-+                      assert (at == p - 1);
-+              }
-+
-+              /* p stopped one past the matching entry. */
-+              at = p - 1;
-+              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-+              frame->bh = bh;
-+              frame->entries = entries;
-+              frame->at = at;
-+              /* No more index levels below: this frame names the leaf block. */
-+              if (!indirect--) return frame;
-+              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
-+                      goto fail2;
-+              at = entries = ((struct dx_node *) bh->b_data)->entries;
-+              assert (dx_get_limit(entries) == dx_node_limit (dir));
-+              frame++;
-+      }
-+fail2:
-+      /* Drop the buffers of every frame filled in so far. */
-+      while (frame >= frame_in) {
-+              brelse(frame->bh);
-+              frame--;
-+      }
-+fail:
-+      return NULL;
-+}
-+
-+/*
-+ * Drop the buffer references held by a frame array filled in by
-+ * dx_probe.  A NULL frames[0].bh means nothing is held.  frames[1].bh
-+ * is released only when the root block records an indirect level.
-+ */
-+static void dx_release (struct dx_frame *frames)
-+{
-+      struct buffer_head *root_bh = frames[0].bh;
-+      struct dx_root *root;
-+
-+      if (!root_bh)
-+              return;
-+
-+      root = (struct dx_root *) root_bh->b_data;
-+      if (root->info.indirect_levels != 0)
-+              brelse(frames[1].bh);
-+      brelse(root_bh);
-+}
-+
-+/*
-+ * This function increments the frame pointer to search the next leaf
-+ * block, and reads in the necessary intervening nodes if the search
-+ * should be necessary.  Whether or not the search is necessary is
-+ * controlled by the hash parameter.  If the hash value is even, then
-+ * the search is only continued if the next block starts with that
-+ * hash value.  This is used if we are searching for a specific file.
-+ *
-+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
-+ *
-+ * This function returns 1 if the caller should continue to search,
-+ * or 0 if it should not.  If there is an error reading one of the
-+ * index blocks, it will return -1.
-+ *
-+ * If start_hash is non-null, it will be filled in with the starting
-+ * hash of the next page.
-+ *
-+ * frame is the deepest frame currently in use and frames is the base
-+ * of the frame array; the walk never moves above frames.
-+ */
-+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                               struct dx_frame *frame,
-+                               struct dx_frame *frames, int *err,
-+                               __u32 *start_hash)
-+{
-+      struct dx_frame *p;
-+      struct buffer_head *bh;
-+      int num_frames = 0;
-+      __u32 bhash;
-+
-+      /*
-+       * NOTE(review): this stores a positive ENOENT, whereas
-+       * ext3_dx_find_entry reports -ENOENT; confirm whether any caller
-+       * relies on this value before changing it.
-+       */
-+      *err = ENOENT;
-+      p = frame;
-+      /*
-+       * Find the next leaf page by incrementing the frame pointer.
-+       * If we run out of entries in the interior node, loop around and
-+       * increment pointer in the parent node.  When we break out of
-+       * this loop, num_frames indicates the number of interior
-+       * nodes that need to be read.
-+       */
-+      while (1) {
-+              if (++(p->at) < p->entries + dx_get_count(p->entries))
-+                      break;
-+              /* Ran off the end of the topmost frame: no more blocks. */
-+              if (p == frames)
-+                      return 0;
-+              num_frames++;
-+              p--;
-+      }
-+
-+      /*
-+       * If the requested hash is even, we are looking for one specific
-+       * name: continue only if the next block's starting hash, with its
-+       * low (continuation) bit masked off, equals the requested hash.
-+       * If it doesn't, return since there's no point in reading the
-+       * successive index pages.  An odd hash skips this check.
-+       */
-+      bhash = dx_get_hash(p->at);
-+      if (start_hash)
-+              *start_hash = bhash;
-+      if ((hash & 1) == 0) {
-+              if ((bhash & ~1) != hash)
-+                      return 0;
-+      }
-+      /*
-+       * If the hash is HASH_NB_ALWAYS, we always go to the next
-+       * block so no check is necessary
-+       */
-+      while (num_frames--) {
-+              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
-+                                    0, err)))
-+                      return -1; /* Failure */
-+              /* Replace the lower frame's buffer with the newly read node. */
-+              p++;
-+              brelse (p->bh);
-+              p->bh = bh;
-+              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
-+      }
-+      return 1;
-+}
-+
-+
-+/*
-+ * Step to the directory entry that follows p, using p's on-disk
-+ * rec_len.  p is at least 6 bytes before the end of page.
-+ */
-+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
-+{
-+      char *cursor = (char *) p;
-+
-+      cursor += le16_to_cpu(p->rec_len);
-+      return (struct ext3_dir_entry_2 *) cursor;
-+}
-+
-+/*
-+ * This function fills a red-black tree with information from a
-+ * directory.  We start scanning the directory in hash order, starting
-+ * at start_hash and start_minor_hash.
-+ *
-+ * Entries whose (hash, minor_hash) sorts below the starting point are
-+ * skipped; every other entry of each visited leaf block is handed to
-+ * ext3_htree_store_dirent.  Scanning stops when the index is exhausted
-+ * or, once at least one entry has been stored, when the next block's
-+ * starting hash is not marked as a continuation (low bit clear).
-+ *
-+ * If next_hash is non-NULL it receives the starting hash of the block
-+ * that follows the last one scanned (~1 when there is none).
-+ *
-+ * This function returns the number of entries inserted into the tree,
-+ * or the error code left in err by dx_probe / ext3_bread on failure.
-+ */
-+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-+                       __u32 start_minor_hash, __u32 *next_hash)
-+{
-+      struct dx_hash_info hinfo;
-+      struct buffer_head *bh;
-+      struct ext3_dir_entry_2 *de, *top;
-+      /*
-+       * The frames must be per-call storage: they hold this caller's
-+       * index path and buffer references.  Declaring them static made
-+       * every concurrent caller share (and clobber) one set of frames.
-+       */
-+      struct dx_frame frames[2], *frame;
-+      struct inode *dir;
-+      int block, err;
-+      int count = 0;
-+      int ret;
-+      __u32 hashval;
-+
-+      dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
-+                     start_minor_hash));
-+      dir = dir_file->f_dentry->d_inode;
-+      /* No dentry is passed to dx_probe, so the target hash is preset. */
-+      hinfo.hash = start_hash;
-+      hinfo.minor_hash = 0;
-+      frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
-+      if (!frame)
-+              return err;
-+
-+      while (1) {
-+              block = dx_get_block(frame->at);
-+              dxtrace(printk("Reading block %d\n", block));
-+              if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
-+                      goto errout;
-+
-+              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+              /* Last position at which a minimal entry still fits. */
-+              top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize -
-+                                     EXT3_DIR_REC_LEN(0));
-+              for (; de < top; de = ext3_next_entry(de)) {
-+                      ext3fs_dirhash(de->name, de->name_len, &hinfo);
-+                      if ((hinfo.hash < start_hash) ||
-+                          ((hinfo.hash == start_hash) &&
-+                           (hinfo.minor_hash < start_minor_hash)))
-+                              continue;
-+                      ext3_htree_store_dirent(dir_file, hinfo.hash,
-+                                              hinfo.minor_hash, de);
-+                      count++;
-+              }
-+              brelse (bh);
-+              /* Reported as next_hash if there is no following block. */
-+              hashval = ~1;
-+              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
-+                                          frame, frames, &err, &hashval);
-+              if (next_hash)
-+                      *next_hash = hashval;
-+              if (ret == -1)
-+                      goto errout;
-+              /*
-+               * Stop if:  (a) there are no more entries, or
-+               * (b) we have inserted at least one entry and the
-+               * next hash value is not a continuation
-+               */
-+              if ((ret == 0) ||
-+                  (count && ((hashval & 1) == 0)))
-+                      break;
-+      }
-+      dx_release(frames);
-+      dxtrace(printk("Fill tree: returned %d entries\n", count));
-+      return count;
-+errout:
-+      dx_release(frames);
-+      return (err);
-+}
-+
-+
-+/*
-+ * Directory block splitting, compacting
-+ */
-+
-+/*
-+ * Build a (hash, offset) map of the live entries in a directory block.
-+ * The block starts at de and is size bytes long.  Map entries are
-+ * written downwards from map_tail, one for each entry that has both a
-+ * non-zero name_len and a non-zero inode.  Hashing is done on a private
-+ * copy of *hinfo, so the caller's hash info is left untouched.
-+ *
-+ * Returns the number of map entries written.
-+ */
-+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
-+                      struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
-+{
-+      char *start = (char *) de;
-+      char *pos;
-+      struct dx_hash_info scratch = *hinfo;
-+      int mapped = 0;
-+
-+      /* XXX: do we need to check rec_len == 0 case? -Chris */
-+      for (pos = start; pos < start + size;
-+           pos += le16_to_cpu(de->rec_len)) {
-+              de = (struct ext3_dir_entry_2 *) pos;
-+              if (!de->name_len || !de->inode)
-+                      continue;
-+              ext3fs_dirhash(de->name, de->name_len, &scratch);
-+              --map_tail;
-+              map_tail->hash = scratch.hash;
-+              map_tail->offs = (u32) (pos - start);
-+              mapped++;
-+      }
-+      return mapped;
-+}
-+
-+/*
-+ * Sort the count entries of map into ascending hash order, in place.
-+ * Comb-sort passes with a shrinking gap run first; a plain bubble sort
-+ * then finishes the job and guarantees the final ordering.
-+ */
-+static void dx_sort_map (struct dx_map_entry *map, unsigned count)
-+{
-+      struct dx_map_entry *p, *q, *top = map + count - 1;
-+      int more;
-+      /* Combsort until bubble sort doesn't suck */
-+      /* From here on count is reused as the comb gap, not the length. */
-+      while (count > 2)
-+      {
-+              count = count*10/13;
-+              /*
-+               * count is unsigned, so count - 9 wraps to a huge value
-+               * for gaps below 9; the test is true only for 9 and 10.
-+               */
-+              if (count - 9 < 2) /* 9, 10 -> 11 */
-+                      count = 11;
-+              /* Compare each element with the one "gap" slots before it. */
-+              for (p = top, q = p - count; q >= map; p--, q--)
-+                      if (p->hash < q->hash)
-+                              swap(*p, *q);
-+      }
-+      /* Garden variety bubble sort */
-+      do {
-+              more = 0;
-+              q = top;
-+              /* Walk adjacent pairs from the top down; repeat until no swap. */
-+              while (q-- > map)
-+              {
-+                      if (q[1].hash >= q[0].hash)
-+                              continue;
-+                      swap(*(q+1), *q);
-+                      more = 1;
-+              }
-+      } while(more);
-+}
-+
-+/*
-+ * Insert a new index entry (hash -> block) directly after frame->at in
-+ * the frame's entry table, shifting the following entries up by one and
-+ * bumping the stored count.  The table must have a free slot.
-+ */
-+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
-+{
-+      struct dx_entry *entries = frame->entries;
-+      struct dx_entry *old = frame->at;
-+      struct dx_entry *slot = old + 1;
-+      int count = dx_get_count(entries);
-+      char *tail_end;
-+
-+      assert(count < dx_get_limit(entries));
-+      assert(old < entries + count);
-+
-+      /* Open a one-entry gap at slot by moving the tail of the table. */
-+      tail_end = (char *)(entries + count);
-+      memmove(slot + 1, slot, tail_end - (char *)(slot));
-+
-+      dx_set_hash(slot, hash);
-+      dx_set_block(slot, block);
-+      dx_set_count(entries, count + 1);
-+}
-+#endif
-+
-+
-+/*
-+ * Clear EXT3_INDEX_FL on the inode when the filesystem does not have
-+ * the DIR_INDEX compat feature; otherwise leave the flags alone.
-+ */
-+static void ext3_update_dx_flag(struct inode *inode)
-+{
-+      if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-+                                  EXT3_FEATURE_COMPAT_DIR_INDEX))
-+              return;
-+
-+      EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
-+}
-+
- /*
-  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
-  *
-@@ -96,6 +726,7 @@
-       return 0;
- }
-+
- /*
-  *    ext3_find_entry()
-  *
-@@ -107,6 +738,8 @@
-  * The returned buffer_head has ->b_count elevated.  The caller is expected
-  * to brelse() it when appropriate.
-  */
-+
-+      
- static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-                                       struct ext3_dir_entry_2 ** res_dir)
- {
-@@ -121,12 +754,32 @@
-       int num = 0;
-       int nblocks, i, err;
-       struct inode *dir = dentry->d_parent->d_inode;
-+      int namelen;
-+      const u8 *name;
-+      unsigned blocksize;
-       *res_dir = NULL;
-       sb = dir->i_sb;
--
-+      blocksize = sb->s_blocksize;
-+      namelen = dentry->d_name.len;
-+      name = dentry->d_name.name;
-+      if (namelen > EXT3_NAME_LEN)
-+              return NULL;
-+#ifdef CONFIG_EXT3_INDEX
-+      if (is_dx(dir)) {
-+              bh = ext3_dx_find_entry(dentry, res_dir, &err);
-+              /*
-+               * On success, or if the error was file not found,
-+               * return.  Otherwise, fall back to doing a search the
-+               * old fashioned way.
-+               */
-+              if (bh || (err != ERR_BAD_DX_DIR))
-+                      return bh;
-+              dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
-+      }
-+#endif
-       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
--      start = dir->u.ext3_i.i_dir_start_lookup;
-+      start = EXT3_I(dir)->i_dir_start_lookup;
-       if (start >= nblocks)
-               start = 0;
-       block = start;
-@@ -167,7 +820,7 @@
-               i = search_dirblock(bh, dir, dentry,
-                           block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
-               if (i == 1) {
--                      dir->u.ext3_i.i_dir_start_lookup = block;
-+                      EXT3_I(dir)->i_dir_start_lookup = block;
-                       ret = bh;
-                       goto cleanup_and_exit;
-               } else {
-@@ -198,6 +851,74 @@
-       return ret;
- }
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Look up dentry's name in an indexed directory.
-+ *
-+ * On success the buffer holding the entry is returned (the caller must
-+ * brelse() it) and *res_dir points at the entry inside it.  On failure
-+ * NULL is returned and *err is set:
-+ *   -ENOENT          the name is not in the directory;
-+ *   ERR_BAD_DX_DIR   the index or a matching entry is malformed, and
-+ *                    the caller should fall back to a linear search;
-+ *   otherwise        whatever dx_probe / ext3_bread reported.
-+ */
-+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-+                     struct ext3_dir_entry_2 **res_dir, int *err)
-+{
-+      struct super_block * sb;
-+      struct dx_hash_info     hinfo;
-+      u32 hash;
-+      struct dx_frame frames[2], *frame;
-+      struct ext3_dir_entry_2 *de, *top;
-+      struct buffer_head *bh;
-+      unsigned long block;
-+      int retval;
-+      int namelen = dentry->d_name.len;
-+      const u8 *name = dentry->d_name.name;
-+      struct inode *dir = dentry->d_parent->d_inode;
-+
-+      sb = dir->i_sb;
-+      /* NFS may look up ".." - look at dx_root directory block */
-+      if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
-+              if (!(frame = dx_probe(dentry, 0, &hinfo, frames, err)))
-+                      return NULL;
-+      } else {
-+              /*
-+               * "." and "..": skip the index and search block 0 directly
-+               * by faking a frame whose current entry names block 0.
-+               * NOTE(review): hinfo and frame->entries are not initialised
-+               * on this path; they are only consulted if the name is not
-+               * found in block 0 - confirm that cannot happen.
-+               */
-+              frame = frames;
-+              frame->bh = NULL;                       /* for dx_release() */
-+              frame->at = (struct dx_entry *)frames;  /* hack for zero entry*/
-+              dx_set_block(frame->at, 0);             /* dx_root block is 0 */
-+      }
-+      hash = hinfo.hash;
-+      do {
-+              block = dx_get_block(frame->at);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
-+                      goto errout;
-+              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+              /* Last position at which a minimal entry still fits. */
-+              top = (struct ext3_dir_entry_2 *)((char *)de + sb->s_blocksize -
-+                                     EXT3_DIR_REC_LEN(0));
-+              for (; de < top; de = ext3_next_entry(de))
-+              if (ext3_match (namelen, name, de)) {
-+                      if (!ext3_check_dir_entry("ext3_find_entry",
-+                                                dir, de, bh,
-+                                (block<<EXT3_BLOCK_SIZE_BITS(sb))
-+                                        +((char *)de - bh->b_data))) {
-+                              brelse (bh);
-+                              /*
-+                               * A corrupt entry is a format error, not
-+                               * "not found": report it explicitly rather
-+                               * than leaving a stale value in *err.
-+                               */
-+                              *err = ERR_BAD_DX_DIR;
-+                              goto errout;
-+                      }
-+                      *res_dir = de;
-+                      dx_release (frames);
-+                      return bh;
-+              }
-+              brelse (bh);
-+              /* Check to see if we should continue to search */
-+              retval = ext3_htree_next_block(dir, hash, frame,
-+                                             frames, err, 0);
-+              if (retval == -1) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                           "error reading index page in directory #%lu",
-+                           dir->i_ino);
-+                      goto errout;
-+              }
-+      } while (retval == 1);
-+
-+      *err = -ENOENT;
-+errout:
-+      dxtrace(printk("%s not found\n", name));
-+      dx_release (frames);
-+      return NULL;
-+}
-+#endif
-+
- static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
- {
-       struct inode * inode;
-@@ -214,8 +927,9 @@
-               brelse (bh);
-               inode = iget(dir->i_sb, ino);
--              if (!inode)
-+              if (!inode) {
-                       return ERR_PTR(-EACCES);
-+              }
-       }
-       d_add(dentry, inode);
-       return NULL;
-@@ -239,6 +953,301 @@
-               de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
- }
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Copy the count directory entries named by map (offsets relative to
-+ * from) into the buffer at to, packed back to back with each rec_len
-+ * trimmed to the minimum for its name.  Each source entry is marked
-+ * unused by zeroing its inode field.
-+ *
-+ * Returns a pointer to the last entry written at the destination.
-+ */
-+static struct ext3_dir_entry_2 *
-+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
-+{
-+      unsigned last_len = 0;
-+
-+      for (; count--; map++) {
-+              struct ext3_dir_entry_2 *src =
-+                      (struct ext3_dir_entry_2 *) (from + map->offs);
-+              struct ext3_dir_entry_2 *dst = (struct ext3_dir_entry_2 *) to;
-+
-+              last_len = EXT3_DIR_REC_LEN(src->name_len);
-+              memcpy (dst, src, last_len);
-+              dst->rec_len = cpu_to_le16(last_len);
-+              src->inode = 0;
-+              to += last_len;
-+      }
-+      /* Step back over the final entry so the caller can extend it. */
-+      return (struct ext3_dir_entry_2 *) (to - last_len);
-+}
-+
-+/*
-+ * Compact the live entries (non-zero inode and name_len) of the block
-+ * at base towards its start, trimming each rec_len to the minimum for
-+ * its name.  Dead entries are dropped.
-+ *
-+ * Returns a pointer to the last live entry after packing, or to the
-+ * start of the block when there is none.
-+ */
-+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
-+{
-+      struct ext3_dir_entry_2 *src = (struct ext3_dir_entry_2 *) base;
-+      struct ext3_dir_entry_2 *dst = src;
-+      struct ext3_dir_entry_2 *last = src;
-+      struct ext3_dir_entry_2 *following;
-+      unsigned live_len = 0;
-+
-+      for (; (char*)src < base + size; src = following) {
-+              /* Read the successor first: the move below may overwrite src. */
-+              following = (struct ext3_dir_entry_2 *) ((char *) src +
-+                                                  le16_to_cpu(src->rec_len));
-+              if (!src->inode || !src->name_len)
-+                      continue;
-+
-+              live_len = EXT3_DIR_REC_LEN(src->name_len);
-+              if (src > dst)
-+                      memmove(dst, src, live_len);
-+              dst->rec_len = cpu_to_le16(live_len);
-+              last = dst;
-+              dst = (struct ext3_dir_entry_2 *) (((char *) dst) + live_len);
-+      }
-+      return last;
-+}
-+
-+/*
-+ * Split the full leaf block *bh in two.
-+ *
-+ * A new block is appended to the directory, the entries of *bh are
-+ * sorted by hash and the upper half is moved into the new block.  An
-+ * index entry for the new block is inserted into frame after
-+ * frame->at.  Its hash has the low bit set ("continued") when the
-+ * first hash of the new block equals the last hash left in the old one.
-+ *
-+ * On success *bh is the block that should receive the entry whose
-+ * hash is hinfo->hash, and the return value is the last entry of that
-+ * block, whose rec_len has been stretched to the end of the block.
-+ * The other block is marked dirty and released here.
-+ *
-+ * On failure NULL is returned, *bh has been released and set to NULL,
-+ * and *error holds the error code.
-+ */
-+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
-+                      struct buffer_head **bh,struct dx_frame *frame,
-+                      struct dx_hash_info *hinfo, int *error)
-+{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count, continued;
-+      struct buffer_head *bh2;
-+      u32 newblock;
-+      u32 hash2;
-+      struct dx_map_entry *map;
-+      char *data1 = (*bh)->b_data, *data2;
-+      unsigned split;
-+      struct ext3_dir_entry_2 *de = NULL, *de2;
-+      int     err;
-+
-+      bh2 = ext3_append (handle, dir, &newblock, error);
-+      if (!(bh2)) {
-+              /* ext3_append was handed error, so *error is left as it set it. */
-+              brelse(*bh);
-+              *bh = NULL;
-+              goto errout;
-+      }
-+
-+      BUFFER_TRACE(*bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, *bh);
-+      if (err)
-+              goto journal_error;
-+      BUFFER_TRACE(frame->bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, frame->bh);
-+      if (err)
-+              goto journal_error;
-+
-+      data2 = bh2->b_data;
-+
-+      /* create map in the end of data2 block */
-+      map = (struct dx_map_entry *) (data2 + blocksize);
-+      count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
-+                           blocksize, hinfo, map);
-+      map -= count;
-+      split = count/2; // need to adjust to actual middle
-+      dx_sort_map (map, count);
-+      hash2 = map[split].hash;
-+      continued = hash2 == map[split - 1].hash;
-+      dxtrace(printk("Split block %i at %x, %i/%i\n",
-+              dx_get_block(frame->at), hash2, split, count-split));
-+
-+      /* Fancy dance to stay within two buffers */
-+      de2 = dx_move_dirents(data1, data2, map + split, count - split);
-+      de = dx_pack_dirents(data1,blocksize);
-+      /* Stretch the last entry of each block to the end of its block. */
-+      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+      de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-+      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
-+      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
-+
-+      /* Which block gets the new entry? */
-+      if (hinfo->hash >= hash2)
-+      {
-+              swap(*bh, bh2);
-+              de = de2;
-+      }
-+      dx_insert_block (frame, hash2 + continued, newblock);
-+      err = ext3_journal_dirty_metadata (handle, bh2);
-+      if (err)
-+              goto journal_error;
-+      err = ext3_journal_dirty_metadata (handle, frame->bh);
-+      if (err)
-+              goto journal_error;
-+      brelse (bh2);
-+      dxtrace(dx_show_index ("frame", frame->entries));
-+      return de;
-+
-+journal_error:
-+      brelse(*bh);
-+      brelse(bh2);
-+      *bh = NULL;
-+      ext3_std_error(dir->i_sb, err);
-+      /*
-+       * Report the journal failure to the caller.  Without this the
-+       * caller saw a stale *error and, once de had been assigned, a
-+       * non-NULL return paired with a NULL *bh.
-+       */
-+      *error = err;
-+errout:
-+      return NULL;
-+}
-+#endif
-+
-+
-+/*
-+ * Add a new entry into a directory (leaf) block.  If de is non-NULL,
-+ * it points to a directory entry which is guaranteed to be large
-+ * enough for the new directory entry; no duplicate-name check is done
-+ * in that case.  If de is NULL, then add_dirent_to_buf will search the
-+ * directory block for space.
-+ *
-+ * Returns:
-+ *   0        the entry was added;
-+ *   -ENOSPC  (de == NULL only) no slot in the block is large enough;
-+ *   -EIO     (de == NULL only) ext3_check_dir_entry rejected an entry;
-+ *   -EEXIST  (de == NULL only) the name is already in the block;
-+ *   other    the error from ext3_journal_get_write_access.
-+ *
-+ * If inode is NULL the new entry is written with inode number 0.
-+ *
-+ * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
-+ * all other cases bh is released.
-+ *
-+ * NOTE(review): a failure of ext3_journal_dirty_metadata is passed to
-+ * ext3_std_error but the function still returns 0 - confirm that this
-+ * is intended.
-+ */
-+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode, struct ext3_dir_entry_2 *de,
-+                           struct buffer_head * bh)
-+{
-+      struct inode    *dir = dentry->d_parent->d_inode;
-+      const char      *name = dentry->d_name.name;
-+      int             namelen = dentry->d_name.len;
-+      unsigned long   offset = 0;
-+      unsigned short  reclen;
-+      int             nlen, rlen, err;
-+      char            *top;
-+      
-+      /* Space the new entry needs. */
-+      reclen = EXT3_DIR_REC_LEN(namelen);
-+      if (!de) {
-+              de = (struct ext3_dir_entry_2 *)bh->b_data;
-+              /* Last start position at which the new entry would still fit. */
-+              top = bh->b_data + dir->i_sb->s_blocksize - reclen;
-+              while ((char *) de <= top) {
-+                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
-+                                                bh, offset)) {
-+                              brelse (bh);
-+                              return -EIO;
-+                      }
-+                      if (ext3_match (namelen, name, de)) {
-+                              brelse (bh);
-+                              return -EEXIST;
-+                      }
-+                      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+                      rlen = le16_to_cpu(de->rec_len);
-+                      /*
-+                       * A live entry offers only the slack after its own
-+                       * name; an unused one (inode == 0) offers all of it.
-+                       */
-+                      if ((de->inode? rlen - nlen: rlen) >= reclen)
-+                              break;
-+                      de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
-+                      offset += rlen;
-+              }
-+              /* Ran past the last usable position: bh is left held. */
-+              if ((char *) de > top)
-+                      return -ENOSPC;
-+      }
-+      BUFFER_TRACE(bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, bh);
-+      if (err) {
-+              ext3_std_error(dir->i_sb, err);
-+              brelse(bh);
-+              return err;
-+      }
-+      
-+      /* By now the buffer is marked for journaling */
-+      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+      rlen = le16_to_cpu(de->rec_len);
-+      if (de->inode) {
-+              /* Slot is in use: shrink it and put the new entry in its slack. */
-+              struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
-+              de1->rec_len = cpu_to_le16(rlen - nlen);
-+              de->rec_len = cpu_to_le16(nlen);
-+              de = de1;
-+      }
-+      de->file_type = EXT3_FT_UNKNOWN;
-+      if (inode) {
-+              de->inode = cpu_to_le32(inode->i_ino);
-+              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
-+      } else
-+              de->inode = 0;
-+      de->name_len = namelen;
-+      memcpy (de->name, name, namelen);
-+      /*
-+       * XXX shouldn't update any times until successful
-+       * completion of syscall, but too many callers depend
-+       * on this.
-+       *
-+       * XXX similarly, too many callers depend on
-+       * ext3_new_inode() setting the times, but error
-+       * recovery deletes the inode, so the worst that can
-+       * happen is that the times are slightly out of date
-+       * and/or different from the directory change time.
-+       */
-+      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-+      ext3_update_dx_flag(dir);
-+      dir->i_version = ++event;
-+      ext3_mark_inode_dirty(handle, dir);
-+      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+      err = ext3_journal_dirty_metadata(handle, bh);
-+      if (err)
-+              ext3_std_error(dir->i_sb, err);
-+      brelse(bh);
-+      return 0;
-+}
-+
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * This converts a one block unindexed directory to a 3 block indexed
-+ * directory, and adds the dentry to the indexed directory.
-+ *
-+ * bh is the directory's single block (block 0).  Its entries after
-+ * ".." are copied into a newly appended block, block 0 is rewritten as
-+ * the dx_root, and the new block is then split with do_split so that
-+ * the new name can be added with add_dirent_to_buf.
-+ *
-+ * bh is always consumed.  Returns 0 on success or the error code from
-+ * the failing step.
-+ */
-+static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
-+                          struct inode *inode, struct buffer_head *bh)
-+{
-+      struct inode    *dir = dentry->d_parent->d_inode;
-+      const char      *name = dentry->d_name.name;
-+      int             namelen = dentry->d_name.len;
-+      struct buffer_head *bh2;
-+      struct dx_root  *root;
-+      struct dx_frame frames[2], *frame;
-+      struct dx_entry *entries;
-+      struct ext3_dir_entry_2 *de, *de2;
-+      char            *data1, *top;
-+      unsigned        len;
-+      int             retval;
-+      unsigned        blocksize;
-+      struct dx_hash_info hinfo;
-+      u32             block;
-+
-+      blocksize =  dir->i_sb->s_blocksize;
-+      dxtrace(printk("Creating index\n"));
-+      retval = ext3_journal_get_write_access(handle, bh);
-+      if (retval) {
-+              ext3_std_error(dir->i_sb, retval);
-+              brelse(bh);
-+              return retval;
-+      }
-+      root = (struct dx_root *) bh->b_data;
-+
-+      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
-+      bh2 = ext3_append (handle, dir, &block, &retval);
-+      if (!(bh2)) {
-+              brelse(bh);
-+              return retval;
-+      }
-+      data1 = bh2->b_data;
-+
-+      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *)&root->dotdot;
-+      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
-+      len = ((char *) root) + blocksize - (char *) de;
-+      memcpy (data1, de, len);
-+      de = (struct ext3_dir_entry_2 *) data1;
-+      top = data1 + len;
-+      /*
-+       * Walk to the last copied entry.  The assignment is written as an
-+       * ordinary parenthesised expression; the previous form cast the
-+       * left-hand side, which is not valid C.
-+       */
-+      while ((char *)(de2 = (struct ext3_dir_entry_2 *)
-+                      ((char *)de + le16_to_cpu(de->rec_len))) < top)
-+              de = de2;
-+      /* Let the last entry absorb the rest of the new block. */
-+      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+      /* Initialize the root; the dot dirents already exist */
-+      de = (struct ext3_dir_entry_2 *) (&root->dotdot);
-+      de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
-+      memset (&root->info, 0, sizeof(root->info));
-+      root->info.info_length = sizeof(root->info);
-+      root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version;
-+      entries = root->entries;
-+      /* One index entry so far, pointing at block 1. */
-+      dx_set_block (entries, 1);
-+      dx_set_count (entries, 1);
-+      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
-+
-+      /* Initialize as for dx_probe */
-+      hinfo.hash_version = root->info.hash_version;
-+      hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed;
-+      ext3fs_dirhash(name, namelen, &hinfo);
-+      frame = frames;
-+      frame->entries = entries;
-+      frame->at = entries;
-+      frame->bh = bh;
-+      bh = bh2;
-+      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-+      /* Drops the root block held in frames[0]. */
-+      dx_release (frames);
-+      if (!(de))
-+              return retval;
-+
-+      return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+}
-+#endif
-+
- /*
-  *    ext3_add_entry()
-  *
-@@ -249,127 +1258,198 @@
-  * may not sleep between calling this and putting something into
-  * the entry, as someone else might have used it while you slept.
-  */
--
--/*
-- * AKPM: the journalling code here looks wrong on the error paths
-- */
- static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
-       struct inode *inode)
- {
-       struct inode *dir = dentry->d_parent->d_inode;
--      const char *name = dentry->d_name.name;
--      int namelen = dentry->d_name.len;
-       unsigned long offset;
--      unsigned short rec_len;
-       struct buffer_head * bh;
--      struct ext3_dir_entry_2 * de, * de1;
-+      struct ext3_dir_entry_2 *de;
-       struct super_block * sb;
-       int     retval;
-+#ifdef CONFIG_EXT3_INDEX
-+        int     dx_fallback=0;
-+#endif
-+        unsigned blocksize;
-+        unsigned nlen, rlen;
-+        u32 block, blocks;
-       sb = dir->i_sb;
--
--      if (!namelen)
-+        blocksize = sb->s_blocksize;
-+      if (!dentry->d_name.len)
-               return -EINVAL;
--      bh = ext3_bread (handle, dir, 0, 0, &retval);
-+#ifdef CONFIG_EXT3_INDEX
-+        if (is_dx(dir)) {
-+                retval = ext3_dx_add_entry(handle, dentry, inode);
-+                if (!retval || (retval != ERR_BAD_DX_DIR))
-+                        return retval;
-+                EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
-+                dx_fallback++;
-+                ext3_mark_inode_dirty(handle, dir);
-+        }
-+#endif
-+        blocks = dir->i_size >> sb->s_blocksize_bits;
-+        for (block = 0, offset = 0; block < blocks; block++) {
-+                bh = ext3_bread(handle, dir, block, 0, &retval);
-+                if(!bh)
-+                        return retval;
-+                retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
-+                if (retval != -ENOSPC)
-+                        return retval;
-+
-+#ifdef CONFIG_EXT3_INDEX
-+                if (blocks == 1 && !dx_fallback &&
-+                    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
-+                        return make_indexed_dir(handle, dentry, inode, bh);
-+#endif
-+                brelse(bh);
-+        }
-+        bh = ext3_append(handle, dir, &block, &retval);
-       if (!bh)
-               return retval;
--      rec_len = EXT3_DIR_REC_LEN(namelen);
--      offset = 0;
-       de = (struct ext3_dir_entry_2 *) bh->b_data;
--      while (1) {
--              if ((char *)de >= sb->s_blocksize + bh->b_data) {
--                      brelse (bh);
--                      bh = NULL;
--                      bh = ext3_bread (handle, dir,
--                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
--                      if (!bh)
--                              return retval;
--                      if (dir->i_size <= offset) {
--                              if (dir->i_size == 0) {
--                                      brelse(bh);
--                                      return -ENOENT;
--                              }
--
--                              ext3_debug ("creating next block\n");
--
--                              BUFFER_TRACE(bh, "get_write_access");
--                              ext3_journal_get_write_access(handle, bh);
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
--                              de->inode = 0;
--                              de->rec_len = le16_to_cpu(sb->s_blocksize);
--                              dir->u.ext3_i.i_disksize =
--                                      dir->i_size = offset + sb->s_blocksize;
--                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                              ext3_mark_inode_dirty(handle, dir);
--                      } else {
--
--                              ext3_debug ("skipping to next block\n");
-+        de->inode = 0;
-+        de->rec_len = cpu_to_le16(rlen = blocksize);
-+        nlen = 0;
-+        return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+}
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
--                      }
--              }
--              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
--                                         offset)) {
--                      brelse (bh);
--                      return -ENOENT;
--              }
--              if (ext3_match (namelen, name, de)) {
--                              brelse (bh);
--                              return -EEXIST;
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Returns 0 for success, or a negative error value
-+ */
-+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
-+                             struct inode *inode)
-+{
-+        struct dx_frame frames[2], *frame;
-+        struct dx_entry *entries, *at;
-+        struct dx_hash_info hinfo;
-+        struct buffer_head * bh;
-+        struct inode *dir = dentry->d_parent->d_inode;
-+        struct super_block * sb = dir->i_sb;
-+        struct ext3_dir_entry_2 *de;
-+        int err;
-+
-+        frame = dx_probe(dentry, 0, &hinfo, frames, &err);
-+        if (!frame)
-+                return err;
-+        entries = frame->entries;
-+        at = frame->at;
-+
-+        if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
-+                goto cleanup;
-+
-+        BUFFER_TRACE(bh, "get_write_access");
-+        err = ext3_journal_get_write_access(handle, bh);
-+        if (err)
-+                goto journal_error;
-+
-+        err = add_dirent_to_buf(handle, dentry, inode, 0, bh);
-+        if (err != -ENOSPC) {
-+                bh = 0;
-+                goto cleanup;
-+        }
-+
-+        /* Block full, should compress but for now just split */
-+        dxtrace(printk("using %u of %u node entries\n",
-+                       dx_get_count(entries), dx_get_limit(entries)));
-+        /* Need to split index? */
-+        if (dx_get_count(entries) == dx_get_limit(entries)) {
-+                u32 newblock;
-+                unsigned icount = dx_get_count(entries);
-+                int levels = frame - frames;
-+                struct dx_entry *entries2;
-+                struct dx_node *node2;
-+                struct buffer_head *bh2;
-+
-+                if (levels && (dx_get_count(frames->entries) ==
-+                               dx_get_limit(frames->entries))) {
-+                        ext3_warning(sb, __FUNCTION__,
-+                                     "Directory index full!\n");
-+                        err = -ENOSPC;
-+                        goto cleanup;
-               }
--              if ((le32_to_cpu(de->inode) == 0 &&
--                              le16_to_cpu(de->rec_len) >= rec_len) ||
--                  (le16_to_cpu(de->rec_len) >=
--                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
--                      BUFFER_TRACE(bh, "get_write_access");
--                      ext3_journal_get_write_access(handle, bh);
--                      /* By now the buffer is marked for journaling */
--                      offset += le16_to_cpu(de->rec_len);
--                      if (le32_to_cpu(de->inode)) {
--                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de1->rec_len =
--                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de->rec_len = cpu_to_le16(
--                                              EXT3_DIR_REC_LEN(de->name_len));
--                              de = de1;
-+
-+                bh2 = ext3_append (handle, dir, &newblock, &err);
-+                if (!(bh2))
-+                        goto cleanup;
-+                node2 = (struct dx_node *)(bh2->b_data);
-+                entries2 = node2->entries;
-+                node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
-+                node2->fake.inode = 0;
-+                BUFFER_TRACE(frame->bh, "get_write_access");
-+                err = ext3_journal_get_write_access(handle, frame->bh);
-+                if (err)
-+                        goto journal_error;
-+                if (levels) {
-+                        unsigned icount1 = icount/2, icount2 = icount - icount1;
-+                      unsigned hash2 = dx_get_hash(entries + icount1);
-+                        dxtrace(printk("Split index %i/%i\n", icount1, icount2));
-+
-+                        BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-+                        err = ext3_journal_get_write_access(handle,
-+                                                            frames[0].bh);
-+                        if (err)
-+                                goto journal_error;
-+
-+                        memcpy ((char *) entries2, (char *) (entries + icount1),+                               icount2 * sizeof(struct dx_entry));
-+                        dx_set_count (entries, icount1);
-+                        dx_set_count (entries2, icount2);
-+                        dx_set_limit (entries2, dx_node_limit(dir));
-+
-+                        /* Which index block gets the new entry? */
-+                        if (at - entries >= icount1) {
-+                                frame->at = at = at - entries - icount1 + entries2;
-+                                frame->entries = entries = entries2;
-+                                swap(frame->bh, bh2);
-                       }
--                      de->file_type = EXT3_FT_UNKNOWN;
--                      if (inode) {
--                              de->inode = cpu_to_le32(inode->i_ino);
--                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
--                      } else
--                              de->inode = 0;
--                      de->name_len = namelen;
--                      memcpy (de->name, name, namelen);
--                      /*
--                       * XXX shouldn't update any times until successful
--                       * completion of syscall, but too many callers depend
--                       * on this.
--                       *
--                       * XXX similarly, too many callers depend on
--                       * ext3_new_inode() setting the times, but error
--                       * recovery deletes the inode, so the worst that can
--                       * happen is that the times are slightly out of date
--                       * and/or different from the directory change time.
--                       */
--                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
--                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                      ext3_mark_inode_dirty(handle, dir);
--                      dir->i_version = ++event;
--                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
--                      ext3_journal_dirty_metadata(handle, bh);
--                      brelse(bh);
--                      return 0;
-+                        dx_insert_block (frames + 0, hash2, newblock);
-+                        dxtrace(dx_show_index ("node", frames[1].entries));
-+                        dxtrace(dx_show_index ("node",
-+                               ((struct dx_node *) bh2->b_data)->entries));
-+                        err = ext3_journal_dirty_metadata(handle, bh2);
-+                        if (err)
-+                                goto journal_error;
-+                        brelse (bh2);
-+                } else {
-+                        dxtrace(printk("Creating second level index...\n"));
-+                        memcpy((char *) entries2, (char *) entries,
-+                               icount * sizeof(struct dx_entry));
-+                        dx_set_limit(entries2, dx_node_limit(dir));
-+
-+                        /* Set up root */
-+                        dx_set_count(entries, 1);
-+                        dx_set_block(entries + 0, newblock);
-+                        ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-+
-+                        /* Add new access path frame */
-+                        frame = frames + 1;
-+                        frame->at = at = at - entries + entries2;
-+                        frame->entries = entries = entries2;
-+                        frame->bh = bh2;
-+                        err = ext3_journal_get_write_access(handle,
-+                                                            frame->bh);
-+                        if (err)
-+                                goto journal_error;
-               }
--              offset += le16_to_cpu(de->rec_len);
--              de = (struct ext3_dir_entry_2 *)
--                      ((char *) de + le16_to_cpu(de->rec_len));
-+                ext3_journal_dirty_metadata(handle, frames[0].bh);
-       }
--      brelse (bh);
--      return -ENOSPC;
-+        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
-+        if (!de)
-+                goto cleanup;
-+        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
-+        bh = 0;
-+        goto cleanup;
-+
-+journal_error:
-+        ext3_std_error(dir->i_sb, err);
-+cleanup:
-+        if (bh)
-+                brelse(bh);
-+        dx_release(frames);
-+        return err;
- }
-+#endif
- /*
-  * ext3_delete_entry deletes a directory entry by merging it with the
-@@ -453,9 +1533,11 @@
-       struct inode * inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -480,9 +1562,11 @@
-       struct inode *inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -512,9 +1596,11 @@
-       if (dir->i_nlink >= EXT3_LINK_MAX)
-               return -EMLINK;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -526,7 +1612,8 @@
-       inode->i_op = &ext3_dir_inode_operations;
-       inode->i_fop = &ext3_dir_operations;
--      inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
-+        inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-+        inode->i_blocks = 0;
-       dir_block = ext3_bread (handle, inode, 0, 1, &err);
-       if (!dir_block) {
-               inode->i_nlink--; /* is this nlink == 0? */
-@@ -555,21 +1642,19 @@
-       brelse (dir_block);
-       ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_entry (handle, dentry, inode);
--      if (err)
--              goto out_no_entry;
-+      if (err) {
-+              inode->i_nlink = 0;
-+              ext3_mark_inode_dirty(handle, inode);
-+              iput (inode);
-+              goto out_stop;
-+      }
-       dir->i_nlink++;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       d_instantiate(dentry, inode);
- out_stop:
-       ext3_journal_stop(handle, dir);
-       return err;
--
--out_no_entry:
--      inode->i_nlink = 0;
--      ext3_mark_inode_dirty(handle, inode);
--      iput (inode);
--      goto out_stop;
- }
- /*
-@@ -656,7 +1741,7 @@
-       int err = 0, rc;
-       
-       lock_super(sb);
--      if (!list_empty(&inode->u.ext3_i.i_orphan))
-+      if (!list_empty(&EXT3_I(inode)->i_orphan))
-               goto out_unlock;
-       /* Orphan handling is only valid for files with data blocks
-@@ -697,7 +1782,7 @@
-        * This is safe: on error we're going to ignore the orphan list
-        * anyway on the next recovery. */
-       if (!err)
--              list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
-+              list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
-       jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
-       jbd_debug(4, "orphan inode %ld will point to %d\n",
-@@ -715,25 +1800,26 @@
- int ext3_orphan_del(handle_t *handle, struct inode *inode)
- {
-       struct list_head *prev;
-+        struct ext3_inode_info *ei = EXT3_I(inode);
-       struct ext3_sb_info *sbi;
-       ino_t ino_next; 
-       struct ext3_iloc iloc;
-       int err = 0;
-       
-       lock_super(inode->i_sb);
--      if (list_empty(&inode->u.ext3_i.i_orphan)) {
-+        if (list_empty(&ei->i_orphan)) {
-               unlock_super(inode->i_sb);
-               return 0;
-       }
-       ino_next = NEXT_ORPHAN(inode);
--      prev = inode->u.ext3_i.i_orphan.prev;
-+        prev = ei->i_orphan.prev;
-       sbi = EXT3_SB(inode->i_sb);
-       jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
--      list_del(&inode->u.ext3_i.i_orphan);
--      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
-+        list_del(&ei->i_orphan);
-+        INIT_LIST_HEAD(&ei->i_orphan);
-       /* If we're on an error path, we may not have a valid
-        * transaction handle with which to update the orphan list on
-@@ -794,8 +1880,9 @@
-       handle_t *handle;
-       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       retval = -ENOENT;
-       bh = ext3_find_entry (dentry, &de);
-@@ -833,7 +1920,7 @@
-       ext3_mark_inode_dirty(handle, inode);
-       dir->i_nlink--;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+        ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
- end_rmdir:
-@@ -851,8 +1938,9 @@
-       handle_t *handle;
-       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -879,7 +1967,7 @@
-       if (retval)
-               goto end_unlink;
-       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       inode->i_nlink--;
-       if (!inode->i_nlink)
-@@ -905,9 +1993,11 @@
-       if (l > dir->i_sb->s_blocksize)
-               return -ENAMETOOLONG;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -917,7 +2007,7 @@
-       if (IS_ERR(inode))
-               goto out_stop;
--      if (l > sizeof (inode->u.ext3_i.i_data)) {
-+        if (l > sizeof (EXT3_I(inode)->i_data)) {
-               inode->i_op = &ext3_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
-               /*
-@@ -926,25 +2016,23 @@
-                * i_size in generic_commit_write().
-                */
-               err = block_symlink(inode, symname, l);
--              if (err)
--                      goto out_no_entry;
-+                if (err) {
-+                        ext3_dec_count(handle, inode);
-+                        ext3_mark_inode_dirty(handle, inode);
-+                        iput (inode);
-+                        goto out_stop;
-+                }
-       } else {
-               inode->i_op = &ext3_fast_symlink_inode_operations;
--              memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
-+                memcpy((char*)&EXT3_I(inode)->i_data,symname,l);
-               inode->i_size = l-1;
-       }
--      inode->u.ext3_i.i_disksize = inode->i_size;
-+        EXT3_I(inode)->i_disksize = inode->i_size;
-       ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_nondir(handle, dentry, inode);
- out_stop:
-       ext3_journal_stop(handle, dir);
-       return err;
--
--out_no_entry:
--      ext3_dec_count(handle, inode);
--      ext3_mark_inode_dirty(handle, inode);
--      iput (inode);
--      goto out_stop;
- }
- static int ext3_link (struct dentry * old_dentry,
-@@ -957,12 +2045,15 @@
-       if (S_ISDIR(inode->i_mode))
-               return -EPERM;
--      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (inode->i_nlink >= EXT3_LINK_MAX) {
-               return -EMLINK;
-+      }
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -996,9 +2087,11 @@
-       old_bh = new_bh = dir_bh = NULL;
--      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
-               handle->h_sync = 1;
-@@ -1078,7 +2171,7 @@
-               new_inode->i_ctime = CURRENT_TIME;
-       }
-       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
--      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(old_dir);
-       if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext3_journal_get_write_access(handle, dir_bh);
-@@ -1090,7 +2183,7 @@
-                       new_inode->i_nlink--;
-               } else {
-                       new_dir->i_nlink++;
--                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                      ext3_update_dx_flag(new_dir);
-                       ext3_mark_inode_dirty(handle, new_dir);
-               }
-       }
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     2004-05-27 11:07:21.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  2004-05-27 11:08:28.000000000 -0700
-@@ -741,6 +741,7 @@
-       es->s_mtime = cpu_to_le32(CURRENT_TIME);
-       ext3_update_dynamic_rev(sb);
-       EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+
-       ext3_commit_super (sb, es, 1);
-       if (test_opt (sb, DEBUG))
-               printk (KERN_INFO
-@@ -751,6 +752,7 @@
-                       EXT3_BLOCKS_PER_GROUP(sb),
-                       EXT3_INODES_PER_GROUP(sb),
-                       sbi->s_mount_opt);
-+
-       printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
-                               bdevname(sb->s_dev));
-       if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
-@@ -925,6 +927,7 @@
-       return res;
- }
-+
- struct super_block * ext3_read_super (struct super_block * sb, void * data,
-                                     int silent)
- {
-@@ -1113,6 +1116,9 @@
-       sbi->s_mount_state = le16_to_cpu(es->s_state);
-       sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
-       sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
-+      for (i=0; i < 4; i++)
-+              sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
-+      sbi->s_def_hash_version = es->s_def_hash_version;
-       if (sbi->s_blocks_per_group > blocksize * 8) {
-               printk (KERN_ERR
-@@ -1821,6 +1827,7 @@
-       exit_ext3_xattr();
- }
-+EXPORT_SYMBOL(ext3_force_commit);
- EXPORT_SYMBOL(ext3_bread);
- MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-Index: linux-2.4.19.SuSE/fs/ext3/file.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/file.c      2002-12-04 09:46:18.000000000 -0800
-+++ linux-2.4.19.SuSE/fs/ext3/file.c   2004-05-27 11:08:28.000000000 -0700
-@@ -38,6 +38,9 @@
- {
-       if (filp->f_mode & FMODE_WRITE)
-               ext3_discard_prealloc (inode);
-+      if (is_dx(inode) && filp->private_data)
-+              ext3_htree_free_dir_info(filp->private_data);
-+
-       return 0;
- }
-Index: linux-2.4.19.SuSE/fs/ext3/hash.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/hash.c      1970-01-02 14:15:01.000000000 -0800
-+++ linux-2.4.19.SuSE/fs/ext3/hash.c   2004-05-27 11:08:28.000000000 -0700
-@@ -0,0 +1,215 @@
-+/*
-+ *  linux/fs/ext3/hash.c
-+ *
-+ * Copyright (C) 2002 by Theodore Ts'o
-+ *
-+ * This file is released under the GPL v2.
-+ * 
-+ * This file may be redistributed under the terms of the GNU Public
-+ * License.
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/sched.h>
-+#include <linux/ext3_fs.h>
-+
-+#define DELTA 0x9E3779B9
-+
-+static void TEA_transform(__u32 buf[4], __u32 const in[])
-+{
-+      __u32   sum = 0;
-+      __u32   b0 = buf[0], b1 = buf[1];
-+      __u32   a = in[0], b = in[1], c = in[2], d = in[3];
-+      int     n = 16;         /* round counter: the loop below runs 16 times */
-+      /* Mix in[0..3] into (b0, b1); only buf[0] and buf[1] are read or updated. */
-+      do {                                                    
-+              sum += DELTA;                                   
-+              b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 
-+              b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 
-+      } while(--n);
-+
-+      buf[0] += b0;           /* accumulate the mixed words into the caller's state */
-+      buf[1] += b1;
-+}
-+
-+/* F, G and H are basic MD4 functions: selection, majority, parity */
-+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
-+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
-+#define H(x, y, z) ((x) ^ (y) ^ (z))
-+
-+/*
-+ * The generic round function.  The application is so specific that
-+ * we don't bother protecting all the arguments with parens, as is generally
-+ * good macro practice, in favor of extra legibility.
-+ * Rotation is separate from addition to prevent recomputation
-+ */
-+#define ROUND(f, a, b, c, d, x, s)    \
-+      (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s)))
-+#define K1 0
-+#define K2 013240474631UL
-+#define K3 015666365641UL
-+
-+/*
-+ * Basic cut-down MD4 transform: mixes in[0..7] into buf[0..3] in place; the caller picks the hash words.
-+ */
-+static void halfMD4Transform (__u32 buf[4], __u32 const in[])
-+{
-+      __u32   a = buf[0], b = buf[1], c = buf[2], d = buf[3];
-+
-+      /* Round 1 */
-+      ROUND(F, a, b, c, d, in[0] + K1,  3);
-+      ROUND(F, d, a, b, c, in[1] + K1,  7);
-+      ROUND(F, c, d, a, b, in[2] + K1, 11);
-+      ROUND(F, b, c, d, a, in[3] + K1, 19);
-+      ROUND(F, a, b, c, d, in[4] + K1,  3);
-+      ROUND(F, d, a, b, c, in[5] + K1,  7);
-+      ROUND(F, c, d, a, b, in[6] + K1, 11);
-+      ROUND(F, b, c, d, a, in[7] + K1, 19);
-+
-+      /* Round 2 */
-+      ROUND(G, a, b, c, d, in[1] + K2,  3);
-+      ROUND(G, d, a, b, c, in[3] + K2,  5);
-+      ROUND(G, c, d, a, b, in[5] + K2,  9);
-+      ROUND(G, b, c, d, a, in[7] + K2, 13);
-+      ROUND(G, a, b, c, d, in[0] + K2,  3);
-+      ROUND(G, d, a, b, c, in[2] + K2,  5);
-+      ROUND(G, c, d, a, b, in[4] + K2,  9);
-+      ROUND(G, b, c, d, a, in[6] + K2, 13);
-+
-+      /* Round 3 */
-+      ROUND(H, a, b, c, d, in[3] + K3,  3);
-+      ROUND(H, d, a, b, c, in[7] + K3,  9);
-+      ROUND(H, c, d, a, b, in[2] + K3, 11);
-+      ROUND(H, b, c, d, a, in[6] + K3, 15);
-+      ROUND(H, a, b, c, d, in[1] + K3,  3);
-+      ROUND(H, d, a, b, c, in[5] + K3,  9);
-+      ROUND(H, c, d, a, b, in[0] + K3, 11);
-+      ROUND(H, b, c, d, a, in[4] + K3, 15);
-+
-+      buf[0] += a;    /* add the mixed words back into the running state */
-+      buf[1] += b;
-+      buf[2] += c;
-+      buf[3] += d;
-+}
-+
-+#undef ROUND
-+#undef F
-+#undef G
-+#undef H
-+#undef K1
-+#undef K2
-+#undef K3
-+
-+/* The old legacy hash (used for DX_HASH_LEGACY): folds each of the len bytes of name into a two-word state */
-+static __u32 dx_hack_hash (const char *name, int len)
-+{
-+      __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
-+      while (len--) {
-+              __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
-+              /* NOTE(review): *name above is plain char, so bytes >= 0x80 depend on char signedness -- confirm */
-+              if (hash & 0x80000000) hash -= 0x7fffffff;
-+              hash1 = hash0;
-+              hash0 = hash;
-+      }
-+      return (hash0 << 1);    /* low bit of the result is always 0 */
-+}
-+
-+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
-+{
-+      __u32   pad, val;
-+      int     i;
-+      /* Pack up to num*4 bytes of msg into num 32-bit words at buf; unused space is filled from pad. */
-+      pad = (__u32)len | ((__u32)len << 8);   /* for len < 256: the length byte ... */
-+      pad |= pad << 16;                       /* ... repeated in all four byte positions */
-+
-+      val = pad;
-+      if (len > num*4)        /* consume at most num words' worth of bytes */
-+              len = num * 4;
-+      for (i=0; i < len; i++) {
-+              if ((i % 4) == 0)
-+                      val = pad;
-+              val = msg[i] + (val << 8);      /* NOTE(review): msg[i] is plain char; sign-dependent for bytes >= 0x80 -- confirm */
-+              if ((i % 4) == 3) {     /* four bytes gathered: emit one word */
-+                      *buf++ = val;
-+                      val = pad;
-+                      num--;
-+              }
-+      }
-+      if (--num >= 0)         /* a slot remains: store the partial word (or pad if none is pending) */
-+              *buf++ = val;
-+      while (--num >= 0)      /* fill every remaining slot with pad */
-+              *buf++ = pad;
-+}
-+
-+/*
-+ * Computes the hash of a filename into hinfo->hash and hinfo->minor_hash.
-+ * Returns 0 on success, or -1 (with hinfo->hash set to 0) when
-+ * hinfo->hash_version is not one of the DX_HASH_* values handled below.
-+ * With len 0 no name bytes are read, so a NULL name can be used to test
-+ * whether or not a hash version is supported.
-+ *
-+ * The seed is four 32-bit words of "secret" which can be used to
-+ * uniquify a hash.  If hinfo->seed is NULL or all zeros, the built-in
-+ * default seed set up below is used instead.
-+ *
-+ * DX_HASH_LEGACY ignores the seed and always yields a minor hash of 0;
-+ * the low bit of hinfo->hash is cleared for every hash version.
-+ */
-+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
-+{
-+      __u32   hash;
-+      __u32   minor_hash = 0;
-+      const char      *p;
-+      int             i;
-+      __u32           in[8], buf[4];
-+
-+      /* Initialize the default seed for the hash checksum functions */
-+      buf[0] = 0x67452301;
-+      buf[1] = 0xefcdab89;
-+      buf[2] = 0x98badcfe;
-+      buf[3] = 0x10325476;
-+
-+      /* Use the caller's seed unless it is missing or all zeros */
-+      if (hinfo->seed) {
-+              for (i=0; i < 4; i++) {
-+                      if (hinfo->seed[i])
-+                              break;
-+              }
-+              if (i < 4)
-+                      memcpy(buf, hinfo->seed, sizeof(buf));
-+      }
-+              
-+      switch (hinfo->hash_version) {
-+      case DX_HASH_LEGACY:
-+              hash = dx_hack_hash(name, len);
-+              break;
-+      case DX_HASH_HALF_MD4:
-+              p = name;
-+              while (len > 0) {
-+                      str2hashbuf(p, len, in, 8);
-+                      halfMD4Transform(buf, in);
-+                      len -= 32;      /* 8 words == 32 name bytes per pass */
-+                      p += 32;
-+              }
-+              minor_hash = buf[2];
-+              hash = buf[1];
-+              break;
-+      case DX_HASH_TEA:
-+              p = name;
-+              while (len > 0) {
-+                      str2hashbuf(p, len, in, 4);
-+                      TEA_transform(buf, in);
-+                      len -= 16;      /* 4 words == 16 name bytes per pass */
-+                      p += 16;
-+              }
-+              hash = buf[0];
-+              minor_hash = buf[1];
-+              break;
-+      default:
-+              hinfo->hash = 0;
-+              return -1;
-+      }
-+      hinfo->hash = hash & ~1;        /* low bit is always cleared */
-+      hinfo->minor_hash = minor_hash;
-+      return 0;
-+}
-Index: linux-2.4.19.SuSE/lib/rbtree.c
-===================================================================
---- linux-2.4.19.SuSE.orig/lib/rbtree.c        2002-08-02 17:39:46.000000000 -0700
-+++ linux-2.4.19.SuSE/lib/rbtree.c     2004-05-27 11:08:28.000000000 -0700
-@@ -17,6 +17,8 @@
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-   linux/lib/rbtree.c
-+
-+  rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002
- */
- #include <linux/rbtree.h>
-@@ -294,3 +296,43 @@
-               __rb_erase_color(child, parent, root);
- }
- EXPORT_SYMBOL(rb_erase);
-+
-+/*
-+ * Returns the first (leftmost) node of the tree in sort order, or 0 if the tree is empty.
-+ */
-+rb_node_t *rb_get_first(rb_root_t *root)
-+{
-+      rb_node_t       *n;
-+
-+      n = root->rb_node;
-+      if (!n)
-+              return 0;
-+      while (n->rb_left)      /* keep descending left until there is no left child */
-+              n = n->rb_left;
-+      return n;
-+}
-+EXPORT_SYMBOL(rb_get_first);
-+
-+/*
-+ * Given a node, returns the next node in sort order, or 0 if n is the last node in the tree.
-+ */
-+rb_node_t *rb_get_next(rb_node_t *n)
-+{
-+      rb_node_t       *parent;
-+
-+      if (n->rb_right) {      /* successor is the leftmost node of the right subtree */
-+              n = n->rb_right;
-+              while (n->rb_left)
-+                      n = n->rb_left;
-+              return n;
-+      } else {                /* otherwise climb until we come up from a left child */
-+              while ((parent = n->rb_parent)) {
-+                      if (n == parent->rb_left)
-+                              return parent;
-+                      n = parent;
-+              }
-+              return 0;       /* reached the root from the right: no successor */
-+      }
-+}
-+EXPORT_SYMBOL(rb_get_next);
-+
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     2003-10-05 09:30:34.000000000 -0700
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  2004-05-27 11:08:28.000000000 -0700
-@@ -40,6 +40,11 @@
- #define EXT3FS_VERSION                "2.4-0.9.18"
- /*
-+ * Always enable hashed directories
-+ */
-+#define CONFIG_EXT3_INDEX
-+
-+/*
-  * Debug code
-  */
- #ifdef EXT3FS_DEBUG
-@@ -414,8 +419,11 @@
- /*E0*/        __u32   s_journal_inum;         /* inode number of journal file */
-       __u32   s_journal_dev;          /* device number of journal file */
-       __u32   s_last_orphan;          /* start of list of inodes to delete */
--
--/*EC*/        __u32   s_reserved[197];        /* Padding to the end of the block */
-+      __u32   s_hash_seed[4];         /* HTREE hash seed */
-+      __u8    s_def_hash_version;     /* Default hash version to use */
-+      __u8    s_reserved_char_pad;
-+      __u16   s_reserved_word_pad;
-+      __u32   s_reserved[192];        /* Padding to the end of the block */
- };
- #ifdef __KERNEL__
-@@ -552,9 +560,46 @@
- #define EXT3_DIR_ROUND                        (EXT3_DIR_PAD - 1)
- #define EXT3_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT3_DIR_ROUND) & \
-                                        ~EXT3_DIR_ROUND)
-+/*
-+ * Hash Tree Directory indexing
-+ * (c) Daniel Phillips, 2001
-+ */
-+
-+#ifdef CONFIG_EXT3_INDEX
-+  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+                    (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
-+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#else
-+  #define is_dx(dir) 0
-+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
-+#endif
-+
-+/* Legal values for the dx_root hash_version field: */
-+
-+#define DX_HASH_LEGACY                0
-+#define DX_HASH_HALF_MD4      1
-+#define DX_HASH_TEA           2
-+
-+/* hash info structure used by the directory hash */
-+struct dx_hash_info
-+{
-+      u32             hash;
-+      u32             minor_hash;
-+      int             hash_version;
-+      u32             *seed;
-+};
- #ifdef __KERNEL__
- /*
-+ * Control parameters used by ext3_htree_next_block
-+ */
-+#define HASH_NB_ALWAYS                1
-+
-+
-+/*
-  * Describe an inode's exact location on disk and in memory
-  */
- struct ext3_iloc
-@@ -564,6 +609,27 @@
-       unsigned long block_group;
- };
-+
-+/*
-+ * This structure is stuffed into the struct file's private_data field
-+ * for directories.  It is where we put information so that we can do
-+ * readdir operations in hash tree order.
-+ */
-+struct dir_private_info {
-+      rb_root_t       root;
-+      rb_node_t       *curr_node;
-+      struct fname    *extra_fname;
-+      loff_t          last_pos;
-+      __u32           curr_hash;
-+      __u32           curr_minor_hash;
-+      __u32           next_hash;
-+};
-+
-+/*
-+ * Special error return code only used by dx_probe() and its callers.
-+ */
-+#define ERR_BAD_DX_DIR        -75000
-+
- /*
-  * Function prototypes
-  */
-@@ -591,11 +657,20 @@
- /* dir.c */
- extern int ext3_check_dir_entry(const char *, struct inode *,
--                              struct ext3_dir_entry_2 *, struct buffer_head *,
--                              unsigned long);
-+                              struct ext3_dir_entry_2 *,
-+                              struct buffer_head *, unsigned long);
-+extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
-+                                  __u32 minor_hash,
-+                                  struct ext3_dir_entry_2 *dirent);
-+extern void ext3_htree_free_dir_info(struct dir_private_info *p);
-+
- /* fsync.c */
- extern int ext3_sync_file (struct file *, struct dentry *, int);
-+/* hash.c */
-+extern int ext3fs_dirhash(const char *name, int len, struct
-+                        dx_hash_info *hinfo);
-+
- /* ialloc.c */
- extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
- extern void ext3_free_inode (handle_t *, struct inode *);
-@@ -628,6 +703,8 @@
- /* namei.c */
- extern int ext3_orphan_add(handle_t *, struct inode *);
- extern int ext3_orphan_del(handle_t *, struct inode *);
-+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-+                              __u32 start_minor_hash, __u32 *next_hash);
- /* super.c */
- extern void ext3_error (struct super_block *, const char *, const char *, ...)
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs_sb.h  2003-10-05 09:16:36.000000000 -0700
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h       2004-05-27 11:08:28.000000000 -0700
-@@ -62,6 +62,8 @@
-       int s_inode_size;
-       int s_first_ino;
-       u32 s_next_generation;
-+      u32 s_hash_seed[4];
-+      int s_def_hash_version;
-       /* Journaling */
-       struct inode * s_journal_inode;
-Index: linux-2.4.19.SuSE/include/linux/ext3_jbd.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_jbd.h    2003-10-05 09:30:34.000000000 -0700
-+++ linux-2.4.19.SuSE/include/linux/ext3_jbd.h 2004-05-27 11:08:28.000000000 -0700
-@@ -69,6 +69,8 @@
- #define EXT3_RESERVE_TRANS_BLOCKS     12
-+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
-+
- int
- ext3_mark_iloc_dirty(handle_t *handle, 
-                    struct inode *inode,
-Index: linux-2.4.19.SuSE/include/linux/rbtree.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/rbtree.h      2003-10-05 09:16:36.000000000 -0700
-+++ linux-2.4.19.SuSE/include/linux/rbtree.h   2004-05-27 11:08:28.000000000 -0700
-@@ -120,6 +120,8 @@
- extern void rb_insert_color(rb_node_t *, rb_root_t *);
- extern void rb_erase(rb_node_t *, rb_root_t *);
-+extern rb_node_t *rb_get_first(rb_root_t *root);
-+extern rb_node_t *rb_get_next(rb_node_t *n);
- static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link)
- {
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.19-suse.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.19-suse.patch
deleted file mode 100644 (file)
index 4bcefce..0000000
+++ /dev/null
@@ -1,481 +0,0 @@
- fs/ext3/file.c             |    4 
- fs/ext3/inode.c            |  116 ++++++++++++++++++++++
- fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    5 
- include/linux/ext3_fs_sb.h |   10 +
- 5 files changed, 365 insertions(+)
-
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 01:18:04 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:19:22 2003
-@@ -401,6 +401,220 @@
-       }
- }
-+#ifdef EXT3_DELETE_THREAD
-+/*
-+ * Delete inodes in a loop until there are no more to be deleted.
-+ * Normally, we run in the background doing the deletes and sleeping again,
-+ * and clients just add new inodes to be deleted onto the end of the list.
-+ * If someone is concerned about free space (e.g. block allocation or similar)
-+ * then they can sleep on s_delete_waiter_queue and be woken up when space
-+ * has been freed.
-+ */
-+int ext3_delete_thread(void *data)
-+{
-+      struct super_block *sb = data;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct task_struct *tsk = current;
-+
-+      /* Almost like daemonize, but not quite */
-+      exit_mm(current);
-+      tsk->session = 1;
-+      tsk->pgrp = 1;
-+      tsk->tty = NULL;
-+      exit_files(current);
-+      reparent_to_init();
-+
-+      sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
-+      sigfillset(&tsk->blocked);
-+
-+      /*tsk->flags |= PF_KERNTHREAD;*/
-+
-+      INIT_LIST_HEAD(&sbi->s_delete_list);
-+      wake_up(&sbi->s_delete_waiter_queue);
-+      ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
-+
-+      /* main loop */
-+      for (;;) {
-+              wait_event_interruptible(sbi->s_delete_thread_queue,
-+                                       !list_empty(&sbi->s_delete_list) ||
-+                                       !test_opt(sb, ASYNCDEL));
-+              ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
-+                         tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
-+
-+              spin_lock(&sbi->s_delete_lock);
-+              if (list_empty(&sbi->s_delete_list)) {
-+                      clear_opt(sbi->s_mount_opt, ASYNCDEL);
-+                      memset(&sbi->s_delete_list, 0,
-+                             sizeof(sbi->s_delete_list));
-+                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("delete thread on %s exiting\n",
-+                                 kdevname(sb->s_dev));
-+                      wake_up(&sbi->s_delete_waiter_queue);
-+                      break;
-+              }
-+
-+              while (!list_empty(&sbi->s_delete_list)) {
-+                      struct inode *inode=list_entry(sbi->s_delete_list.next,
-+                                                     struct inode, i_dentry);
-+                      unsigned long blocks = inode->i_blocks >>
-+                                                      (inode->i_blkbits - 9);
-+
-+                      list_del_init(&inode->i_dentry);
-+                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("%s delete ino %lu blk %lu\n",
-+                                 tsk->comm, inode->i_ino, blocks);
-+
-+                      iput(inode);
-+
-+                      spin_lock(&sbi->s_delete_lock);
-+                      sbi->s_delete_blocks -= blocks;
-+                      sbi->s_delete_inodes--;
-+              }
-+              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                                   "%lu blocks, %lu inodes on list?\n",
-+                                   sbi->s_delete_blocks,sbi->s_delete_inodes);
-+                      sbi->s_delete_blocks = 0;
-+                      sbi->s_delete_inodes = 0;
-+              }
-+              spin_unlock(&sbi->s_delete_lock);
-+              wake_up(&sbi->s_delete_waiter_queue);
-+      }
-+
-+      return 0;
-+}
-+
-+static void ext3_start_delete_thread(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int rc;
-+
-+      spin_lock_init(&sbi->s_delete_lock);
-+      init_waitqueue_head(&sbi->s_delete_thread_queue);
-+      init_waitqueue_head(&sbi->s_delete_waiter_queue);
-+
-+      if (!test_opt(sb, ASYNCDEL))
-+              return;
-+
-+      rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
-+      if (rc < 0)
-+              printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
-+                     rc);
-+      else
-+              wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
-+}
-+
-+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
-+{
-+      if (sbi->s_delete_list.next == 0)       /* thread never started */
-+              return;
-+
-+      clear_opt(sbi->s_mount_opt, ASYNCDEL);
-+      wake_up(&sbi->s_delete_thread_queue);
-+      wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
-+}
-+
-+/* Instead of playing games with the inode flags, destruction, etc we just
-+ * create a new inode locally and put it on a list for the truncate thread.
-+ * We need large parts of the inode struct in order to complete the
-+ * truncate and unlink, so we may as well just have a real inode to do it.
-+ *
-+ * If we have any problem deferring the delete, just delete it right away.
-+ * If we defer it, we also mark how many blocks it would free, so that we
-+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * delete thread when we run out of space.
-+ */
-+static void ext3_delete_inode_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (is_bad_inode(old_inode)) {
-+              clear_inode(old_inode);
-+              return;
-+      }
-+
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_delete;
-+
-+      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
-+              goto out_delete;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_delete;
-+      }
-+
-+      /* We can iget this inode again here, because our caller has unhashed
-+       * old_inode, so new_inode will be in a different inode struct.
-+       *
-+       * We need to ensure that the i_orphan pointers in the other inodes
-+       * point at the new inode copy instead of the old one so the orphan
-+       * list doesn't get corrupted when the old orphan inode is freed.
-+       */
-+      down(&sbi->s_orphan_lock);
-+
-+      sbi->s_mount_state |= EXT3_ORPHAN_FS;
-+      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
-+      if (is_bad_inode(new_inode)) {
-+              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
-+              iput(new_inode);
-+              new_inode = NULL;
-+      }
-+      if (!new_inode) {
-+              up(&sbi->s_orphan_lock);
-+              ext3_debug("delete inode %lu directly (bad read)\n",
-+                         old_inode->i_ino);
-+              goto out_delete;
-+      }
-+      J_ASSERT(new_inode != old_inode);
-+
-+      J_ASSERT(!list_empty(&oei->i_orphan));
-+
-+      nei = EXT3_I(new_inode);
-+      /* Ugh.  We need to insert new_inode into the same spot on the list
-+       * as old_inode was, to ensure the in-memory orphan list is still
-+       * in the same order as the on-disk orphan list (badness otherwise).
-+       */
-+      nei->i_orphan = oei->i_orphan;
-+      nei->i_orphan.next->prev = &nei->i_orphan;
-+      nei->i_orphan.prev->next = &nei->i_orphan;
-+      nei->i_state |= EXT3_STATE_DELETE;
-+      up(&sbi->s_orphan_lock);
-+
-+      clear_inode(old_inode);
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_delete:
-+      ext3_delete_inode(old_inode);
-+}
-+#else
-+#define ext3_start_delete_thread(sbi) do {} while(0)
-+#define ext3_stop_delete_thread(sbi) do {} while(0)
-+#endif /* EXT3_DELETE_THREAD */
-+
- void ext3_put_super (struct super_block * sb)
- {
-       struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -408,6 +622,7 @@
-       kdev_t j_dev = sbi->s_journal->j_dev;
-       int i;
-+      ext3_stop_delete_thread(sbi);
-       ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-       if (!(sb->s_flags & MS_RDONLY)) {
-@@ -476,7 +691,11 @@
-       write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
-       dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
-       put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
-+#ifdef EXT3_DELETE_THREAD
-+      delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
-+#else
-       delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
-+#endif
-       put_super:      ext3_put_super,         /* BKL held */
-       write_super:    ext3_write_super,       /* BKL held */
-       sync_fs:        ext3_sync_fs,
-@@ -553,6 +772,13 @@
-                       clear_opt (*mount_options, POSIX_ACL);
-               else
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+              if (!strcmp(this_char, "asyncdel"))
-+                      set_opt(*mount_options, ASYNCDEL);
-+              else if (!strcmp(this_char, "noasyncdel"))
-+                      clear_opt(*mount_options, ASYNCDEL);
-+              else
-+#endif
-               if (!strcmp (this_char, "bsddf"))
-                       clear_opt (*mount_options, MINIX_DF);
-               else if (!strcmp (this_char, "nouid32")) {
-@@ -1254,6 +1480,7 @@
-       }
-       ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-+      ext3_start_delete_thread(sb);
-       /*
-        * akpm: core read_super() calls in here with the superblock locked.
-        * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1692,6 +1919,9 @@
-       if (!parse_options(data, &tmp, sbi, &tmp, 1))
-               return -EINVAL;
-+      if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
-+              ext3_stop_delete_thread(sbi);
-+
-       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-               ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     Sun Nov 16 01:02:56 2003
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  Sun Nov 16 01:19:22 2003
-@@ -2114,6 +2114,118 @@
-       ext3_journal_stop(handle, inode);
- }
-+#ifdef EXT3_DELETE_THREAD
-+/* Move blocks from to-be-truncated inode over to a new inode, and delete
-+ * that one from the delete thread instead.  This avoids a lot of latency
-+ * when truncating large files.
-+ *
-+ * If we have any problem deferring the truncate, just truncate it right away.
-+ * If we defer it, we also mark how many blocks it would free, so that we
-+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * delete thread when we run out of space.
-+ */
-+void ext3_truncate_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      handle_t *handle;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_truncate;
-+
-+      /* XXX This is a temporary limitation for code simplicity.
-+       *     We could truncate to arbitrary sizes at some later time.
-+       */
-+      if (old_inode->i_size != 0)
-+              goto out_truncate;
-+
-+      /* We may want to truncate the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          old_inode->i_size > oei->i_disksize)
-+              goto out_truncate;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_truncate;
-+      }
-+
-+      ext3_discard_prealloc(old_inode);
-+
-+      /* old_inode   = 1
-+       * new_inode   = sb + GDT + ibitmap
-+       * orphan list = 1 inode/superblock for add, 2 inodes for del
-+       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
-+       */
-+      handle = ext3_journal_start(old_inode, 7);
-+      if (IS_ERR(handle))
-+              goto out_truncate;
-+
-+      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
-+      if (IS_ERR(new_inode)) {
-+              ext3_debug("truncate inode %lu directly (no new inodes)\n",
-+                         old_inode->i_ino);
-+              goto out_journal;
-+      }
-+
-+      nei = EXT3_I(new_inode);
-+
-+      down_write(&oei->truncate_sem);
-+      new_inode->i_size = old_inode->i_size;
-+      new_inode->i_blocks = old_inode->i_blocks;
-+      new_inode->i_uid = old_inode->i_uid;
-+      new_inode->i_gid = old_inode->i_gid;
-+      new_inode->i_nlink = 0;
-+
-+      /* FIXME when we do arbitrary truncates */
-+      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
-+      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
-+
-+      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
-+      memset(oei->i_data, 0, sizeof(oei->i_data));
-+
-+      nei->i_disksize = oei->i_disksize;
-+      nei->i_state |= EXT3_STATE_DELETE;
-+      up_write(&oei->truncate_sem);
-+
-+      if (ext3_orphan_add(handle, new_inode) < 0)
-+              goto out_journal;
-+
-+      if (ext3_orphan_del(handle, old_inode) < 0) {
-+              ext3_orphan_del(handle, new_inode);
-+              iput(new_inode);
-+              goto out_journal;
-+      }
-+
-+      ext3_journal_stop(handle, old_inode);
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_journal:
-+      ext3_journal_stop(handle, old_inode);
-+out_truncate:
-+      ext3_truncate(old_inode);
-+}
-+#endif /* EXT3_DELETE_THREAD */
-+
- /* 
-  * ext3_get_inode_loc returns with an extra refcount against the
-  * inode's underlying buffer_head on success. 
-Index: linux-2.4.19.SuSE/fs/ext3/file.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/file.c      Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/file.c   Sun Nov 16 01:19:22 2003
-@@ -132,7 +132,11 @@
- };
- struct inode_operations ext3_file_inode_operations = {
-+#ifdef EXT3_DELETE_THREAD
-+      truncate:       ext3_truncate_thread,   /* BKL held */
-+#else
-       truncate:       ext3_truncate,          /* BKL held */
-+#endif
-       setattr:        ext3_setattr,           /* BKL held */
-       setxattr:       ext3_setxattr,          /* BKL held */
-       getxattr:       ext3_getxattr,          /* BKL held */
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     Sun Nov 16 01:02:51 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  Sun Nov 16 01:20:06 2003
-@@ -193,6 +193,7 @@
-  */
- #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
- #define EXT3_STATE_NEW                        0x00000002 /* inode is newly created */
-+#define EXT3_STATE_DELETE             0x00000010 /* deferred delete inode */
- /*
-  * ioctl commands
-@@ -321,6 +322,7 @@
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
- #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -695,6 +697,9 @@
- extern void ext3_dirty_inode(struct inode *);
- extern int ext3_change_inode_journal_flag(struct inode *, int);
- extern void ext3_truncate (struct inode *);
-+#ifdef EXT3_DELETE_THREAD
-+extern void ext3_truncate_thread(struct inode *inode);
-+#endif
- /* ioctl.c */
- extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs_sb.h  Sun Nov 16 01:18:41 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs_sb.h       Sun Nov 16 01:19:22 2003
-@@ -29,6 +29,8 @@
- #define EXT3_MAX_GROUP_LOADED 8
-+#define EXT3_DELETE_THREAD
-+
- /*
-  * third extended-fs super-block data in memory
-  */
-@@ -75,6 +77,14 @@
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+      spinlock_t s_delete_lock;
-+      struct list_head s_delete_list;
-+      unsigned long s_delete_blocks;
-+      unsigned long s_delete_inodes;
-+      wait_queue_head_t s_delete_thread_queue;
-+      wait_queue_head_t s_delete_waiter_queue;
-+#endif
- };
- #endif        /* _LINUX_EXT3_FS_SB */
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch
deleted file mode 100644 (file)
index ca05893..0000000
+++ /dev/null
@@ -1,541 +0,0 @@
- fs/ext3/file.c             |    4 
- fs/ext3/inode.c            |  116 ++++++++++++++++++++++
- fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    5 
- include/linux/ext3_fs_sb.h |   10 +
- 5 files changed, 365 insertions(+)
-
-Index: linux-2.4.20/fs/ext3/super.c
-===================================================================
---- linux-2.4.20.orig/fs/ext3/super.c  2004-01-12 20:13:37.000000000 +0300
-+++ linux-2.4.20/fs/ext3/super.c       2004-01-13 16:59:54.000000000 +0300
-@@ -48,6 +48,8 @@
- static void ext3_clear_journal_err(struct super_block * sb,
-                                  struct ext3_super_block * es);
-+static int ext3_sync_fs(struct super_block * sb);
-+
- #ifdef CONFIG_JBD_DEBUG
- int journal_no_write[2];
-@@ -398,6 +400,221 @@
-       }
- }
-+#ifdef EXT3_DELETE_THREAD
-+/*
-+ * Delete inodes in a loop until there are no more to be deleted.
-+ * Normally, we run in the background doing the deletes and sleeping again,
-+ * and clients just add new inodes to be deleted onto the end of the list.
-+ * If someone is concerned about free space (e.g. block allocation or similar)
-+ * then they can sleep on s_delete_waiter_queue and be woken up when space
-+ * has been freed.
-+ */
-+int ext3_delete_thread(void *data)
-+{
-+      struct super_block *sb = data;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct task_struct *tsk = current;
-+
-+      /* Almost like daemonize, but not quite */
-+      exit_mm(current);
-+      tsk->session = 1;
-+      tsk->pgrp = 1;
-+      tsk->tty = NULL;
-+      exit_files(current);
-+      reparent_to_init();
-+
-+      sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
-+      sigfillset(&tsk->blocked);
-+
-+      /*tsk->flags |= PF_KERNTHREAD;*/
-+
-+      INIT_LIST_HEAD(&sbi->s_delete_list);
-+      wake_up(&sbi->s_delete_waiter_queue);
-+      ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
-+
-+      /* main loop */
-+      for (;;) {
-+              wait_event_interruptible(sbi->s_delete_thread_queue,
-+                                       !list_empty(&sbi->s_delete_list) ||
-+                                       !test_opt(sb, ASYNCDEL));
-+              ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
-+                         tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
-+
-+              spin_lock(&sbi->s_delete_lock);
-+              if (list_empty(&sbi->s_delete_list)) {
-+                      clear_opt(sbi->s_mount_opt, ASYNCDEL);
-+                      memset(&sbi->s_delete_list, 0,
-+                             sizeof(sbi->s_delete_list));
-+                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("delete thread on %s exiting\n",
-+                                 kdevname(sb->s_dev));
-+                      wake_up(&sbi->s_delete_waiter_queue);
-+                      break;
-+              }
-+
-+              while (!list_empty(&sbi->s_delete_list)) {
-+                      struct inode *inode=list_entry(sbi->s_delete_list.next,
-+                                                     struct inode, i_dentry);
-+                      unsigned long blocks = inode->i_blocks >>
-+                                                      (inode->i_blkbits - 9);
-+
-+                      list_del_init(&inode->i_dentry);
-+                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("%s delete ino %lu blk %lu\n",
-+                                 tsk->comm, inode->i_ino, blocks);
-+
-+                      iput(inode);
-+
-+                      spin_lock(&sbi->s_delete_lock);
-+                      sbi->s_delete_blocks -= blocks;
-+                      sbi->s_delete_inodes--;
-+              }
-+              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                                   "%lu blocks, %lu inodes on list?\n",
-+                                   sbi->s_delete_blocks,sbi->s_delete_inodes);
-+                      sbi->s_delete_blocks = 0;
-+                      sbi->s_delete_inodes = 0;
-+              }
-+              spin_unlock(&sbi->s_delete_lock);
-+              wake_up(&sbi->s_delete_waiter_queue);
-+      }
-+
-+      return 0;
-+}
-+
-+static void ext3_start_delete_thread(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int rc;
-+
-+      spin_lock_init(&sbi->s_delete_lock);
-+      init_waitqueue_head(&sbi->s_delete_thread_queue);
-+      init_waitqueue_head(&sbi->s_delete_waiter_queue);
-+
-+      if (!test_opt(sb, ASYNCDEL))
-+              return;
-+
-+      rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
-+      if (rc < 0)
-+              printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
-+                     rc);
-+      else
-+              wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
-+}
-+
-+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
-+{
-+      if (sbi->s_delete_list.next == 0)       /* thread never started */
-+              return;
-+
-+      clear_opt(sbi->s_mount_opt, ASYNCDEL);
-+      wake_up(&sbi->s_delete_thread_queue);
-+      wait_event(sbi->s_delete_waiter_queue,
-+                      sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
-+}
-+
-+/* Instead of playing games with the inode flags, destruction, etc we just
-+ * create a new inode locally and put it on a list for the truncate thread.
-+ * We need large parts of the inode struct in order to complete the
-+ * truncate and unlink, so we may as well just have a real inode to do it.
-+ *
-+ * If we have any problem deferring the delete, just delete it right away.
-+ * If we defer it, we also mark how many blocks it would free, so that we
-+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * delete thread when we run out of space.
-+ */
-+static void ext3_delete_inode_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (is_bad_inode(old_inode)) {
-+              clear_inode(old_inode);
-+              return;
-+      }
-+
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_delete;
-+
-+      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
-+              goto out_delete;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_delete;
-+      }
-+
-+      /* We can iget this inode again here, because our caller has unhashed
-+       * old_inode, so new_inode will be in a different inode struct.
-+       *
-+       * We need to ensure that the i_orphan pointers in the other inodes
-+       * point at the new inode copy instead of the old one so the orphan
-+       * list doesn't get corrupted when the old orphan inode is freed.
-+       */
-+      down(&sbi->s_orphan_lock);
-+
-+      sbi->s_mount_state |= EXT3_ORPHAN_FS;
-+      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
-+      if (is_bad_inode(new_inode)) {
-+              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
-+              iput(new_inode);
-+              new_inode = NULL;
-+      }
-+      if (!new_inode) {
-+              up(&sbi->s_orphan_lock);
-+              ext3_debug("delete inode %lu directly (bad read)\n",
-+                         old_inode->i_ino);
-+              goto out_delete;
-+      }
-+      J_ASSERT(new_inode != old_inode);
-+
-+      J_ASSERT(!list_empty(&oei->i_orphan));
-+
-+      nei = EXT3_I(new_inode);
-+      /* Ugh.  We need to insert new_inode into the same spot on the list
-+       * as old_inode was, to ensure the in-memory orphan list is still
-+       * in the same order as the on-disk orphan list (badness otherwise).
-+       */
-+      nei->i_orphan = oei->i_orphan;
-+      nei->i_orphan.next->prev = &nei->i_orphan;
-+      nei->i_orphan.prev->next = &nei->i_orphan;
-+      nei->i_state |= EXT3_STATE_DELETE;
-+      up(&sbi->s_orphan_lock);
-+
-+      clear_inode(old_inode);
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_delete:
-+      ext3_delete_inode(old_inode);
-+}
-+#else
-+#define ext3_start_delete_thread(sbi) do {} while(0)
-+#define ext3_stop_delete_thread(sbi) do {} while(0)
-+#endif /* EXT3_DELETE_THREAD */
-+
- void ext3_put_super (struct super_block * sb)
- {
-       struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +622,7 @@
-       kdev_t j_dev = sbi->s_journal->j_dev;
-       int i;
-+      J_ASSERT(sbi->s_delete_inodes == 0);
-       ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-       if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,9 +671,14 @@
-       write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
-       dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
-       put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
-+#ifdef EXT3_DELETE_THREAD
-+      delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
-+#else
-       delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
-+#endif
-       put_super:      ext3_put_super,         /* BKL held */
-       write_super:    ext3_write_super,       /* BKL held */
-+      sync_fs:        ext3_sync_fs,
-       write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-       unlockfs:       ext3_unlockfs,          /* BKL not held.  We take it */
-       statfs:         ext3_statfs,            /* BKL held */
-@@ -521,6 +744,13 @@
-                       clear_opt (*mount_options, XATTR_USER);
-               else
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+              if (!strcmp(this_char, "asyncdel"))
-+                      set_opt(*mount_options, ASYNCDEL);
-+              else if (!strcmp(this_char, "noasyncdel"))
-+                      clear_opt(*mount_options, ASYNCDEL);
-+              else
-+#endif
-               if (!strcmp (this_char, "bsddf"))
-                       clear_opt (*mount_options, MINIX_DF);
-               else if (!strcmp (this_char, "nouid32")) {
-@@ -1220,6 +1450,7 @@
-       }
-       ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-+      ext3_start_delete_thread(sb);
-       /*
-        * akpm: core read_super() calls in here with the superblock locked.
-        * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1625,6 +1856,21 @@
-       }
- }
-+static int ext3_sync_fs(struct super_block *sb)
-+{
-+      tid_t target;
-+
-+      if (atomic_read(&sb->s_active) == 0) {
-+              /* fs is being umounted: time to stop delete thread */
-+              ext3_stop_delete_thread(EXT3_SB(sb));
-+      }
-+
-+      sb->s_dirt = 0;
-+      target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-+      log_wait_commit(EXT3_SB(sb)->s_journal, target);
-+      return 0;
-+}
-+
- /*
-  * LVM calls this function before a (read-only) snapshot is created.  This
-  * gives us a chance to flush the journal completely and mark the fs clean.
-@@ -1682,6 +1928,9 @@
-       if (!parse_options(data, &tmp, sbi, &tmp, 1))
-               return -EINVAL;
-+      if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
-+              ext3_stop_delete_thread(sbi);
-+
-       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-               ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-Index: linux-2.4.20/fs/ext3/inode.c
-===================================================================
---- linux-2.4.20.orig/fs/ext3/inode.c  2004-01-12 20:13:37.000000000 +0300
-+++ linux-2.4.20/fs/ext3/inode.c       2004-01-13 16:55:45.000000000 +0300
-@@ -2552,6 +2552,118 @@
-       return err;
- }
-+#ifdef EXT3_DELETE_THREAD
-+/* Move blocks from to-be-truncated inode over to a new inode, and delete
-+ * that one from the delete thread instead.  This avoids a lot of latency
-+ * when truncating large files.
-+ *
-+ * If we have any problem deferring the truncate, just truncate it right away.
-+ * If we defer it, we also mark how many blocks it would free, so that we
-+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * delete thread when we run out of space.
-+ */
-+void ext3_truncate_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      handle_t *handle;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_truncate;
-+
-+      /* XXX This is a temporary limitation for code simplicity.
-+       *     We could truncate to arbitrary sizes at some later time.
-+       */
-+      if (old_inode->i_size != 0)
-+              goto out_truncate;
-+
-+      /* We may want to truncate the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          old_inode->i_size > oei->i_disksize)
-+              goto out_truncate;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_truncate;
-+      }
-+
-+      ext3_discard_prealloc(old_inode);
-+
-+      /* old_inode   = 1
-+       * new_inode   = sb + GDT + ibitmap
-+       * orphan list = 1 inode/superblock for add, 2 inodes for del
-+       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
-+       */
-+      handle = ext3_journal_start(old_inode, 7);
-+      if (IS_ERR(handle))
-+              goto out_truncate;
-+
-+      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
-+      if (IS_ERR(new_inode)) {
-+              ext3_debug("truncate inode %lu directly (no new inodes)\n",
-+                         old_inode->i_ino);
-+              goto out_journal;
-+      }
-+
-+      nei = EXT3_I(new_inode);
-+
-+      down_write(&oei->truncate_sem);
-+      new_inode->i_size = old_inode->i_size;
-+      new_inode->i_blocks = old_inode->i_blocks;
-+      new_inode->i_uid = old_inode->i_uid;
-+      new_inode->i_gid = old_inode->i_gid;
-+      new_inode->i_nlink = 0;
-+
-+      /* FIXME when we do arbitrary truncates */
-+      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
-+      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
-+
-+      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
-+      memset(oei->i_data, 0, sizeof(oei->i_data));
-+
-+      nei->i_disksize = oei->i_disksize;
-+      nei->i_state |= EXT3_STATE_DELETE;
-+      up_write(&oei->truncate_sem);
-+
-+      if (ext3_orphan_add(handle, new_inode) < 0)
-+              goto out_journal;
-+
-+      if (ext3_orphan_del(handle, old_inode) < 0) {
-+              ext3_orphan_del(handle, new_inode);
-+              iput(new_inode);
-+              goto out_journal;
-+      }
-+
-+      ext3_journal_stop(handle, old_inode);
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_journal:
-+      ext3_journal_stop(handle, old_inode);
-+out_truncate:
-+      ext3_truncate(old_inode);
-+}
-+#endif /* EXT3_DELETE_THREAD */
-+
- /* 
-  * On success, We end up with an outstanding reference count against
-  * iloc->bh.  This _must_ be cleaned up later. 
-Index: linux-2.4.20/fs/ext3/file.c
-===================================================================
---- linux-2.4.20.orig/fs/ext3/file.c   2004-01-12 20:13:36.000000000 +0300
-+++ linux-2.4.20/fs/ext3/file.c        2004-01-13 16:55:45.000000000 +0300
-@@ -125,7 +125,11 @@
- };
- struct inode_operations ext3_file_inode_operations = {
-+#ifdef EXT3_DELETE_THREAD
-+      truncate:       ext3_truncate_thread,   /* BKL held */
-+#else
-       truncate:       ext3_truncate,          /* BKL held */
-+#endif
-       setattr:        ext3_setattr,           /* BKL held */
-       setxattr:       ext3_setxattr,          /* BKL held */
-       getxattr:       ext3_getxattr,          /* BKL held */
-Index: linux-2.4.20/fs/buffer.c
-===================================================================
---- linux-2.4.20.orig/fs/buffer.c      2003-05-16 05:29:12.000000000 +0400
-+++ linux-2.4.20/fs/buffer.c   2004-01-13 16:55:45.000000000 +0300
-@@ -328,6 +328,8 @@
-       if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
-               sb->s_op->write_super(sb);
-       unlock_super(sb);
-+      if (sb->s_op && sb->s_op->sync_fs)
-+              sb->s_op->sync_fs(sb);
-       unlock_kernel();
-       return sync_buffers(dev, 1);
-Index: linux-2.4.20/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.20.orig/include/linux/ext3_fs.h  2004-01-12 20:13:37.000000000 +0300
-+++ linux-2.4.20/include/linux/ext3_fs.h       2004-01-13 16:55:45.000000000 +0300
-@@ -193,6 +193,7 @@
-  */
- #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
- #define EXT3_STATE_NEW                        0x00000002 /* inode is newly created */
-+#define EXT3_STATE_DELETE             0x00000010 /* deferred delete inode */
- /*
-  * ioctl commands
-@@ -320,6 +321,7 @@
- #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
- #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -696,6 +698,9 @@
- extern void ext3_dirty_inode(struct inode *);
- extern int ext3_change_inode_journal_flag(struct inode *, int);
- extern void ext3_truncate (struct inode *);
-+#ifdef EXT3_DELETE_THREAD
-+extern void ext3_truncate_thread(struct inode *inode);
-+#endif
- /* ioctl.c */
- extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
-Index: linux-2.4.20/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.20.orig/include/linux/ext3_fs_sb.h       2004-01-12 20:13:37.000000000 +0300
-+++ linux-2.4.20/include/linux/ext3_fs_sb.h    2004-01-13 16:55:45.000000000 +0300
-@@ -29,6 +29,8 @@
- #define EXT3_MAX_GROUP_LOADED 8
-+#define EXT3_DELETE_THREAD
-+
- /*
-  * third extended-fs super-block data in memory
-  */
-@@ -76,6 +78,14 @@
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+      spinlock_t s_delete_lock;
-+      struct list_head s_delete_list;
-+      unsigned long s_delete_blocks;
-+      unsigned long s_delete_inodes;
-+      wait_queue_head_t s_delete_thread_queue;
-+      wait_queue_head_t s_delete_waiter_queue;
-+#endif
- };
- #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.4.20/include/linux/fs.h
-===================================================================
---- linux-2.4.20.orig/include/linux/fs.h       2004-01-12 20:13:36.000000000 +0300
-+++ linux-2.4.20/include/linux/fs.h    2004-01-13 16:55:45.000000000 +0300
-@@ -917,6 +917,7 @@
-       void (*delete_inode) (struct inode *);
-       void (*put_super) (struct super_block *);
-       void (*write_super) (struct super_block *);
-+      int (*sync_fs) (struct super_block *);
-       void (*write_super_lockfs) (struct super_block *);
-       void (*unlockfs) (struct super_block *);
-       int (*statfs) (struct super_block *, struct statfs *);
index 507b044..3f5687b 100644 (file)
@@ -27,7 +27,7 @@ Index: linux-stage/fs/ext3/inode.c
                                struct ext3_iloc *iloc, int in_mem)
  {
        unsigned long block;
-@@ -2484,6 +2484,11 @@
+@@ -2484,6 +2484,11 @@ void ext3_read_inode(struct inode * inod
                ei->i_data[block] = raw_inode->i_block[block];
        INIT_LIST_HEAD(&ei->i_orphan);
  
@@ -39,7 +39,7 @@ Index: linux-stage/fs/ext3/inode.c
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
-@@ -2619,6 +2624,9 @@
+@@ -2619,6 +2624,9 @@ static int ext3_do_update_inode(handle_t
        } else for (block = 0; block < EXT3_N_BLOCKS; block++)
                raw_inode->i_block[block] = ei->i_data[block];
  
@@ -49,7 +49,7 @@ Index: linux-stage/fs/ext3/inode.c
        BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
        rc = ext3_journal_dirty_metadata(handle, bh);
        if (!err)
-@@ -2849,7 +2857,8 @@
+@@ -2849,7 +2857,8 @@ ext3_reserve_inode_write(handle_t *handl
  {
        int err = 0;
        if (handle) {
index 68e52bb..588916f 100644 (file)
@@ -2540,10 +2540,10 @@ Index: linux-2.4.21-rhel/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -332,6 +336,8 @@
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index fb7d2cb..305ef8e 100644 (file)
@@ -2539,10 +2539,10 @@ Index: linux-2.4.21-suse2/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -328,6 +332,8 @@
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 7246be1..8e84625 100644 (file)
@@ -2527,10 +2527,10 @@ Index: linux-2.4.24/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -327,6 +331,8 @@
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index d030f04..d77d9a7 100644 (file)
@@ -2527,10 +2527,10 @@ Index: linux-2.4.29/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -327,6 +331,8 @@
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index b6d0c57..657ecf4 100644 (file)
@@ -2482,12 +2482,13 @@ Index: linux-2.6.12-rc6/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/ext3/Makefile     2005-06-14 16:31:09.179354899 +0200
 +++ linux-2.6.12-rc6/fs/ext3/Makefile  2005-06-14 16:31:25.872714069 +0200
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o resize.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o resize.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2512,19 +2513,18 @@ Index: linux-2.6.12-rc6/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -593,7 +596,7 @@
-       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+@@ -593,6 +596,7 @@
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
-+      Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
 @@ -644,6 +647,8 @@
-       {Opt_iopen,  "iopen"},
-       {Opt_noiopen,  "noiopen"},
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen, "iopen"},
+       {Opt_noiopen, "noiopen"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
@@ -2593,8 +2593,8 @@ Index: linux-2.6.12-rc6/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -360,6 +364,8 @@
  #define EXT3_MOUNT_NOBH                       0x40000 /* No bufferheads */
- #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
 +#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
 +#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
index f69e16c..0ee8d28 100644 (file)
@@ -2471,12 +2471,13 @@ Index: linux-2.6.5-sles9/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/Makefile    2005-02-23 01:01:46.501172896 +0300
 +++ linux-2.6.5-sles9/fs/ext3/Makefile 2005-02-23 01:02:37.405434272 +0300
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o
-+         ioctl.o namei.o super.o symlink.o hash.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2501,12 +2502,11 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -537,7 +540,7 @@
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-       Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_err,
-+      Opt_err, Opt_extents, Opt_extdebug
+@@ -537,6 +540,7 @@
+       Opt_ignore, Opt_barrier,
+       Opt_err,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
@@ -2516,9 +2516,9 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
        {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
 @@ -797,6 +802,12 @@
                        break;
                case Opt_ignore:
@@ -2583,10 +2583,10 @@ Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -333,6 +337,8 @@
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef clear_opt
index 3b873c2..56fe653 100644 (file)
@@ -2466,12 +2466,13 @@ Index: linux-stage/fs/ext3/Makefile
 ===================================================================
 --- linux-stage.orig/fs/ext3/Makefile  2005-02-25 14:49:42.168561008 +0200
 +++ linux-stage/fs/ext3/Makefile       2005-02-25 15:39:28.384587168 +0200
-@@ -5,7 +5,7 @@
+@@ -5,7 +5,8 @@
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
 -         ioctl.o namei.o super.o symlink.o hash.o resize.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
++         ioctl.o namei.o super.o symlink.o hash.o resize.o \
++         extents.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
@@ -2496,19 +2497,18 @@ Index: linux-stage/fs/ext3/super.c
        return &ei->vfs_inode;
  }
  
-@@ -589,7 +594,7 @@
-       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+@@ -589,6 +594,7 @@
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
-+      Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug,
  };
  
  static match_table_t tokens = {
 @@ -639,6 +644,8 @@
-       {Opt_iopen,  "iopen"},
-       {Opt_noiopen,  "noiopen"},
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen, "iopen"},
+       {Opt_noiopen, "noiopen"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
@@ -2578,10 +2578,10 @@ Index: linux-stage/include/linux/ext3_fs.h
   * Structure of an inode on the disk
 @@ -359,6 +363,8 @@
  #define EXT3_MOUNT_RESERVATION                0x20000 /* Preallocation */
- #define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index e5a5616..cd37db4 100644 (file)
@@ -2,13 +2,15 @@ Index: 57chaos/fs/ext3/inode.c
 ===================================================================
 --- 57chaos.orig/fs/ext3/inode.c       2004-06-21 14:15:31.000000000 -0700
 +++ 57chaos/fs/ext3/inode.c    2004-06-21 14:19:27.000000000 -0700
-@@ -2270,6 +2270,10 @@ void ext3_truncate_thread(struct inode *
+@@ -2270,6 +2270,12 @@ void ext3_truncate_thread(struct inode *
  
        memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
        memset(oei->i_data, 0, sizeof(oei->i_data));
 +      if (EXT3_I(old_inode)->i_flags & EXT3_EXTENTS_FL) {
 +              EXT3_I(new_inode)->i_flags |= EXT3_EXTENTS_FL;
 +              ext3_extents_initialize_blockmap(handle, old_inode);
++      } else {
++              EXT3_I(new_inode)->i_flags &= ~EXT3_EXTENTS_FL;
 +      }
  
        nei->i_disksize = oei->i_disksize;
index 43681a6..bc752e5 100644 (file)
@@ -16,13 +16,15 @@ Index: linux-2.4.24/fs/ext3/inode.c
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
-@@ -2659,6 +2665,10 @@
+@@ -2659,6 +2665,12 @@
  
        memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
        memset(oei->i_data, 0, sizeof(oei->i_data));
 +      if (EXT3_I(old_inode)->i_flags & EXT3_EXTENTS_FL) {
 +              EXT3_I(new_inode)->i_flags |= EXT3_EXTENTS_FL;
 +              ext3_extents_initialize_blockmap(handle, old_inode);
++      } else {
++              EXT3_I(new_inode)->i_flags &= ~EXT3_EXTENTS_FL;
 +      }
  
        nei->i_disksize = oei->i_disksize;
diff --git a/lustre/kernel_patches/patches/ext3-htree-2.4.19-bgl.patch b/lustre/kernel_patches/patches/ext3-htree-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 6e4c834..0000000
+++ /dev/null
@@ -1,2584 +0,0 @@
- fs/ext3/Makefile           |    2 
- fs/ext3/dir.c              |  302 +++++++++
- fs/ext3/file.c             |    3 
- fs/ext3/hash.c             |  215 ++++++
- fs/ext3/namei.c            | 1420 ++++++++++++++++++++++++++++++++++++++++-----
- fs/ext3/super.c            |    7 
- include/linux/ext3_fs.h    |   85 ++
- include/linux/ext3_fs_sb.h |    2 
- include/linux/ext3_jbd.h   |    2 
- include/linux/rbtree.h     |    2 
- lib/rbtree.c               |   42 +
- 11 files changed, 1921 insertions(+), 161 deletions(-)
-
-Index: linux.mcp2/fs/ext3/dir.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/dir.c      2004-05-17 15:03:55.000000000 -0700
-+++ linux.mcp2/fs/ext3/dir.c   2004-05-17 15:07:06.000000000 -0700
-@@ -21,12 +21,16 @@
- #include <linux/fs.h>
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
-+#include <linux/slab.h>
-+#include <linux/rbtree.h>
- static unsigned char ext3_filetype_table[] = {
-       DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
- };
- static int ext3_readdir(struct file *, void *, filldir_t);
-+static int ext3_dx_readdir(struct file * filp,
-+                         void * dirent, filldir_t filldir);
- struct file_operations ext3_dir_operations = {
-       read:           generic_read_dir,
-@@ -35,6 +39,17 @@
-       fsync:          ext3_sync_file,         /* BKL held */
- };
-+
-+static unsigned char get_dtype(struct super_block *sb, int filetype)
-+{
-+      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) ||
-+          (filetype >= EXT3_FT_MAX))
-+              return DT_UNKNOWN;
-+
-+      return (ext3_filetype_table[filetype]);
-+}
-+                             
-+
- int ext3_check_dir_entry (const char * function, struct inode * dir,
-                         struct ext3_dir_entry_2 * de,
-                         struct buffer_head * bh,
-@@ -79,6 +94,16 @@
-       sb = inode->i_sb;
-+      if (is_dx(inode)) {
-+              err = ext3_dx_readdir(filp, dirent, filldir);
-+              if (err != ERR_BAD_DX_DIR)
-+                      return err;
-+              /*
-+               * We don't set the inode dirty flag since it's not
-+               * critical that it get flushed back to the disk.
-+               */
-+              EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
-+      }
-       stored = 0;
-       bh = NULL;
-       offset = filp->f_pos & (sb->s_blocksize - 1);
-@@ -162,18 +187,12 @@
-                                * during the copy operation.
-                                */
-                               unsigned long version = filp->f_version;
--                              unsigned char d_type = DT_UNKNOWN;
--                              if (EXT3_HAS_INCOMPAT_FEATURE(sb,
--                                              EXT3_FEATURE_INCOMPAT_FILETYPE)
--                                              && de->file_type < EXT3_FT_MAX)
--                                      d_type =
--                                        ext3_filetype_table[de->file_type];
-                               error = filldir(dirent, de->name,
-                                               de->name_len,
-                                               filp->f_pos,
-                                               le32_to_cpu(de->inode),
--                                              d_type);
-+                                              get_dtype(sb, de->file_type));
-                               if (error)
-                                       break;
-                               if (version != filp->f_version)
-@@ -188,3 +207,272 @@
-       UPDATE_ATIME(inode);
-       return 0;
- }
-+
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * These functions convert from the major/minor hash to an f_pos
-+ * value.
-+ * 
-+ * Currently we only use major hash numer.  This is unfortunate, but
-+ * on 32-bit machines, the same VFS interface is used for lseek and
-+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
-+ * lseek/telldir/seekdir will blow out spectacularly, and from within
-+ * the ext2 low-level routine, we don't know if we're being called by
-+ * a 64-bit version of the system call or the 32-bit version of the
-+ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
-+ * cookie.  Sigh.
-+ */
-+#define hash2pos(major, minor)        (major >> 1)
-+#define pos2maj_hash(pos)     ((pos << 1) & 0xffffffff)
-+#define pos2min_hash(pos)     (0)
-+
-+/*
-+ * This structure holds the nodes of the red-black tree used to store
-+ * the directory entry in hash order.
-+ */
-+struct fname {
-+      __u32           hash;
-+      __u32           minor_hash;
-+      rb_node_t       rb_hash; 
-+      struct fname    *next;
-+      __u32           inode;
-+      __u8            name_len;
-+      __u8            file_type;
-+      char            name[0];
-+};
-+
-+/*
-+ * This functoin implements a non-recursive way of freeing all of the
-+ * nodes in the red-black tree.
-+ */
-+static void free_rb_tree_fname(rb_root_t *root)
-+{
-+      rb_node_t       *n = root->rb_node;
-+      rb_node_t       *parent;
-+      struct fname    *fname;
-+
-+      while (n) {
-+              /* Do the node's children first */
-+              if ((n)->rb_left) {
-+                      n = n->rb_left;
-+                      continue;
-+              }
-+              if (n->rb_right) {
-+                      n = n->rb_right;
-+                      continue;
-+              }
-+              /*
-+               * The node has no children; free it, and then zero
-+               * out parent's link to it.  Finally go to the
-+               * beginning of the loop and try to free the parent
-+               * node.
-+               */
-+              parent = n->rb_parent;
-+              fname = rb_entry(n, struct fname, rb_hash);
-+              kfree(fname);
-+              if (!parent)
-+                      root->rb_node = 0;
-+              else if (parent->rb_left == n)
-+                      parent->rb_left = 0;
-+              else if (parent->rb_right == n)
-+                      parent->rb_right = 0;
-+              n = parent;
-+      }
-+      root->rb_node = 0;
-+}
-+
-+
-+struct dir_private_info *create_dir_info(loff_t pos)
-+{
-+      struct dir_private_info *p;
-+
-+      p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
-+      if (!p)
-+              return NULL;
-+      p->root.rb_node = 0;
-+      p->curr_node = 0;
-+      p->extra_fname = 0;
-+      p->last_pos = 0;
-+      p->curr_hash = pos2maj_hash(pos);
-+      p->curr_minor_hash = pos2min_hash(pos);
-+      p->next_hash = 0;
-+      return p;
-+}
-+
-+void ext3_htree_free_dir_info(struct dir_private_info *p)
-+{
-+      free_rb_tree_fname(&p->root);
-+      kfree(p);
-+}
-+              
-+/*
-+ * Given a directory entry, enter it into the fname rb tree.
-+ */
-+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
-+                           __u32 minor_hash,
-+                           struct ext3_dir_entry_2 *dirent)
-+{
-+      rb_node_t **p, *parent = NULL;
-+      struct fname * fname, *new_fn;
-+      struct dir_private_info *info;
-+      int len;
-+
-+      info = (struct dir_private_info *) dir_file->private_data;
-+      p = &info->root.rb_node;
-+
-+      /* Create and allocate the fname structure */
-+      len = sizeof(struct fname) + dirent->name_len + 1;
-+      new_fn = kmalloc(len, GFP_KERNEL);
-+      if (!new_fn)
-+              return -ENOMEM;
-+      memset(new_fn, 0, len);
-+      new_fn->hash = hash;
-+      new_fn->minor_hash = minor_hash;
-+      new_fn->inode = le32_to_cpu(dirent->inode);
-+      new_fn->name_len = dirent->name_len;
-+      new_fn->file_type = dirent->file_type;
-+      memcpy(new_fn->name, dirent->name, dirent->name_len);
-+      new_fn->name[dirent->name_len] = 0;
-+      
-+      while (*p) {
-+              parent = *p;
-+              fname = rb_entry(parent, struct fname, rb_hash);
-+
-+              /*
-+               * If the hash and minor hash match up, then we put
-+               * them on a linked list.  This rarely happens...
-+               */
-+              if ((new_fn->hash == fname->hash) &&
-+                  (new_fn->minor_hash == fname->minor_hash)) {
-+                      new_fn->next = fname->next;
-+                      fname->next = new_fn;
-+                      return 0;
-+              }
-+                      
-+              if (new_fn->hash < fname->hash)
-+                      p = &(*p)->rb_left;
-+              else if (new_fn->hash > fname->hash)
-+                      p = &(*p)->rb_right;
-+              else if (new_fn->minor_hash < fname->minor_hash)
-+                      p = &(*p)->rb_left;
-+              else /* if (new_fn->minor_hash > fname->minor_hash) */
-+                      p = &(*p)->rb_right;
-+      }
-+
-+      rb_link_node(&new_fn->rb_hash, parent, p);
-+      rb_insert_color(&new_fn->rb_hash, &info->root);
-+      return 0;
-+}
-+
-+
-+
-+/*
-+ * This is a helper function for ext3_dx_readdir.  It calls filldir
-+ * for all entres on the fname linked list.  (Normally there is only
-+ * one entry on the linked list, unless there are 62 bit hash collisions.)
-+ */
-+static int call_filldir(struct file * filp, void * dirent,
-+                      filldir_t filldir, struct fname *fname)
-+{
-+      struct dir_private_info *info = filp->private_data;
-+      loff_t  curr_pos;
-+      struct inode *inode = filp->f_dentry->d_inode;
-+      struct super_block * sb;
-+      int error;
-+
-+      sb = inode->i_sb;
-+      
-+      if (!fname) {
-+              printk("call_filldir: called with null fname?!?\n");
-+              return 0;
-+      }
-+      curr_pos = hash2pos(fname->hash, fname->minor_hash);
-+      while (fname) {
-+              error = filldir(dirent, fname->name,
-+                              fname->name_len, curr_pos, 
-+                              fname->inode,
-+                              get_dtype(sb, fname->file_type));
-+              if (error) {
-+                      filp->f_pos = curr_pos;
-+                      info->extra_fname = fname->next;
-+                      return error;
-+              }
-+              fname = fname->next;
-+      }
-+      return 0;
-+}
-+
-+static int ext3_dx_readdir(struct file * filp,
-+                       void * dirent, filldir_t filldir)
-+{
-+      struct dir_private_info *info = filp->private_data;
-+      struct inode *inode = filp->f_dentry->d_inode;
-+      struct fname *fname;
-+      int     ret;
-+
-+      if (!info) {
-+              info = create_dir_info(filp->f_pos);
-+              if (!info)
-+                      return -ENOMEM;
-+              filp->private_data = info;
-+      }
-+
-+      /* Some one has messed with f_pos; reset the world */
-+      if (info->last_pos != filp->f_pos) {
-+              free_rb_tree_fname(&info->root);
-+              info->curr_node = 0;
-+              info->extra_fname = 0;
-+              info->curr_hash = pos2maj_hash(filp->f_pos);
-+              info->curr_minor_hash = pos2min_hash(filp->f_pos);
-+      }
-+
-+      /*
-+       * If there are any leftover names on the hash collision
-+       * chain, return them first.
-+       */
-+      if (info->extra_fname &&
-+          call_filldir(filp, dirent, filldir, info->extra_fname))
-+              goto finished;
-+
-+      if (!info->curr_node)
-+              info->curr_node = rb_get_first(&info->root);
-+
-+      while (1) {
-+              /*
-+               * Fill the rbtree if we have no more entries,
-+               * or the inode has changed since we last read in the
-+               * cached entries. 
-+               */
-+              if ((!info->curr_node) ||
-+                  (filp->f_version != inode->i_version)) {
-+                      info->curr_node = 0;
-+                      free_rb_tree_fname(&info->root);
-+                      filp->f_version = inode->i_version;
-+                      ret = ext3_htree_fill_tree(filp, info->curr_hash,
-+                                                 info->curr_minor_hash,
-+                                                 &info->next_hash);
-+                      if (ret < 0)
-+                              return ret;
-+                      if (ret == 0)
-+                              break;
-+                      info->curr_node = rb_get_first(&info->root);
-+              }
-+
-+              fname = rb_entry(info->curr_node, struct fname, rb_hash);
-+              info->curr_hash = fname->hash;
-+              info->curr_minor_hash = fname->minor_hash;
-+              if (call_filldir(filp, dirent, filldir, fname))
-+                      break;
-+
-+              info->curr_node = rb_get_next(info->curr_node);
-+              if (!info->curr_node) {
-+                      info->curr_hash = info->next_hash;
-+                      info->curr_minor_hash = 0;
-+              }
-+      }
-+finished:
-+      info->last_pos = filp->f_pos;
-+      UPDATE_ATIME(inode);
-+      return 0;
-+}
-+#endif
-Index: linux.mcp2/fs/ext3/file.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/file.c     2004-05-17 15:03:55.000000000 -0700
-+++ linux.mcp2/fs/ext3/file.c  2004-05-17 15:07:06.000000000 -0700
-@@ -35,6 +35,9 @@
- {
-       if (filp->f_mode & FMODE_WRITE)
-               ext3_discard_prealloc (inode);
-+      if (is_dx(inode) && filp->private_data)
-+              ext3_htree_free_dir_info(filp->private_data);
-+
-       return 0;
- }
-Index: linux.mcp2/fs/ext3/hash.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/hash.c     2002-04-11 07:25:15.000000000 -0700
-+++ linux.mcp2/fs/ext3/hash.c  2004-05-17 15:07:06.000000000 -0700
-@@ -0,0 +1,215 @@
-+/*
-+ *  linux/fs/ext3/hash.c
-+ *
-+ * Copyright (C) 2002 by Theodore Ts'o
-+ *
-+ * This file is released under the GPL v2.
-+ * 
-+ * This file may be redistributed under the terms of the GNU Public
-+ * License.
-+ */
-+
-+#include <linux/fs.h>
-+#include <linux/jbd.h>
-+#include <linux/sched.h>
-+#include <linux/ext3_fs.h>
-+
-+#define DELTA 0x9E3779B9
-+
-+static void TEA_transform(__u32 buf[4], __u32 const in[])
-+{
-+      __u32   sum = 0;
-+      __u32   b0 = buf[0], b1 = buf[1];
-+      __u32   a = in[0], b = in[1], c = in[2], d = in[3];
-+      int     n = 16;
-+
-+      do {                                                    
-+              sum += DELTA;                                   
-+              b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); 
-+              b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); 
-+      } while(--n);
-+
-+      buf[0] += b0;
-+      buf[1] += b1;
-+}
-+
-+/* F, G and H are basic MD4 functions: selection, majority, parity */
-+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
-+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
-+#define H(x, y, z) ((x) ^ (y) ^ (z))
-+
-+/*
-+ * The generic round function.  The application is so specific that
-+ * we don't bother protecting all the arguments with parens, as is generally
-+ * good macro practice, in favor of extra legibility.
-+ * Rotation is separate from addition to prevent recomputation
-+ */
-+#define ROUND(f, a, b, c, d, x, s)    \
-+      (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s)))
-+#define K1 0
-+#define K2 013240474631UL
-+#define K3 015666365641UL
-+
-+/*
-+ * Basic cut-down MD4 transform.  Returns only 32 bits of result.
-+ */
-+static void halfMD4Transform (__u32 buf[4], __u32 const in[])
-+{
-+      __u32   a = buf[0], b = buf[1], c = buf[2], d = buf[3];
-+
-+      /* Round 1 */
-+      ROUND(F, a, b, c, d, in[0] + K1,  3);
-+      ROUND(F, d, a, b, c, in[1] + K1,  7);
-+      ROUND(F, c, d, a, b, in[2] + K1, 11);
-+      ROUND(F, b, c, d, a, in[3] + K1, 19);
-+      ROUND(F, a, b, c, d, in[4] + K1,  3);
-+      ROUND(F, d, a, b, c, in[5] + K1,  7);
-+      ROUND(F, c, d, a, b, in[6] + K1, 11);
-+      ROUND(F, b, c, d, a, in[7] + K1, 19);
-+
-+      /* Round 2 */
-+      ROUND(G, a, b, c, d, in[1] + K2,  3);
-+      ROUND(G, d, a, b, c, in[3] + K2,  5);
-+      ROUND(G, c, d, a, b, in[5] + K2,  9);
-+      ROUND(G, b, c, d, a, in[7] + K2, 13);
-+      ROUND(G, a, b, c, d, in[0] + K2,  3);
-+      ROUND(G, d, a, b, c, in[2] + K2,  5);
-+      ROUND(G, c, d, a, b, in[4] + K2,  9);
-+      ROUND(G, b, c, d, a, in[6] + K2, 13);
-+
-+      /* Round 3 */
-+      ROUND(H, a, b, c, d, in[3] + K3,  3);
-+      ROUND(H, d, a, b, c, in[7] + K3,  9);
-+      ROUND(H, c, d, a, b, in[2] + K3, 11);
-+      ROUND(H, b, c, d, a, in[6] + K3, 15);
-+      ROUND(H, a, b, c, d, in[1] + K3,  3);
-+      ROUND(H, d, a, b, c, in[5] + K3,  9);
-+      ROUND(H, c, d, a, b, in[0] + K3, 11);
-+      ROUND(H, b, c, d, a, in[4] + K3, 15);
-+
-+      buf[0] += a;
-+      buf[1] += b;
-+      buf[2] += c;
-+      buf[3] += d;
-+}
-+
-+#undef ROUND
-+#undef F
-+#undef G
-+#undef H
-+#undef K1
-+#undef K2
-+#undef K3
-+
-+/* The old legacy hash */
-+static __u32 dx_hack_hash (const char *name, int len)
-+{
-+      __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
-+      while (len--) {
-+              __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
-+              
-+              if (hash & 0x80000000) hash -= 0x7fffffff;
-+              hash1 = hash0;
-+              hash0 = hash;
-+      }
-+      return (hash0 << 1);
-+}
-+
-+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
-+{
-+      __u32   pad, val;
-+      int     i;
-+
-+      pad = (__u32)len | ((__u32)len << 8);
-+      pad |= pad << 16;
-+
-+      val = pad;
-+      if (len > num*4)
-+              len = num * 4;
-+      for (i=0; i < len; i++) {
-+              if ((i % 4) == 0)
-+                      val = pad;
-+              val = msg[i] + (val << 8);
-+              if ((i % 4) == 3) {
-+                      *buf++ = val;
-+                      val = pad;
-+                      num--;
-+              }
-+      }
-+      if (--num >= 0)
-+              *buf++ = val;
-+      while (--num >= 0)
-+              *buf++ = pad;
-+}
-+
-+/*
-+ * Returns the hash of a filename.  If len is 0 and name is NULL, then
-+ * this function can be used to test whether or not a hash version is
-+ * supported.
-+ * 
-+ * The seed is an 4 longword (32 bits) "secret" which can be used to
-+ * uniquify a hash.  If the seed is all zero's, then some default seed
-+ * may be used.
-+ * 
-+ * A particular hash version specifies whether or not the seed is
-+ * represented, and whether or not the returned hash is 32 bits or 64
-+ * bits.  32 bit hashes will return 0 for the minor hash.
-+ */
-+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
-+{
-+      __u32   hash;
-+      __u32   minor_hash = 0;
-+      const char      *p;
-+      int             i;
-+      __u32           in[8], buf[4];
-+
-+      /* Initialize the default seed for the hash checksum functions */
-+      buf[0] = 0x67452301;
-+      buf[1] = 0xefcdab89;
-+      buf[2] = 0x98badcfe;
-+      buf[3] = 0x10325476;
-+
-+      /* Check to see if the seed is all zero's */
-+      if (hinfo->seed) {
-+              for (i=0; i < 4; i++) {
-+                      if (hinfo->seed[i])
-+                              break;
-+              }
-+              if (i < 4)
-+                      memcpy(buf, hinfo->seed, sizeof(buf));
-+      }
-+              
-+      switch (hinfo->hash_version) {
-+      case DX_HASH_LEGACY:
-+              hash = dx_hack_hash(name, len);
-+              break;
-+      case DX_HASH_HALF_MD4:
-+              p = name;
-+              while (len > 0) {
-+                      str2hashbuf(p, len, in, 8);
-+                      halfMD4Transform(buf, in);
-+                      len -= 32;
-+                      p += 32;
-+              }
-+              minor_hash = buf[2];
-+              hash = buf[1];
-+              break;
-+      case DX_HASH_TEA:
-+              p = name;
-+              while (len > 0) {
-+                      str2hashbuf(p, len, in, 4);
-+                      TEA_transform(buf, in);
-+                      len -= 16;
-+                      p += 16;
-+              }
-+              hash = buf[0];
-+              minor_hash = buf[1];
-+              break;
-+      default:
-+              hinfo->hash = 0;
-+              return -1;
-+      }
-+      hinfo->hash = hash & ~1;
-+      hinfo->minor_hash = minor_hash;
-+      return 0;
-+}
-Index: linux.mcp2/fs/ext3/Makefile
-===================================================================
---- linux.mcp2.orig/fs/ext3/Makefile   2004-05-17 15:03:55.000000000 -0700
-+++ linux.mcp2/fs/ext3/Makefile        2004-05-17 15:07:06.000000000 -0700
-@@ -10,7 +10,7 @@
- O_TARGET := ext3.o
- obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
--              ioctl.o namei.o super.o symlink.o
-+              ioctl.o namei.o super.o symlink.o hash.o
- obj-m    := $(O_TARGET)
- include $(TOPDIR)/Rules.make
-Index: linux.mcp2/fs/ext3/namei.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/namei.c    2004-05-17 15:03:55.000000000 -0700
-+++ linux.mcp2/fs/ext3/namei.c 2004-05-17 15:07:06.000000000 -0700
-@@ -16,6 +16,12 @@
-  *        David S. Miller (davem@caip.rutgers.edu), 1995
-  *  Directory entry file type support and forward compatibility hooks
-  *    for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
-+ *  Hash Tree Directory indexing (c)
-+ *    Daniel Phillips, 2001
-+ *  Hash Tree Directory indexing porting
-+ *    Christopher Li, 2002
-+ *  Hash Tree Directory indexing cleanup
-+ *    Theodore Ts'o, 2002
-  */
- #include <linux/fs.h>
-@@ -38,6 +44,642 @@
- #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
-+static struct buffer_head *ext3_append(handle_t *handle,
-+                                      struct inode *inode,
-+                                      u32 *block, int *err)
-+{
-+      struct buffer_head *bh;
-+
-+      *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-+
-+      if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
-+              inode->i_size += inode->i_sb->s_blocksize;
-+              EXT3_I(inode)->i_disksize = inode->i_size;
-+              ext3_journal_get_write_access(handle,bh);
-+      }
-+      return bh;
-+}
-+
-+#ifndef assert
-+#define assert(test) J_ASSERT(test)
-+#endif
-+
-+#ifndef swap
-+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-+#endif
-+
-+typedef struct { u32 v; } le_u32;
-+typedef struct { u16 v; } le_u16;
-+
-+#ifdef DX_DEBUG
-+#define dxtrace(command) command
-+#else
-+#define dxtrace(command) 
-+#endif
-+
-+struct fake_dirent
-+{
-+      /*le*/u32 inode;
-+      /*le*/u16 rec_len;
-+      u8 name_len;
-+      u8 file_type;
-+};
-+
-+struct dx_countlimit
-+{
-+      le_u16 limit;
-+      le_u16 count;
-+};
-+
-+struct dx_entry
-+{
-+      le_u32 hash;
-+      le_u32 block;
-+};
-+
-+/*
-+ * dx_root_info is laid out so that if it should somehow get overlaid by a
-+ * dirent the two low bits of the hash version will be zero.  Therefore, the
-+ * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
-+ */
-+
-+struct dx_root
-+{
-+      struct fake_dirent dot;
-+      char dot_name[4];
-+      struct fake_dirent dotdot;
-+      char dotdot_name[4];
-+      struct dx_root_info
-+      {
-+              le_u32 reserved_zero;
-+              u8 hash_version;
-+              u8 info_length; /* 8 */
-+              u8 indirect_levels;
-+              u8 unused_flags;
-+      }
-+      info;
-+      struct dx_entry entries[0];
-+};
-+
-+struct dx_node
-+{
-+      struct fake_dirent fake;
-+      struct dx_entry entries[0];
-+};
-+
-+
-+struct dx_frame
-+{
-+      struct buffer_head *bh;
-+      struct dx_entry *entries;
-+      struct dx_entry *at;
-+};
-+
-+struct dx_map_entry
-+{
-+      u32 hash;
-+      u32 offs;
-+};
-+
-+#ifdef CONFIG_EXT3_INDEX
-+static inline unsigned dx_get_block (struct dx_entry *entry);
-+static void dx_set_block (struct dx_entry *entry, unsigned value);
-+static inline unsigned dx_get_hash (struct dx_entry *entry);
-+static void dx_set_hash (struct dx_entry *entry, unsigned value);
-+static unsigned dx_get_count (struct dx_entry *entries);
-+static unsigned dx_get_limit (struct dx_entry *entries);
-+static void dx_set_count (struct dx_entry *entries, unsigned value);
-+static void dx_set_limit (struct dx_entry *entries, unsigned value);
-+static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
-+static unsigned dx_node_limit (struct inode *dir);
-+static struct dx_frame *dx_probe(struct dentry *dentry,
-+                               struct inode *dir,
-+                               struct dx_hash_info *hinfo,
-+                               struct dx_frame *frame,
-+                               int *err);
-+static void dx_release (struct dx_frame *frames);
-+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
-+                      struct dx_hash_info *hinfo, struct dx_map_entry map[]);
-+static void dx_sort_map(struct dx_map_entry *map, unsigned count);
-+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
-+              struct dx_map_entry *offsets, int count);
-+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
-+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
-+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                               struct dx_frame *frame,
-+                               struct dx_frame *frames, int *err,
-+                               __u32 *start_hash);
-+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-+                     struct ext3_dir_entry_2 **res_dir, int *err);
-+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode);
-+
-+/*
-+ * Future: use high four bits of block for coalesce-on-delete flags
-+ * Mask them off for now.
-+ */
-+
-+static inline unsigned dx_get_block (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->block.v) & 0x00ffffff;
-+}
-+
-+static inline void dx_set_block (struct dx_entry *entry, unsigned value)
-+{
-+      entry->block.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_hash (struct dx_entry *entry)
-+{
-+      return le32_to_cpu(entry->hash.v);
-+}
-+
-+static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
-+{
-+      entry->hash.v = cpu_to_le32(value);
-+}
-+
-+static inline unsigned dx_get_count (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->count.v);
-+}
-+
-+static inline unsigned dx_get_limit (struct dx_entry *entries)
-+{
-+      return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v);
-+}
-+
-+static inline void dx_set_count (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value);
-+}
-+
-+static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value);
-+}
-+
-+/*
-+ * Number of struct dx_entry slots available in the root block of @dir:
-+ * the block size less EXT3_DIR_REC_LEN(1), EXT3_DIR_REC_LEN(2) and
-+ * @infosize bytes, divided by the size of one entry.
-+ */
-+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
-+{
-+      unsigned reserved = EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) + infosize;
-+      unsigned entry_space = dir->i_sb->s_blocksize - reserved;
-+
-+      return entry_space / sizeof(struct dx_entry);
-+}
-+
-+/*
-+ * Number of struct dx_entry slots available in a non-root index block
-+ * of @dir: the block size less EXT3_DIR_REC_LEN(0), divided by the size
-+ * of one entry.
-+ */
-+static inline unsigned dx_node_limit (struct inode *dir)
-+{
-+      unsigned header = EXT3_DIR_REC_LEN(0);
-+      unsigned entry_space = dir->i_sb->s_blocksize - header;
-+
-+      return entry_space / sizeof(struct dx_entry);
-+}
-+
-+/*
-+ * Debug
-+ */
-+#ifdef DX_DEBUG
-+/* Totals accumulated and returned by dx_show_leaf()/dx_show_entries(). */
-+struct stats
-+{ 
-+      unsigned names;         /* entries seen with a non-zero inode */
-+      unsigned space;         /* sum of EXT3_DIR_REC_LEN(name_len) over those entries */
-+      unsigned bcount;        /* leaf blocks walked (dx_show_leaf reports 1) */
-+};
-+
-+/*
-+ * Debug helper: walk the directory entries in the @size bytes starting
-+ * at @de and count those with a non-zero inode.  When @show_names is
-+ * set, each counted name is printed followed by its hash (computed with
-+ * a private copy of @hinfo) and its byte offset within the block.
-+ *
-+ * Returns { names, space, 1 }, where space is the sum of
-+ * EXT3_DIR_REC_LEN(name_len) over the counted entries.
-+ */
-+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de,
-+                               int size, int show_names)
-+{
-+      unsigned names = 0, space = 0;
-+      char *base = (char *) de;
-+      struct dx_hash_info h = *hinfo;
-+
-+      printk("names: ");
-+      while ((char *) de < base + size)
-+      {
-+              if (de->inode)
-+              {
-+                      if (show_names)
-+                      {
-+                              int len = de->name_len;
-+                              char *name = de->name;
-+                              while (len--) printk("%c", *name++);
-+                              ext3fs_dirhash(de->name, de->name_len, &h);
-+                              /*
-+                               * A pointer difference is not an unsigned
-+                               * int; cast it so the argument matches the
-+                               * %u conversion on every architecture.
-+                               */
-+                              printk(":%x.%u ", h.hash,
-+                                     (unsigned) ((char *) de - base));
-+                      }
-+                      space += EXT3_DIR_REC_LEN(de->name_len);
-+                      names++;
-+              }
-+              de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
-+      }
-+      printk("(%i)\n", names);
-+      return (struct stats) { names, space, 1 };
-+}
-+
-+/*
-+ * Debug helper: print every index entry in @entries and recurse into the
-+ * block each one points at.  With @levels non-zero the child block is
-+ * treated as another index node (dx_show_entries with levels - 1);
-+ * otherwise it is dumped as a leaf via dx_show_leaf().  Blocks that
-+ * cannot be read are skipped.  Returns the totals summed over all
-+ * children.
-+ */
-+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
-+                           struct dx_entry *entries, int levels)
-+{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count = dx_get_count (entries), names = 0, space = 0, i;
-+      unsigned bcount = 0;
-+      struct buffer_head *bh;
-+      int err;
-+      printk("%i indexed blocks...\n", count);
-+      for (i = 0; i < count; i++, entries++)
-+      {
-+              /* entry 0 is shown with hash 0; its own hash field is not read */
-+              u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0;
-+              /* distance to the next entry's hash, or ~hash for the last entry */
-+              u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
-+              struct stats stats;
-+              printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue;
-+              stats = levels?
-+                 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
-+                 dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0);
-+              names += stats.names;
-+              space += stats.space;
-+              bcount += stats.bcount;
-+              brelse (bh);
-+      }
-+      /* bcount is the divisor below, so skip the summary when it is zero */
-+      if (bcount)
-+              printk("%snames %u, fullness %u (%u%%)\n", levels?"":"   ",
-+                      names, space/bcount,(space/bcount)*100/blocksize);
-+      return (struct stats) { names, space, bcount};
-+}
-+#endif /* DX_DEBUG */
-+
-+/*
-+ * Probe for a directory leaf block to search.
-+ *
-+ * Reads block 0 of the directory as the index root, validates its
-+ * header, and descends the index, filling in one dx_frame per level
-+ * starting at @frame_in.  On success the frame for the lowest index
-+ * level is returned; its ->at entry names the leaf block to search.
-+ * If @dentry is non-NULL the directory is taken from its parent and the
-+ * hash is computed from its name; otherwise @dir and the hash already
-+ * stored in @hinfo are used.
-+ *
-+ * On failure NULL is returned and every buffer read here has been
-+ * released.
-+ *
-+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
-+ * error in the directory index, and the caller should fall back to
-+ * searching the directory normally.  The callers of dx_probe **MUST**
-+ * check for this error code, and make sure it never gets reflected
-+ * back to userspace.
-+ */
-+static struct dx_frame *
-+dx_probe(struct dentry *dentry, struct inode *dir,
-+       struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
-+{
-+      unsigned count, indirect;
-+      struct dx_entry *at, *entries, *p, *q, *m;
-+      struct dx_root *root;
-+      struct buffer_head *bh;
-+      struct dx_frame *frame = frame_in;
-+      u32 hash;
-+
-+      /* dx_release() treats a NULL frames[0].bh as "nothing to release" */
-+      frame->bh = NULL;
-+      if (dentry)
-+              dir = dentry->d_parent->d_inode;
-+      /* block 0 of the directory holds the index root */
-+      if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
-+              goto fail;
-+      root = (struct dx_root *) bh->b_data;
-+      if (root->info.hash_version != DX_HASH_TEA &&
-+          root->info.hash_version != DX_HASH_HALF_MD4 &&
-+          root->info.hash_version != DX_HASH_LEGACY) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unrecognised inode hash code %d",
-+                           root->info.hash_version);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+      hinfo->hash_version = root->info.hash_version;
-+      hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
-+      /* without a dentry the caller-supplied hinfo->hash is used as is */
-+      if (dentry)
-+              ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
-+      hash = hinfo->hash;
-+
-+      if (root->info.unused_flags & 1) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unimplemented inode hash flags: %#06x",
-+                           root->info.unused_flags);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+
-+      /* at most one level of interior nodes below the root is accepted */
-+      if ((indirect = root->info.indirect_levels) > 1) {
-+              ext3_warning(dir->i_sb, __FUNCTION__,
-+                           "Unimplemented inode hash depth: %#06x",
-+                           root->info.indirect_levels);
-+              brelse(bh);
-+              *err = ERR_BAD_DX_DIR;
-+              goto fail;
-+      }
-+
-+      /* the entries array starts info_length bytes past the info header */
-+      entries = (struct dx_entry *) (((char *)&root->info) +
-+                                     root->info.info_length);
-+      assert(dx_get_limit(entries) == dx_root_limit(dir,
-+                                                    root->info.info_length));
-+      dxtrace (printk("Look up %x", hash));
-+      while (1)
-+      {
-+              count = dx_get_count(entries);
-+              assert (count && count <= dx_get_limit(entries));
-+              /*
-+               * Binary search over entries[1..count-1]; entry 0 is never
-+               * compared.  On exit p is the first entry whose hash is
-+               * greater than the target (or one past the last entry).
-+               */
-+              p = entries + 1;
-+              q = entries + count - 1;
-+              while (p <= q)
-+              {
-+                      m = p + (q - p)/2;
-+                      dxtrace(printk("."));
-+                      if (dx_get_hash(m) > hash)
-+                              q = m - 1;
-+                      else
-+                              p = m + 1;
-+              }
-+
-+              if (0) // linear search cross check
-+              {
-+                      unsigned n = count - 1;
-+                      at = entries;
-+                      while (n--)
-+                      {
-+                              dxtrace(printk(","));
-+                              if (dx_get_hash(++at) > hash)
-+                              {
-+                                      at--;
-+                                      break;
-+                              }
-+                      }
-+                      assert (at == p - 1);
-+              }
-+
-+              /* the entry just before p is the one covering the target hash */
-+              at = p - 1;
-+              dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-+              frame->bh = bh;
-+              frame->entries = entries;
-+              frame->at = at;
-+              /* no further index levels: this frame points at the leaf */
-+              if (!indirect--) return frame;
-+              if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
-+                      goto fail2;
-+              at = entries = ((struct dx_node *) bh->b_data)->entries;
-+              assert (dx_get_limit(entries) == dx_node_limit (dir));
-+              frame++;
-+      }
-+fail2:
-+      /* drop the buffers of every frame filled in so far, newest first */
-+      while (frame >= frame_in) {
-+              brelse(frame->bh);
-+              frame--;
-+      }
-+fail:
-+      return NULL;
-+}
-+
-+/*
-+ * Drop the buffer references held in a frames[] array.  Nothing is done
-+ * when frames[0].bh is NULL.  frames[1].bh is released only when the
-+ * root block held in frames[0] reports a non-zero indirect_levels.
-+ */
-+static void dx_release (struct dx_frame *frames)
-+{
-+      struct dx_root *root;
-+
-+      if (!frames[0].bh)
-+              return;
-+
-+      root = (struct dx_root *) frames[0].bh->b_data;
-+      if (root->info.indirect_levels)
-+              brelse(frames[1].bh);
-+      brelse(frames[0].bh);
-+}
-+
-+/*
-+ * Advance @frame to the next leaf block of the index, re-reading the
-+ * interior index blocks whose entries were exhausted on the way.
-+ * @frames is the bottom of the frame array; the walk never climbs
-+ * above it.
-+ *
-+ * @hash controls whether the search continues.  If its low bit is
-+ * clear, the search goes on only when the hash of the next block, with
-+ * its low bit masked off, equals @hash; this is used when searching for
-+ * a specific file.  Otherwise (callers that always want the next block
-+ * pass HASH_NB_ALWAYS) no comparison is made.
-+ *
-+ * Returns 1 if the caller should continue to search, 0 if it should
-+ * not, and -1 if reading one of the index blocks failed.  *err is
-+ * preset to -ENOENT and is afterwards written only by ext3_bread().
-+ *
-+ * If start_hash is non-null, it is filled in with the starting hash of
-+ * the next block.
-+ */
-+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                               struct dx_frame *frame,
-+                               struct dx_frame *frames, int *err,
-+                               __u32 *start_hash)
-+{
-+      struct dx_frame *p;
-+      struct buffer_head *bh;
-+      int num_frames = 0;
-+      __u32 bhash;
-+
-+      /* negative errno, matching the -ENOENT used by ext3_dx_find_entry() */
-+      *err = -ENOENT;
-+      p = frame;
-+      /*
-+       * Find the next leaf page by incrementing the frame pointer.
-+       * If we run out of entries in the interior node, loop around and
-+       * increment pointer in the parent node.  When we break out of
-+       * this loop, num_frames indicates the number of interior
-+       * nodes need to be read.
-+       */
-+      while (1) {
-+              if (++(p->at) < p->entries + dx_get_count(p->entries))
-+                      break;
-+              if (p == frames)
-+                      return 0;
-+              num_frames++;
-+              p--;
-+      }
-+
-+      /*
-+       * With the low bit of hash clear, continue only if the next
-+       * block carries the desired continuation hash.  If it doesn't,
-+       * return since there's no point to read in the successive
-+       * index pages.
-+       */
-+      bhash = dx_get_hash(p->at);
-+      if (start_hash)
-+              *start_hash = bhash;
-+      if ((hash & 1) == 0) {
-+              if ((bhash & ~1) != hash)
-+                      return 0;
-+      }
-+      /*
-+       * Walk back down, replacing the buffer of each frame below p
-+       * with the block its parent entry now points at.
-+       */
-+      while (num_frames--) {
-+              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
-+                                    0, err)))
-+                      return -1; /* Failure */
-+              p++;
-+              brelse (p->bh);
-+              p->bh = bh;
-+              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
-+      }
-+      return 1;
-+}
-+
-+
-+/*
-+ * Return the directory entry following @p, found by advancing @p by its
-+ * (little-endian) rec_len.  The caller must ensure that p is at least
-+ * 6 bytes before the end of page.
-+ */
-+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
-+{
-+      char *cursor = (char *) p;
-+
-+      cursor += le16_to_cpu(p->rec_len);
-+      return (struct ext3_dir_entry_2 *) cursor;
-+}
-+
-+/*
-+ * This function fills a red-black tree with information from a
-+ * directory.  We start scanning the directory in hash order, starting
-+ * at start_hash and start_minor_hash.  Entries are handed to
-+ * ext3_htree_store_dirent() one at a time; when both start values are
-+ * zero the first two entries of the root block are stored first.
-+ *
-+ * If next_hash is non-null it receives the starting hash of the block
-+ * following the last one scanned (or ~1 when there is none).
-+ *
-+ * This function returns the number of entries inserted into the tree,
-+ * or the error code left in err by the helper that failed.
-+ */
-+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-+                       __u32 start_minor_hash, __u32 *next_hash)
-+{
-+      struct dx_hash_info hinfo;
-+      struct buffer_head *bh;
-+      struct ext3_dir_entry_2 *de, *top;
-+      /*
-+       * Per-call storage.  A function-static array here would be shared
-+       * by every concurrent caller and corrupt their index walks.
-+       */
-+      struct dx_frame frames[2], *frame;
-+      struct inode *dir;
-+      int block, err;
-+      int count = 0;
-+      int ret;
-+      __u32 hashval;
-+
-+      dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
-+                     start_minor_hash));
-+      dir = dir_file->f_dentry->d_inode;
-+      hinfo.hash = start_hash;
-+      hinfo.minor_hash = 0;
-+      frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
-+      if (!frame)
-+              return err;
-+
-+      /* Add '.' and '..' from the htree header */
-+      if (!start_hash && !start_minor_hash) {
-+              de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
-+              if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
-+                      goto errout;
-+              de = ext3_next_entry(de);
-+              if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
-+                      goto errout;
-+              count += 2;
-+      }
-+
-+      while (1) {
-+              block = dx_get_block(frame->at);
-+              dxtrace(printk("Reading block %d\n", block));
-+              if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
-+                      goto errout;
-+
-+              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+              top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize -
-+                                     EXT3_DIR_REC_LEN(0));
-+              for (; de < top; de = ext3_next_entry(de)) {
-+                      ext3fs_dirhash(de->name, de->name_len, &hinfo);
-+                      /* skip entries that sort before the starting position */
-+                      if ((hinfo.hash < start_hash) ||
-+                          ((hinfo.hash == start_hash) &&
-+                           (hinfo.minor_hash < start_minor_hash)))
-+                              continue;
-+                      if ((err = ext3_htree_store_dirent(dir_file,
-+                                 hinfo.hash, hinfo.minor_hash, de)) != 0) {
-+                              /* the leaf buffer is still held here */
-+                              brelse (bh);
-+                              goto errout;
-+                      }
-+                      count++;
-+              }
-+              brelse (bh);
-+              hashval = ~1;
-+              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
-+                                          frame, frames, &err, &hashval);
-+              if (next_hash)
-+                      *next_hash = hashval;
-+              if (ret == -1)
-+                      goto errout;
-+              /*
-+               * Stop if:  (a) there are no more entries, or
-+               * (b) we have inserted at least one entry and the
-+               * next hash value is not a continuation
-+               */
-+              if ((ret == 0) ||
-+                  (count && ((hashval & 1) == 0)))
-+                      break;
-+      }
-+      dx_release(frames);
-+      dxtrace(printk("Fill tree: returned %d entries\n", count));
-+      return count;
-+errout:
-+      dx_release(frames);
-+      return (err);
-+}
-+
-+
-+/*
-+ * Directory block splitting, compacting
-+ */
-+
-+/*
-+ * Build a hash/offset map of the live entries (non-zero name_len and
-+ * inode) in the @size bytes starting at @de.  The map grows downwards:
-+ * each entry found is written just below the previous one, starting
-+ * immediately below @map_tail.  Hashes are computed with a private copy
-+ * of @hinfo.  Returns the number of map entries written.
-+ */
-+static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
-+                      struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
-+{
-+      char *base = (char *) de;
-+      struct dx_hash_info h = *hinfo;
-+      int count = 0;
-+
-+      /* XXX: do we need to check rec_len == 0 case? -Chris */
-+      for (; (char *) de < base + size;
-+           de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len))) {
-+              if (!de->name_len || !de->inode)
-+                      continue;
-+              ext3fs_dirhash(de->name, de->name_len, &h);
-+              --map_tail;
-+              map_tail->hash = h.hash;
-+              map_tail->offs = (u32) ((char *) de - base);
-+              ++count;
-+      }
-+      return count;
-+}
-+
-+/*
-+ * Sort the @count entries of @map into ascending hash order: comb sort
-+ * passes with a shrinking gap first, then bubble sort passes until a
-+ * pass makes no swap.
-+ */
-+static void dx_sort_map (struct dx_map_entry *map, unsigned count)
-+{
-+        /* top is fixed here; count is reused below as the comb gap */
-+        struct dx_map_entry *p, *q, *top = map + count - 1;
-+        int more;
-+        /* Combsort until bubble sort doesn't suck */
-+        while (count > 2)
-+      {
-+                count = count*10/13;
-+                /* unsigned arithmetic: true only for count == 9 or 10 */
-+                if (count - 9 < 2) /* 9, 10 -> 11 */
-+                        count = 11;
-+                /* compare elements "count" apart, walking down from the top */
-+                for (p = top, q = p - count; q >= map; p--, q--)
-+                        if (p->hash < q->hash)
-+                                swap(*p, *q);
-+        }
-+        /* Garden variety bubble sort */
-+        do {
-+                more = 0;
-+                q = top;
-+                while (q-- > map)
-+              {
-+                        if (q[1].hash >= q[0].hash)
-+                              continue;
-+                        swap(*(q+1), *q);
-+                        more = 1;
-+              }
-+      } while(more);
-+}
-+
-+/*
-+ * Insert a new index entry (@hash, @block) directly after frame->at in
-+ * the entries array of @frame, shifting the later entries up by one and
-+ * bumping the stored count.  The array must have a free slot.
-+ */
-+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
-+{
-+      struct dx_entry *entries = frame->entries;
-+      struct dx_entry *old = frame->at;
-+      struct dx_entry *slot = old + 1;
-+      int count = dx_get_count(entries);
-+
-+      assert(count < dx_get_limit(entries));
-+      assert(old < entries + count);
-+
-+      /* open a one-entry gap at slot */
-+      memmove(slot + 1, slot, (char *)(entries + count) - (char *)(slot));
-+      dx_set_hash(slot, hash);
-+      dx_set_block(slot, block);
-+      dx_set_count(entries, count + 1);
-+}
-+#endif
-+
-+
-+/*
-+ * Clear EXT3_INDEX_FL on @inode when its superblock does not have the
-+ * EXT3_FEATURE_COMPAT_DIR_INDEX compat feature; otherwise do nothing.
-+ */
-+static void ext3_update_dx_flag(struct inode *inode)
-+{
-+      if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
-+                                  EXT3_FEATURE_COMPAT_DIR_INDEX))
-+              return;
-+
-+      EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
-+}
-+
- /*
-  * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure.
-  *
-@@ -94,6 +736,7 @@
-       return 0;
- }
-+
- /*
-  *    ext3_find_entry()
-  *
-@@ -105,6 +748,8 @@
-  * The returned buffer_head has ->b_count elevated.  The caller is expected
-  * to brelse() it when appropriate.
-  */
-+
-+      
- static struct buffer_head * ext3_find_entry (struct dentry *dentry,
-                                       struct ext3_dir_entry_2 ** res_dir)
- {
-@@ -119,12 +764,32 @@
-       int num = 0;
-       int nblocks, i, err;
-       struct inode *dir = dentry->d_parent->d_inode;
-+      int namelen;
-+      const u8 *name;
-+      unsigned blocksize;
-       *res_dir = NULL;
-       sb = dir->i_sb;
--
-+      blocksize = sb->s_blocksize;
-+      namelen = dentry->d_name.len;
-+      name = dentry->d_name.name;
-+      if (namelen > EXT3_NAME_LEN)
-+              return NULL;
-+#ifdef CONFIG_EXT3_INDEX
-+      if (is_dx(dir)) {
-+              bh = ext3_dx_find_entry(dentry, res_dir, &err);
-+              /*
-+               * On success, or if the error was file not found,
-+               * return.  Otherwise, fall back to doing a search the
-+               * old fashioned way.
-+               */
-+              if (bh || (err != ERR_BAD_DX_DIR))
-+                      return bh;
-+              dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
-+      }
-+#endif
-       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
--      start = dir->u.ext3_i.i_dir_start_lookup;
-+      start = EXT3_I(dir)->i_dir_start_lookup;
-       if (start >= nblocks)
-               start = 0;
-       block = start;
-@@ -165,7 +830,7 @@
-               i = search_dirblock(bh, dir, dentry,
-                           block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
-               if (i == 1) {
--                      dir->u.ext3_i.i_dir_start_lookup = block;
-+                      EXT3_I(dir)->i_dir_start_lookup = block;
-                       ret = bh;
-                       goto cleanup_and_exit;
-               } else {
-@@ -196,6 +861,66 @@
-       return ret;
- }
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Look up @dentry's name in its (indexed) parent directory.
-+ *
-+ * dx_probe() selects the first candidate leaf block; each candidate is
-+ * scanned for a matching name, and ext3_htree_next_block() decides
-+ * whether a following block must be scanned too.
-+ *
-+ * On success the buffer holding the entry is returned (the caller must
-+ * brelse() it) and *res_dir points at the entry.  Otherwise NULL is
-+ * returned and *err holds the reason: -ENOENT when the name is absent,
-+ * ERR_BAD_DX_DIR when a matching entry fails ext3_check_dir_entry() (so
-+ * that the caller falls back to a linear search), or whatever the
-+ * failing helper stored there.
-+ */
-+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-+                     struct ext3_dir_entry_2 **res_dir, int *err)
-+{
-+      struct super_block * sb;
-+      struct dx_hash_info     hinfo;
-+      u32 hash;
-+      struct dx_frame frames[2], *frame;
-+      struct ext3_dir_entry_2 *de, *top;
-+      struct buffer_head *bh;
-+      unsigned long block;
-+      int retval;
-+      int namelen = dentry->d_name.len;
-+      const u8 *name = dentry->d_name.name;
-+      struct inode *dir = dentry->d_parent->d_inode;
-+
-+      sb = dir->i_sb;
-+      if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
-+              return NULL;
-+      hash = hinfo.hash;
-+      do {
-+              block = dx_get_block(frame->at);
-+              if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
-+                      goto errout;
-+              de = (struct ext3_dir_entry_2 *) bh->b_data;
-+              top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
-+                                     EXT3_DIR_REC_LEN(0));
-+              for (; de < top; de = ext3_next_entry(de))
-+              if (ext3_match (namelen, name, de)) {
-+                      if (!ext3_check_dir_entry("ext3_find_entry",
-+                                                dir, de, bh,
-+                                (block<<EXT3_BLOCK_SIZE_BITS(sb))
-+                                        +((char *)de - bh->b_data))) {
-+                              brelse (bh);
-+                              /*
-+                               * Without this *err would keep whatever the
-+                               * last helper left in it, and the caller
-+                               * would report "not found" instead of
-+                               * retrying with a linear search.
-+                               */
-+                              *err = ERR_BAD_DX_DIR;
-+                              goto errout;
-+                      }
-+                      *res_dir = de;
-+                      dx_release (frames);
-+                      return bh;
-+              }
-+              brelse (bh);
-+              /* Check to see if we should continue to search */
-+              retval = ext3_htree_next_block(dir, hash, frame,
-+                                             frames, err, 0);
-+              if (retval == -1) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                           "error reading index page in directory #%lu",
-+                           dir->i_ino);
-+                      goto errout;
-+              }
-+      } while (retval == 1);
-+
-+      *err = -ENOENT;
-+errout:
-+      dxtrace(printk("%s not found\n", name));
-+      dx_release (frames);
-+      return NULL;
-+}
-+#endif
-+
- static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
- {
-       struct inode * inode;
-@@ -212,8 +937,9 @@
-               brelse (bh);
-               inode = iget(dir->i_sb, ino);
--              if (!inode)
-+              if (!inode) {
-                       return ERR_PTR(-EACCES);
-+              }
-       }
-       d_add(dentry, inode);
-       return NULL;
-@@ -237,6 +963,301 @@
-               de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
- }
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Copy the @count entries listed in @map from block @from into @to,
-+ * packed back to back, each with rec_len set to its minimal
-+ * EXT3_DIR_REC_LEN.  The inode field of every source entry is zeroed.
-+ * Returns a pointer to the last entry written in the destination.
-+ */
-+static struct ext3_dir_entry_2 *
-+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
-+{
-+      unsigned rec_len = 0;
-+
-+      for (; count--; map++) {
-+              struct ext3_dir_entry_2 *src = (struct ext3_dir_entry_2 *) (from + map->offs);
-+              struct ext3_dir_entry_2 *dst = (struct ext3_dir_entry_2 *) to;
-+
-+              rec_len = EXT3_DIR_REC_LEN(src->name_len);
-+              memcpy (dst, src, rec_len);
-+              dst->rec_len = cpu_to_le16(rec_len);
-+              src->inode = 0;
-+              to += rec_len;
-+      }
-+      /* step back over the last record to point at its start */
-+      return (struct ext3_dir_entry_2 *) (to - rec_len);
-+}
-+
-+/*
-+ * Compact the live entries (non-zero inode and name_len) of the @size
-+ * byte block at @base towards its start, giving each one its minimal
-+ * EXT3_DIR_REC_LEN.  Returns the last entry kept, or @base itself when
-+ * nothing was kept.
-+ */
-+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
-+{
-+      struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
-+      unsigned rec_len = 0;
-+
-+      prev = to = de;
-+      for (; (char*)de < base + size; de = next) {
-+              /* read the link before the entry can be overwritten below */
-+              next = (struct ext3_dir_entry_2 *) ((char *) de +
-+                                                  le16_to_cpu(de->rec_len));
-+              if (!de->inode || !de->name_len)
-+                      continue;
-+
-+              rec_len = EXT3_DIR_REC_LEN(de->name_len);
-+              if (de > to)
-+                      memmove(to, de, rec_len);
-+              to->rec_len = cpu_to_le16(rec_len);
-+              prev = to;
-+              to = (struct ext3_dir_entry_2 *)((char *)to + rec_len);
-+      }
-+      return prev;
-+}
-+
-+/*
-+ * Split the leaf block *bh of @dir in two.
-+ *
-+ * A new block is appended to the directory, the live entries of *bh are
-+ * mapped and sorted by hash, the entries from the midpoint (count/2)
-+ * upwards are moved to the new block, the remainder is packed in place,
-+ * and an index entry for the new block is inserted after frame->at.
-+ *
-+ * On success *bh is the block that should receive an entry whose hash
-+ * is hinfo->hash, and the return value is the last entry of that block,
-+ * whose rec_len has been stretched to the end of the block.
-+ *
-+ * On failure NULL is returned, *bh has been released and set to NULL,
-+ * and *error holds the error (set by ext3_append() or to the journal
-+ * error).
-+ */
-+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
-+                      struct buffer_head **bh,struct dx_frame *frame,
-+                      struct dx_hash_info *hinfo, int *error)
-+{
-+      unsigned blocksize = dir->i_sb->s_blocksize;
-+      unsigned count, continued;
-+      struct buffer_head *bh2;
-+      u32 newblock;
-+      u32 hash2;
-+      struct dx_map_entry *map;
-+      char *data1 = (*bh)->b_data, *data2;
-+      unsigned split;
-+      struct ext3_dir_entry_2 *de = NULL, *de2;
-+      int     err;
-+
-+      bh2 = ext3_append (handle, dir, &newblock, error);
-+      if (!(bh2)) {
-+              brelse(*bh);
-+              *bh = NULL;
-+              goto errout;
-+      }
-+
-+      BUFFER_TRACE(*bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, *bh);
-+      if (err) {
-+      journal_error:
-+              brelse(*bh);
-+              brelse(bh2);
-+              *bh = NULL;
-+              ext3_std_error(dir->i_sb, err);
-+              /*
-+               * Make the failure visible to the caller: *bh is gone, so
-+               * no entry pointer may be returned, and *error must carry
-+               * the journal error rather than ext3_append()'s result.
-+               */
-+              *error = err;
-+              de = NULL;
-+              goto errout;
-+      }
-+      BUFFER_TRACE(frame->bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, frame->bh);
-+      if (err)
-+              goto journal_error;
-+
-+      data2 = bh2->b_data;
-+
-+      /* create map in the end of data2 block */
-+      map = (struct dx_map_entry *) (data2 + blocksize);
-+      count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
-+                           blocksize, hinfo, map);
-+      map -= count;
-+      split = count/2; // need to adjust to actual middle
-+      dx_sort_map (map, count);
-+      hash2 = map[split].hash;
-+      /* set when the split falls between two entries with the same hash */
-+      continued = hash2 == map[split - 1].hash;
-+      dxtrace(printk("Split block %i at %x, %i/%i\n",
-+              dx_get_block(frame->at), hash2, split, count-split));
-+
-+      /* Fancy dance to stay within two buffers */
-+      de2 = dx_move_dirents(data1, data2, map + split, count - split);
-+      de = dx_pack_dirents(data1,blocksize);
-+      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+      de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
-+      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
-+      dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
-+
-+      /* Which block gets the new entry? */
-+      if (hinfo->hash >= hash2)
-+      {
-+              swap(*bh, bh2);
-+              de = de2;
-+      }
-+      dx_insert_block (frame, hash2 + continued, newblock);
-+      err = ext3_journal_dirty_metadata (handle, bh2);
-+      if (err)
-+              goto journal_error;
-+      err = ext3_journal_dirty_metadata (handle, frame->bh);
-+      if (err)
-+              goto journal_error;
-+      brelse (bh2);
-+      dxtrace(dx_show_index ("frame", frame->entries));
-+errout:
-+      return de;
-+}
-+#endif
-+
-+
-+/*
-+ * Add a new entry into a directory (leaf) block.  If de is non-NULL,
-+ * it points to a directory entry which is guaranteed to be large
-+ * enough for new directory entry.  If de is NULL, then
-+ * add_dirent_to_buf will attempt to search the directory block for
-+ * space.  It returns -ENOSPC if no space is available, -EIO if a
-+ * directory entry fails ext3_check_dir_entry(), and -EEXIST if an
-+ * entry with the same name already exists.
-+ *
-+ * If inode is NULL the new entry is written with a zero inode number.
-+ *
-+ * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
-+ * all other cases bh is released.
-+ */
-+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode, struct ext3_dir_entry_2 *de,
-+                           struct buffer_head * bh)
-+{
-+      struct inode    *dir = dentry->d_parent->d_inode;
-+      const char      *name = dentry->d_name.name;
-+      int             namelen = dentry->d_name.len;
-+      unsigned long   offset = 0;
-+      unsigned short  reclen;
-+      int             nlen, rlen, err;
-+      char            *top;
-+      
-+      /* bytes a record holding the new name needs */
-+      reclen = EXT3_DIR_REC_LEN(namelen);
-+      if (!de) {
-+              de = (struct ext3_dir_entry_2 *)bh->b_data;
-+              /* last position at which a record of reclen bytes still fits */
-+              top = bh->b_data + dir->i_sb->s_blocksize - reclen;
-+              while ((char *) de <= top) {
-+                      if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
-+                                                bh, offset)) {
-+                              brelse (bh);
-+                              return -EIO;
-+                      }
-+                      if (ext3_match (namelen, name, de)) {
-+                              brelse (bh);
-+                              return -EEXIST;
-+                      }
-+                      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+                      rlen = le16_to_cpu(de->rec_len);
-+                      /*
-+                       * An entry in use can give up the slack past its own
-+                       * name; an unused one (inode == 0) offers its whole
-+                       * record.
-+                       */
-+                      if ((de->inode? rlen - nlen: rlen) >= reclen)
-+                              break;
-+                      de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
-+                      offset += rlen;
-+              }
-+              /* ran off the end without finding room; bh stays held */
-+              if ((char *) de > top)
-+                      return -ENOSPC;
-+      }
-+      BUFFER_TRACE(bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, bh);
-+      if (err) {
-+              ext3_std_error(dir->i_sb, err);
-+              brelse(bh);
-+              return err;
-+      }
-+      
-+      /* By now the buffer is marked for journaling */
-+      nlen = EXT3_DIR_REC_LEN(de->name_len);
-+      rlen = le16_to_cpu(de->rec_len);
-+      if (de->inode) {
-+              /*
-+               * Split the record: the existing entry shrinks to nlen and
-+               * the new entry de1 takes the remaining rlen - nlen bytes.
-+               */
-+              struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
-+              de1->rec_len = cpu_to_le16(rlen - nlen);
-+              de->rec_len = cpu_to_le16(nlen);
-+              de = de1;
-+      }
-+      de->file_type = EXT3_FT_UNKNOWN;
-+      if (inode) {
-+              de->inode = cpu_to_le32(inode->i_ino);
-+              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
-+      } else
-+              de->inode = 0;
-+      de->name_len = namelen;
-+      memcpy (de->name, name, namelen);
-+      /*
-+       * XXX shouldn't update any times until successful
-+       * completion of syscall, but too many callers depend
-+       * on this.
-+       *
-+       * XXX similarly, too many callers depend on
-+       * ext3_new_inode() setting the times, but error
-+       * recovery deletes the inode, so the worst that can
-+       * happen is that the times are slightly out of date
-+       * and/or different from the directory change time.
-+       */
-+      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-+      ext3_update_dx_flag(dir);
-+      dir->i_version = ++event;
-+      ext3_mark_inode_dirty(handle, dir);
-+      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+      err = ext3_journal_dirty_metadata(handle, bh);
-+      if (err)
-+              ext3_std_error(dir->i_sb, err);
-+      brelse(bh);
-+      /*
-+       * NOTE(review): a failure of ext3_journal_dirty_metadata() is only
-+       * reported through ext3_std_error(); the function still returns 0.
-+       * Confirm this is intended before relying on the return value.
-+       */
-+      return 0;
-+}
-+
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * This converts a one block unindexed directory to a 3 block indexed
-+ * directory, and adds the dentry to the indexed directory.
-+ *
-+ * @bh is block 0 of the directory.  Its entries after ".." are copied
-+ * into a newly appended block, block 0 is rewritten as the index root
-+ * with a single index entry pointing at block 1, and do_split() then
-+ * splits that block so that the new name can be added with
-+ * add_dirent_to_buf().
-+ *
-+ * Returns the result of add_dirent_to_buf(), or the error left in
-+ * retval by the step that failed.  @bh is consumed on every path seen
-+ * here.
-+ */
-+static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
-+                          struct inode *inode, struct buffer_head *bh)
-+{
-+      struct inode    *dir = dentry->d_parent->d_inode;
-+      const char      *name = dentry->d_name.name;
-+      int             namelen = dentry->d_name.len;
-+      struct buffer_head *bh2;
-+      struct dx_root  *root;
-+      struct dx_frame frames[2], *frame;
-+      struct dx_entry *entries;
-+      struct ext3_dir_entry_2 *de, *de2;
-+      char            *data1, *top;
-+      unsigned        len;
-+      int             retval;
-+      unsigned        blocksize;
-+      struct dx_hash_info hinfo;
-+      u32             block;
-+
-+      blocksize =  dir->i_sb->s_blocksize;
-+      dxtrace(printk("Creating index\n"));
-+      retval = ext3_journal_get_write_access(handle, bh);
-+      if (retval) {
-+              ext3_std_error(dir->i_sb, retval);
-+              brelse(bh);
-+              return retval;
-+      }
-+      root = (struct dx_root *) bh->b_data;
-+
-+      EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
-+      bh2 = ext3_append (handle, dir, &block, &retval);
-+      if (!(bh2)) {
-+              brelse(bh);
-+              return retval;
-+      }
-+      data1 = bh2->b_data;
-+
-+      /* The 0th block becomes the root, move the dirents out */
-+      de = (struct ext3_dir_entry_2 *)&root->dotdot;
-+      de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len));
-+      len = ((char *) root) + blocksize - (char *) de;
-+      memcpy (data1, de, len);
-+      de = (struct ext3_dir_entry_2 *) data1;
-+      top = data1 + len;
-+      /*
-+       * Walk to the last copied entry.  The assignment is written on the
-+       * variable itself; assigning through a cast ("(char *) de2 = ...")
-+       * is not valid C and is rejected by current compilers.
-+       */
-+      while ((char *) (de2 = (struct ext3_dir_entry_2 *)
-+                      ((char *) de + le16_to_cpu(de->rec_len))) < top)
-+              de = de2;
-+      /* stretch the last entry to the end of the new block */
-+      de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
-+      /* Initialize the root; the dot dirents already exist */
-+      de = (struct ext3_dir_entry_2 *) (&root->dotdot);
-+      de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2));
-+      memset (&root->info, 0, sizeof(root->info));
-+      root->info.info_length = sizeof(root->info);
-+      root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version;
-+      entries = root->entries;
-+      dx_set_block (entries, 1);
-+      dx_set_count (entries, 1);
-+      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
-+
-+      /* Initialize as for dx_probe */
-+      hinfo.hash_version = root->info.hash_version;
-+      hinfo.seed = dir->i_sb->u.ext3_sb.s_hash_seed;
-+      ext3fs_dirhash(name, namelen, &hinfo);
-+      frame = frames;
-+      frame->entries = entries;
-+      frame->at = entries;
-+      frame->bh = bh;
-+      bh = bh2;
-+      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-+      dx_release (frames);
-+      if (!(de))
-+              return retval;
-+
-+      return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+}
-+#endif
-+
- /*
-  *    ext3_add_entry()
-  *
-@@ -247,127 +1268,198 @@
-  * may not sleep between calling this and putting something into
-  * the entry, as someone else might have used it while you slept.
-  */
--
--/*
-- * AKPM: the journalling code here looks wrong on the error paths
-- */
- static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
-       struct inode *inode)
- {
-       struct inode *dir = dentry->d_parent->d_inode;
--      const char *name = dentry->d_name.name;
--      int namelen = dentry->d_name.len;
-       unsigned long offset;
--      unsigned short rec_len;
-       struct buffer_head * bh;
--      struct ext3_dir_entry_2 * de, * de1;
-+      struct ext3_dir_entry_2 *de;
-       struct super_block * sb;
-       int     retval;
-+#ifdef CONFIG_EXT3_INDEX
-+      int     dx_fallback=0;
-+#endif
-+      unsigned blocksize;
-+      unsigned nlen, rlen;
-+      u32 block, blocks;
-       sb = dir->i_sb;
--
--      if (!namelen)
-+      blocksize = sb->s_blocksize;
-+      if (!dentry->d_name.len)
-               return -EINVAL;
--      bh = ext3_bread (handle, dir, 0, 0, &retval);
-+#ifdef CONFIG_EXT3_INDEX
-+      if (is_dx(dir)) {
-+              retval = ext3_dx_add_entry(handle, dentry, inode);
-+              if (!retval || (retval != ERR_BAD_DX_DIR))
-+                      return retval;
-+              EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL;
-+              dx_fallback++;
-+              ext3_mark_inode_dirty(handle, dir);
-+      }
-+#endif
-+      blocks = dir->i_size >> sb->s_blocksize_bits;
-+      for (block = 0, offset = 0; block < blocks; block++) {
-+              bh = ext3_bread(handle, dir, block, 0, &retval);
-+              if(!bh)
-+                      return retval;
-+              retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
-+              if (retval != -ENOSPC)
-+                      return retval;
-+
-+#ifdef CONFIG_EXT3_INDEX
-+              if (blocks == 1 && !dx_fallback &&
-+                  EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
-+                      return make_indexed_dir(handle, dentry, inode, bh);
-+#endif
-+              brelse(bh);
-+      }
-+      bh = ext3_append(handle, dir, &block, &retval);
-       if (!bh)
-               return retval;
--      rec_len = EXT3_DIR_REC_LEN(namelen);
--      offset = 0;
-       de = (struct ext3_dir_entry_2 *) bh->b_data;
--      while (1) {
--              if ((char *)de >= sb->s_blocksize + bh->b_data) {
--                      brelse (bh);
--                      bh = NULL;
--                      bh = ext3_bread (handle, dir,
--                              offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval);
--                      if (!bh)
--                              return retval;
--                      if (dir->i_size <= offset) {
--                              if (dir->i_size == 0) {
--                                      brelse(bh);
--                                      return -ENOENT;
--                              }
-+      de->inode = 0;
-+      de->rec_len = cpu_to_le16(rlen = blocksize);
-+      nlen = 0;
-+      return add_dirent_to_buf(handle, dentry, inode, de, bh);
-+}
--                              ext3_debug ("creating next block\n");
-+#ifdef CONFIG_EXT3_INDEX
-+/*
-+ * Returns 0 for success, or a negative error value
-+ */
-+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode)
-+{
-+      struct dx_frame frames[2], *frame;
-+      struct dx_entry *entries, *at;
-+      struct dx_hash_info hinfo;
-+      struct buffer_head * bh;
-+      struct inode *dir = dentry->d_parent->d_inode;
-+      struct super_block * sb = dir->i_sb;
-+      struct ext3_dir_entry_2 *de;
-+      int err;
--                              BUFFER_TRACE(bh, "get_write_access");
--                              ext3_journal_get_write_access(handle, bh);
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
--                              de->inode = 0;
--                              de->rec_len = le16_to_cpu(sb->s_blocksize);
--                              dir->u.ext3_i.i_disksize =
--                                      dir->i_size = offset + sb->s_blocksize;
--                              dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                              ext3_mark_inode_dirty(handle, dir);
--                      } else {
-+      frame = dx_probe(dentry, 0, &hinfo, frames, &err);
-+      if (!frame)
-+              return err;
-+      entries = frame->entries;
-+      at = frame->at;
--                              ext3_debug ("skipping to next block\n");
-+      if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
-+              goto cleanup;
--                              de = (struct ext3_dir_entry_2 *) bh->b_data;
--                      }
--              }
--              if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh,
--                                         offset)) {
--                      brelse (bh);
--                      return -ENOENT;
--              }
--              if (ext3_match (namelen, name, de)) {
--                              brelse (bh);
--                              return -EEXIST;
-+      BUFFER_TRACE(bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, bh);
-+      if (err)
-+              goto journal_error;
-+
-+      err = add_dirent_to_buf(handle, dentry, inode, 0, bh);
-+      if (err != -ENOSPC) {
-+              bh = 0;
-+              goto cleanup;
-+      }
-+
-+      /* Block full, should compress but for now just split */
-+      dxtrace(printk("using %u of %u node entries\n",
-+                     dx_get_count(entries), dx_get_limit(entries)));
-+      /* Need to split index? */
-+      if (dx_get_count(entries) == dx_get_limit(entries)) {
-+              u32 newblock;
-+              unsigned icount = dx_get_count(entries);
-+              int levels = frame - frames;
-+              struct dx_entry *entries2;
-+              struct dx_node *node2;
-+              struct buffer_head *bh2;
-+
-+              if (levels && (dx_get_count(frames->entries) ==
-+                             dx_get_limit(frames->entries))) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                                   "Directory index full!\n");
-+                      err = -ENOSPC;
-+                      goto cleanup;
-               }
--              if ((le32_to_cpu(de->inode) == 0 &&
--                              le16_to_cpu(de->rec_len) >= rec_len) ||
--                  (le16_to_cpu(de->rec_len) >=
--                              EXT3_DIR_REC_LEN(de->name_len) + rec_len)) {
--                      BUFFER_TRACE(bh, "get_write_access");
--                      ext3_journal_get_write_access(handle, bh);
--                      /* By now the buffer is marked for journaling */
--                      offset += le16_to_cpu(de->rec_len);
--                      if (le32_to_cpu(de->inode)) {
--                              de1 = (struct ext3_dir_entry_2 *) ((char *) de +
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de1->rec_len =
--                                      cpu_to_le16(le16_to_cpu(de->rec_len) -
--                                      EXT3_DIR_REC_LEN(de->name_len));
--                              de->rec_len = cpu_to_le16(
--                                              EXT3_DIR_REC_LEN(de->name_len));
--                              de = de1;
-+              bh2 = ext3_append (handle, dir, &newblock, &err);
-+              if (!(bh2))
-+                      goto cleanup;
-+              node2 = (struct dx_node *)(bh2->b_data);
-+              entries2 = node2->entries;
-+              node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
-+              node2->fake.inode = 0;
-+              BUFFER_TRACE(frame->bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, frame->bh);
-+              if (err)
-+                      goto journal_error;
-+              if (levels) {
-+                      unsigned icount1 = icount/2, icount2 = icount - icount1;
-+                      unsigned hash2 = dx_get_hash(entries + icount1);
-+                      dxtrace(printk("Split index %i/%i\n", icount1, icount2));
-+                              
-+                      BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
-+                      err = ext3_journal_get_write_access(handle,
-+                                                           frames[0].bh);
-+                      if (err)
-+                              goto journal_error;
-+                              
-+                      memcpy ((char *) entries2, (char *) (entries + icount1),
-+                              icount2 * sizeof(struct dx_entry));
-+                      dx_set_count (entries, icount1);
-+                      dx_set_count (entries2, icount2);
-+                      dx_set_limit (entries2, dx_node_limit(dir));
-+
-+                      /* Which index block gets the new entry? */
-+                      if (at - entries >= icount1) {
-+                              frame->at = at = at - entries - icount1 + entries2;
-+                              frame->entries = entries = entries2;
-+                              swap(frame->bh, bh2);
-                       }
--                      de->file_type = EXT3_FT_UNKNOWN;
--                      if (inode) {
--                              de->inode = cpu_to_le32(inode->i_ino);
--                              ext3_set_de_type(dir->i_sb, de, inode->i_mode);
--                      } else
--                              de->inode = 0;
--                      de->name_len = namelen;
--                      memcpy (de->name, name, namelen);
--                      /*
--                       * XXX shouldn't update any times until successful
--                       * completion of syscall, but too many callers depend
--                       * on this.
--                       *
--                       * XXX similarly, too many callers depend on
--                       * ext3_new_inode() setting the times, but error
--                       * recovery deletes the inode, so the worst that can
--                       * happen is that the times are slightly out of date
--                       * and/or different from the directory change time.
--                       */
--                      dir->i_mtime = dir->i_ctime = CURRENT_TIME;
--                      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
--                      ext3_mark_inode_dirty(handle, dir);
--                      dir->i_version = ++event;
--                      BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
--                      ext3_journal_dirty_metadata(handle, bh);
--                      brelse(bh);
--                      return 0;
-+                      dx_insert_block (frames + 0, hash2, newblock);
-+                      dxtrace(dx_show_index ("node", frames[1].entries));
-+                      dxtrace(dx_show_index ("node",
-+                             ((struct dx_node *) bh2->b_data)->entries));
-+                      err = ext3_journal_dirty_metadata(handle, bh2);
-+                      if (err)
-+                              goto journal_error;
-+                      brelse (bh2);
-+              } else {
-+                      dxtrace(printk("Creating second level index...\n"));
-+                      memcpy((char *) entries2, (char *) entries,
-+                             icount * sizeof(struct dx_entry));
-+                      dx_set_limit(entries2, dx_node_limit(dir));
-+
-+                      /* Set up root */
-+                      dx_set_count(entries, 1);
-+                      dx_set_block(entries + 0, newblock);
-+                      ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-+
-+                      /* Add new access path frame */
-+                      frame = frames + 1;
-+                      frame->at = at = at - entries + entries2;
-+                      frame->entries = entries = entries2;
-+                      frame->bh = bh2;
-+                      err = ext3_journal_get_write_access(handle,
-+                                                           frame->bh);
-+                      if (err)
-+                              goto journal_error;
-               }
--              offset += le16_to_cpu(de->rec_len);
--              de = (struct ext3_dir_entry_2 *)
--                      ((char *) de + le16_to_cpu(de->rec_len));
-+              ext3_journal_dirty_metadata(handle, frames[0].bh);
-       }
--      brelse (bh);
--      return -ENOSPC;
-+      de = do_split(handle, dir, &bh, frame, &hinfo, &err);
-+      if (!de)
-+              goto cleanup;
-+      err = add_dirent_to_buf(handle, dentry, inode, de, bh);
-+      bh = 0;
-+      goto cleanup;
-+      
-+journal_error:
-+      ext3_std_error(dir->i_sb, err);
-+cleanup:
-+      if (bh)
-+              brelse(bh);
-+      dx_release(frames);
-+      return err;
- }
-+#endif
- /*
-  * ext3_delete_entry deletes a directory entry by merging it with the
-@@ -451,9 +1543,11 @@
-       struct inode * inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -478,9 +1572,11 @@
-       struct inode *inode;
-       int err;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -507,9 +1603,11 @@
-       if (dir->i_nlink >= EXT3_LINK_MAX)
-               return -EMLINK;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -521,7 +1619,7 @@
-       inode->i_op = &ext3_dir_inode_operations;
-       inode->i_fop = &ext3_dir_operations;
--      inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize;
-+      inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-       inode->i_blocks = 0;    
-       dir_block = ext3_bread (handle, inode, 0, 1, &err);
-       if (!dir_block) {
-@@ -554,21 +1652,19 @@
-               inode->i_mode |= S_ISGID;
-       ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_entry (handle, dentry, inode);
--      if (err)
--              goto out_no_entry;
-+      if (err) {
-+              inode->i_nlink = 0;
-+              ext3_mark_inode_dirty(handle, inode);
-+              iput (inode);
-+              goto out_stop;
-+      }
-       dir->i_nlink++;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       d_instantiate(dentry, inode);
- out_stop:
-       ext3_journal_stop(handle, dir);
-       return err;
--
--out_no_entry:
--      inode->i_nlink = 0;
--      ext3_mark_inode_dirty(handle, inode);
--      iput (inode);
--      goto out_stop;
- }
- /*
-@@ -655,7 +1751,7 @@
-       int err = 0, rc;
-       
-       lock_super(sb);
--      if (!list_empty(&inode->u.ext3_i.i_orphan))
-+      if (!list_empty(&EXT3_I(inode)->i_orphan))
-               goto out_unlock;
-       /* Orphan handling is only valid for files with data blocks
-@@ -696,7 +1792,7 @@
-        * This is safe: on error we're going to ignore the orphan list
-        * anyway on the next recovery. */
-       if (!err)
--              list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan);
-+              list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
-       jbd_debug(4, "superblock will point to %ld\n", inode->i_ino);
-       jbd_debug(4, "orphan inode %ld will point to %d\n",
-@@ -714,25 +1810,26 @@
- int ext3_orphan_del(handle_t *handle, struct inode *inode)
- {
-       struct list_head *prev;
-+      struct ext3_inode_info *ei = EXT3_I(inode);
-       struct ext3_sb_info *sbi;
-       ino_t ino_next; 
-       struct ext3_iloc iloc;
-       int err = 0;
-       
-       lock_super(inode->i_sb);
--      if (list_empty(&inode->u.ext3_i.i_orphan)) {
-+      if (list_empty(&ei->i_orphan)) {
-               unlock_super(inode->i_sb);
-               return 0;
-       }
-       ino_next = NEXT_ORPHAN(inode);
--      prev = inode->u.ext3_i.i_orphan.prev;
-+      prev = ei->i_orphan.prev;
-       sbi = EXT3_SB(inode->i_sb);
-       jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino);
--      list_del(&inode->u.ext3_i.i_orphan);
--      INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
-+      list_del(&ei->i_orphan);
-+      INIT_LIST_HEAD(&ei->i_orphan);
-       /* If we're on an error path, we may not have a valid
-        * transaction handle with which to update the orphan list on
-@@ -793,8 +1890,9 @@
-       handle_t *handle;
-       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       retval = -ENOENT;
-       bh = ext3_find_entry (dentry, &de);
-@@ -832,7 +1930,7 @@
-       ext3_mark_inode_dirty(handle, inode);
-       dir->i_nlink--;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
- end_rmdir:
-@@ -850,8 +1948,9 @@
-       handle_t *handle;
-       handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -878,7 +1977,7 @@
-       if (retval)
-               goto end_unlink;
-       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
--      dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       inode->i_nlink--;
-       if (!inode->i_nlink)
-@@ -904,9 +2003,11 @@
-       if (l > dir->i_sb->s_blocksize)
-               return -ENAMETOOLONG;
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -916,7 +2017,7 @@
-       if (IS_ERR(inode))
-               goto out_stop;
--      if (l > sizeof (inode->u.ext3_i.i_data)) {
-+      if (l > sizeof (EXT3_I(inode)->i_data)) {
-               inode->i_op = &page_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
-               /*
-@@ -925,8 +2026,12 @@
-                * i_size in generic_commit_write().
-                */
-               err = block_symlink(inode, symname, l);
--              if (err)
--                      goto out_no_entry;
-+              if (err) {
-+                      ext3_dec_count(handle, inode);
-+                      ext3_mark_inode_dirty(handle, inode);
-+                      iput (inode);
-+                      goto out_stop;
-+              }
-       } else {
-               inode->i_op = &ext3_fast_symlink_inode_operations;
-               memcpy((char*)&inode->u.ext3_i.i_data,symname,l);
-@@ -938,12 +2043,6 @@
- out_stop:
-       ext3_journal_stop(handle, dir);
-       return err;
--
--out_no_entry:
--      ext3_dec_count(handle, inode);
--      ext3_mark_inode_dirty(handle, inode);
--      iput (inode);
--      goto out_stop;
- }
- static int ext3_link (struct dentry * old_dentry,
-@@ -956,12 +2055,15 @@
-       if (S_ISDIR(inode->i_mode))
-               return -EPERM;
--      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (inode->i_nlink >= EXT3_LINK_MAX) {
-               return -EMLINK;
-+      }
--      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
-@@ -995,9 +2097,11 @@
-       old_bh = new_bh = dir_bh = NULL;
--      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2);
--      if (IS_ERR(handle))
-+      handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
-+                                      EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
-+      if (IS_ERR(handle)) {
-               return PTR_ERR(handle);
-+      }
-       if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
-               handle->h_sync = 1;
-@@ -1070,14 +2174,33 @@
-       /*
-        * ok, that's it
-        */
--      ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      retval = ext3_delete_entry(handle, old_dir, old_de, old_bh);
-+      if (retval == -ENOENT) {
-+              /*
-+               * old_de could have moved out from under us.
-+               */
-+              struct buffer_head *old_bh2;
-+              struct ext3_dir_entry_2 *old_de2;
-+              
-+              old_bh2 = ext3_find_entry(old_dentry, &old_de2);
-+              if (old_bh2) {
-+                      retval = ext3_delete_entry(handle, old_dir,
-+                                                 old_de2, old_bh2);
-+                      brelse(old_bh2);
-+              }
-+      }
-+      if (retval) {
-+              ext3_warning(old_dir->i_sb, "ext3_rename",
-+                              "Deleting old file (%lu), %d, error=%d",
-+                              old_dir->i_ino, old_dir->i_nlink, retval);
-+      }
-       if (new_inode) {
-               new_inode->i_nlink--;
-               new_inode->i_ctime = CURRENT_TIME;
-       }
-       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
--      old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+      ext3_update_dx_flag(old_dir);
-       if (dir_bh) {
-               BUFFER_TRACE(dir_bh, "get_write_access");
-               ext3_journal_get_write_access(handle, dir_bh);
-@@ -1089,7 +2212,7 @@
-                       new_inode->i_nlink--;
-               } else {
-                       new_dir->i_nlink++;
--                      new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
-+                      ext3_update_dx_flag(new_dir);
-                       ext3_mark_inode_dirty(handle, new_dir);
-               }
-       }
-Index: linux.mcp2/fs/ext3/super.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/super.c    2004-05-17 15:03:55.000000000 -0700
-+++ linux.mcp2/fs/ext3/super.c 2004-05-17 15:08:50.000000000 -0700
-@@ -702,6 +702,7 @@
-       es->s_mtime = cpu_to_le32(CURRENT_TIME);
-       ext3_update_dynamic_rev(sb);
-       EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-+
-       ext3_commit_super (sb, es, 1);
-       if (test_opt (sb, DEBUG))
-               printk (KERN_INFO
-@@ -712,6 +713,7 @@
-                       EXT3_BLOCKS_PER_GROUP(sb),
-                       EXT3_INODES_PER_GROUP(sb),
-                       sbi->s_mount_opt);
-+
-       printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ",
-                               bdevname(sb->s_dev));
-       if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
-@@ -886,6 +888,7 @@
-       return res;
- }
-+
- struct super_block * ext3_read_super (struct super_block * sb, void * data,
-                                     int silent)
- {
-@@ -1062,6 +1065,9 @@
-       sbi->s_mount_state = le16_to_cpu(es->s_state);
-       sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb));
-       sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb));
-+      for (i=0; i < 4; i++)
-+              sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
-+      sbi->s_def_hash_version = es->s_def_hash_version;
-       if (sbi->s_blocks_per_group > blocksize * 8) {
-               printk (KERN_ERR
-@@ -1744,7 +1750,7 @@
-       unregister_filesystem(&ext3_fs_type);
- }
--EXPORT_NO_SYMBOLS;
-+EXPORT_SYMBOL(ext3_force_commit);
- MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
- MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
-Index: linux.mcp2/include/linux/ext3_fs.h
-===================================================================
---- linux.mcp2.orig/include/linux/ext3_fs.h    2004-05-17 14:53:17.000000000 -0700
-+++ linux.mcp2/include/linux/ext3_fs.h 2004-05-17 15:07:07.000000000 -0700
-@@ -40,6 +40,11 @@
- #define EXT3FS_VERSION                "2.4-0.9.17"
- /*
-+ * Always enable hashed directories
-+ */
-+#define CONFIG_EXT3_INDEX
-+
-+/*
-  * Debug code
-  */
- #ifdef EXT3FS_DEBUG
-@@ -437,8 +442,11 @@
- /*E0*/        __u32   s_journal_inum;         /* inode number of journal file */
-       __u32   s_journal_dev;          /* device number of journal file */
-       __u32   s_last_orphan;          /* start of list of inodes to delete */
--
--/*EC*/        __u32   s_reserved[197];        /* Padding to the end of the block */
-+      __u32   s_hash_seed[4];         /* HTREE hash seed */
-+      __u8    s_def_hash_version;     /* Default hash version to use */
-+      __u8    s_reserved_char_pad;
-+      __u16   s_reserved_word_pad;
-+      __u32   s_reserved[192];        /* Padding to the end of the block */
- };
- #ifdef __KERNEL__
-@@ -575,9 +583,46 @@
- #define EXT3_DIR_ROUND                        (EXT3_DIR_PAD - 1)
- #define EXT3_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT3_DIR_ROUND) & \
-                                        ~EXT3_DIR_ROUND)
-+/*
-+ * Hash Tree Directory indexing
-+ * (c) Daniel Phillips, 2001
-+ */
-+
-+#ifdef CONFIG_EXT3_INDEX
-+  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+                    (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
-+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#else
-+  #define is_dx(dir) 0
-+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
-+#endif
-+
-+/* Legal values for the dx_root hash_version field: */
-+
-+#define DX_HASH_LEGACY                0
-+#define DX_HASH_HALF_MD4      1
-+#define DX_HASH_TEA           2
-+
-+/* hash info structure used by the directory hash */
-+struct dx_hash_info
-+{
-+      u32             hash;
-+      u32             minor_hash;
-+      int             hash_version;
-+      u32             *seed;
-+};
- #ifdef __KERNEL__
- /*
-+ * Control parameters used by ext3_htree_next_block
-+ */
-+#define HASH_NB_ALWAYS                1
-+
-+
-+/*
-  * Describe an inode's exact location on disk and in memory
-  */
- struct ext3_iloc
-@@ -587,6 +632,27 @@
-       unsigned long block_group;
- };
-+
-+/*
-+ * This structure is stuffed into the struct file's private_data field
-+ * for directories.  It is where we put information so that we can do
-+ * readdir operations in hash tree order.
-+ */
-+struct dir_private_info {
-+      rb_root_t       root;
-+      rb_node_t       *curr_node;
-+      struct fname    *extra_fname;
-+      loff_t          last_pos;
-+      __u32           curr_hash;
-+      __u32           curr_minor_hash;
-+      __u32           next_hash;
-+};
-+
-+/*
-+ * Special error return code only used by dx_probe() and its callers.
-+ */
-+#define ERR_BAD_DX_DIR        -75000
-+
- /*
-  * Function prototypes
-  */
-@@ -614,11 +680,20 @@
- /* dir.c */
- extern int ext3_check_dir_entry(const char *, struct inode *,
--                              struct ext3_dir_entry_2 *, struct buffer_head *,
--                              unsigned long);
-+                              struct ext3_dir_entry_2 *,
-+                              struct buffer_head *, unsigned long);
-+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
-+                                  __u32 minor_hash,
-+                                  struct ext3_dir_entry_2 *dirent);
-+extern void ext3_htree_free_dir_info(struct dir_private_info *p);
-+
- /* fsync.c */
- extern int ext3_sync_file (struct file *, struct dentry *, int);
-+/* hash.c */
-+extern int ext3fs_dirhash(const char *name, int len, struct
-+                        dx_hash_info *hinfo);
-+
- /* ialloc.c */
- extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int);
- extern void ext3_free_inode (handle_t *, struct inode *);
-@@ -650,6 +725,8 @@
- /* namei.c */
- extern int ext3_orphan_add(handle_t *, struct inode *);
- extern int ext3_orphan_del(handle_t *, struct inode *);
-+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-+                              __u32 start_minor_hash, __u32 *next_hash);
- /* super.c */
- extern void ext3_error (struct super_block *, const char *, const char *, ...)
-Index: linux.mcp2/include/linux/ext3_fs_sb.h
-===================================================================
---- linux.mcp2.orig/include/linux/ext3_fs_sb.h 2004-05-17 14:41:25.000000000 -0700
-+++ linux.mcp2/include/linux/ext3_fs_sb.h      2004-05-17 15:07:07.000000000 -0700
-@@ -62,6 +62,8 @@
-       int s_inode_size;
-       int s_first_ino;
-       u32 s_next_generation;
-+      u32 s_hash_seed[4];
-+      int s_def_hash_version;
-       /* Journaling */
-       struct inode * s_journal_inode;
-Index: linux.mcp2/include/linux/ext3_jbd.h
-===================================================================
---- linux.mcp2.orig/include/linux/ext3_jbd.h   2004-05-17 14:53:17.000000000 -0700
-+++ linux.mcp2/include/linux/ext3_jbd.h        2004-05-17 15:07:07.000000000 -0700
-@@ -63,6 +63,8 @@
- #define EXT3_RESERVE_TRANS_BLOCKS     12
-+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
-+
- int
- ext3_mark_iloc_dirty(handle_t *handle, 
-                    struct inode *inode,
-Index: linux.mcp2/include/linux/rbtree.h
-===================================================================
---- linux.mcp2.orig/include/linux/rbtree.h     2004-05-17 14:41:25.000000000 -0700
-+++ linux.mcp2/include/linux/rbtree.h  2004-05-17 15:07:07.000000000 -0700
-@@ -120,6 +120,8 @@
- extern void rb_insert_color(rb_node_t *, rb_root_t *);
- extern void rb_erase(rb_node_t *, rb_root_t *);
-+extern rb_node_t *rb_get_first(rb_root_t *root);
-+extern rb_node_t *rb_get_next(rb_node_t *n);
- static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link)
- {
-Index: linux.mcp2/lib/rbtree.c
-===================================================================
---- linux.mcp2.orig/lib/rbtree.c       2004-01-19 07:49:44.000000000 -0800
-+++ linux.mcp2/lib/rbtree.c    2004-05-17 15:10:39.000000000 -0700
-@@ -17,6 +17,8 @@
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-   linux/lib/rbtree.c
-+
-+  rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002
- */
- #include <linux/rbtree.h>
-@@ -294,3 +296,42 @@
-               __rb_erase_color(child, parent, root);
- }
- EXPORT_SYMBOL(rb_erase);
-+
-+/*
-+ * This function returns the first node (in sort order) of the tree.
-+ */
-+rb_node_t *rb_get_first(rb_root_t *root)
-+{
-+      rb_node_t       *n;
-+
-+      n = root->rb_node;
-+      if (!n)
-+              return 0;
-+      while (n->rb_left)
-+              n = n->rb_left;
-+      return n;
-+}
-+EXPORT_SYMBOL(rb_get_first);
-+
-+/*
-+ * Given a node, this function will return the next node in the tree.
-+ */
-+rb_node_t *rb_get_next(rb_node_t *n)
-+{
-+      rb_node_t       *parent;
-+
-+      if (n->rb_right) {
-+              n = n->rb_right;
-+              while (n->rb_left)
-+                      n = n->rb_left;
-+              return n;
-+      } else {
-+              while ((parent = n->rb_parent)) {
-+                      if (n == parent->rb_left)
-+                              return parent;
-+                      n = parent;
-+              }
-+              return 0;
-+      }
-+}
-+EXPORT_SYMBOL(rb_get_next);
index 49528cf..52e5521 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-stage/include/linux/ext3_fs.h
 --- linux-stage.orig/include/linux/ext3_fs.h   2005-02-25 14:53:56.424908168 +0200
 +++ linux-stage/include/linux/ext3_fs.h        2005-02-25 14:53:59.376459464 +0200
 @@ -361,12 +361,13 @@
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 -#ifndef _LINUX_EXT2_FS_H
index acf97dd..1ac944b 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-stage/include/linux/ext3_fs.h
 --- linux-stage.orig/include/linux/ext3_fs.h   2004-04-02 16:43:37.000000000 -0500
 +++ linux-stage/include/linux/ext3_fs.h        2004-04-02 16:43:37.000000000 -0500
 @@ -331,12 +331,13 @@
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 -#ifndef _LINUX_EXT2_FS_H
index 0594199..bb9928a 100644 (file)
@@ -18,9 +18,9 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -339,6 +347,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
@@ -126,11 +126,11 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
 @@ -543,7 +544,7 @@ enum {
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
-       Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_err, Opt_extents, Opt_extdebug
-+      Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc
+       Opt_ignore, Opt_barrier,
+       Opt_err,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
@@ -139,9 +139,9 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
 @@ -811,6 +813,9 @@ static int parse_options (char * options
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
@@ -334,7 +334,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c   2005-12-09 13:08:53.191437750 +0300
 +++ linux-2.6.5-7.201/fs/ext3/mballoc.c        2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2435 @@
+@@ -0,0 +1,2430 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -899,10 +899,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1664,8 +1666,6 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1944,10 +1944,6 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2769,18 +2765,16 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.5-7.201/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.5-7.201.orig/fs/ext3/Makefile    2005-12-17 02:53:30.000000000 +0300
 +++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
--         ioctl.o namei.o super.o symlink.o hash.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o extents.o \
-+         mballoc.o
+          ioctl.o namei.o super.o symlink.o hash.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
index 2e6a6f4..a2b9caf 100644 (file)
@@ -18,7 +18,7 @@ Index: linux-2.6.12.6/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -366,6 +374,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
  #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
@@ -122,16 +122,17 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -597,6 +598,7 @@ enum {
+@@ -597,7 +598,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
-+      Opt_mballoc,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
 @@ -649,6 +651,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
@@ -328,7 +329,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.12.6.orig/fs/ext3/mballoc.c      2005-12-09 13:08:53.191437750 +0300
 +++ linux-2.6.12.6/fs/ext3/mballoc.c   2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2429 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -893,10 +894,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1658,8 +1661,6 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1938,10 +1939,6 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2762,18 +2759,16 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.12.6/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.12.6.orig/fs/ext3/Makefile       2005-12-17 02:17:16.000000000 +0300
 +++ linux-2.6.12.6/fs/ext3/Makefile    2005-12-17 02:21:21.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
--         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+         mballoc.o
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
index 8fdd6cf..d12c678 100644 (file)
@@ -18,9 +18,9 @@ Index: linux-2.6.9-full/include/linux/ext3_fs.h
   * Special inodes numbers
   */
 @@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x100000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x200000/* Extents debug */
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
 +#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
@@ -123,16 +123,17 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -596,6 +597,7 @@ enum {
+@@ -596,7 +597,7 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_extents, Opt_extdebug,
-+      Opt_mballoc,
+-      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_extdebug, Opt_mballoc,
  };
  
  static match_table_t tokens = {
 @@ -647,6 +649,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv,  "iopen_nopriv"},
+       {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
@@ -347,7 +348,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2005-12-16 17:46:19.148560250 +0300
 +++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2429 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -912,10 +913,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; bh && i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -1677,8 +1680,6 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1957,10 +1958,6 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              return -EIO;
 +      size = sizeof(struct ext3_mb_history) * sbi->s_mb_history_max;
 +      s->history = kmalloc(size, GFP_KERNEL);
-+      if (s == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
 +
 +      spin_lock(&sbi->s_mb_history_lock);
 +      memcpy(s->history, sbi->s_mb_history, size);
@@ -2781,18 +2778,16 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-+
 Index: linux-2.6.9-full/fs/ext3/Makefile
 ===================================================================
 --- linux-2.6.9-full.orig/fs/ext3/Makefile     2005-12-16 23:16:41.000000000 +0300
 +++ linux-2.6.9-full/fs/ext3/Makefile  2005-12-16 23:16:42.000000000 +0300
-@@ -5,7 +5,8 @@
- obj-$(CONFIG_EXT3_FS) += ext3.o
+@@ -6,7 +6,7 @@
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
--         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
-+         ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+         mballoc.o
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.6.12.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.6.12.patch
deleted file mode 100644 (file)
index 6c3ebe1..0000000
+++ /dev/null
@@ -1,161 +0,0 @@
-Index: linux-2.6.7/fs/ext3/namei.c
-===================================================================
---- linux-2.6.7.orig/fs/ext3/namei.c   2004-06-15 23:19:36.000000000 -0600
-+++ linux-2.6.7/fs/ext3/namei.c        2004-08-20 17:48:54.000000000 -0600
-@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t *
- static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
- {
-       inode->i_nlink++;
-+      if (is_dx(inode) && inode->i_nlink > 1) {
-+              /* limit is 16-bit i_links_count */
-+              if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2)
-+                      inode->i_nlink = 1;
-+        }
- }
- static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
- {
--      inode->i_nlink--;
-+      if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
-+              inode->i_nlink--;
- }
- static int ext3_add_nondir(handle_t *handle,
-@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir
-       struct ext3_dir_entry_2 * de;
-       int err;
--      if (dir->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(dir))
-               return -EMLINK;
-       handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir
-       inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-       dir_block = ext3_bread (handle, inode, 0, 1, &err);
-       if (!dir_block) {
--              inode->i_nlink--; /* is this nlink == 0? */
-+              ext3_dec_count(handle, inode); /* is this nlink == 0? */
-               ext3_mark_inode_dirty(handle, inode);
-               iput (inode);
-               goto out_stop;
-@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir
-               iput (inode);
-               goto out_stop;
-       }
--      dir->i_nlink++;
-+      ext3_inc_count(handle, dir);
-       ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-       d_instantiate(dentry, inode);
-@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di
-       retval = ext3_delete_entry(handle, dir, de, bh);
-       if (retval)
-               goto end_rmdir;
--      if (inode->i_nlink != 2)
--              ext3_warning (inode->i_sb, "ext3_rmdir",
--                            "empty directory has nlink!=2 (%d)",
--                            inode->i_nlink);
-+      if (!EXT3_DIR_LINK_EMPTY(inode))
-+              ext3_warning(inode->i_sb, "ext3_rmdir",
-+                           "empty directory has too many links (%d)",
-+                           inode->i_nlink);
-       inode->i_version++;
-       inode->i_nlink = 0;
-       /* There's no need to set i_disksize: the fact that i_nlink is
-@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di
-       ext3_orphan_add(handle, inode);
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-       ext3_mark_inode_dirty(handle, inode);
--      dir->i_nlink--;
-+      ext3_dec_count(handle, dir);
-       ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
-@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di
-       dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-       ext3_update_dx_flag(dir);
-       ext3_mark_inode_dirty(handle, dir);
--      inode->i_nlink--;
-+      ext3_dec_count(handle, inode);
-       if (!inode->i_nlink)
-               ext3_orphan_add(handle, inode);
-       inode->i_ctime = dir->i_ctime;
-@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol
-       struct inode *inode = old_dentry->d_inode;
-       int err;
--      if (inode->i_nlink >= EXT3_LINK_MAX)
-+      if (EXT3_DIR_LINK_MAXED(inode))
-               return -EMLINK;
-       handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
-@@ -2230,8 +2235,8 @@ static int ext3_rename (struct inode * o
-               if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
-                       goto end_rename;
-               retval = -EMLINK;
--              if (!new_inode && new_dir!=old_dir &&
--                              new_dir->i_nlink >= EXT3_LINK_MAX)
-+              if (!new_inode && new_dir != old_dir &&
-+                  EXT3_DIR_LINK_MAXED(new_dir))
-                       goto end_rename;
-       }
-       if (!new_bh) {
-@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o
-       }
-       if (new_inode) {
--              new_inode->i_nlink--;
-+              ext3_dec_count(handle, new_inode);
-               new_inode->i_ctime = CURRENT_TIME_SEC;
-       }
-       old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC;
-@@ -2299,11 +2304,13 @@ static int ext3_rename (struct inode * o
-               PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
-               BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
-               ext3_journal_dirty_metadata(handle, dir_bh);
--              old_dir->i_nlink--;
-+              ext3_dec_count(handle, old_dir);
-               if (new_inode) {
--                      new_inode->i_nlink--;
-+                      /* checked empty_dir above, can't have another parent,
-+                       * ext3_dec_count() won't work for many-linked dirs */
-+                      new_inode->i_nlink = 0;
-               } else {
--                      new_dir->i_nlink++;
-+                      ext3_inc_count(handle, new_dir);
-                       ext3_update_dx_flag(new_dir);
-                       ext3_mark_inode_dirty(handle, new_dir);
-               }
---- linux-2.6.7.orig/include/linux/ext3_fs.h   2004-06-15 23:19:36.000000000 -0600
-+++ linux-2.6.7/include/linux/ext3_fs.h        2004-08-20 17:41:27.000000000 -0600
-@@ -79,7 +81,7 @@
- /*
-  * Maximal count of links to a file
-  */
--#define EXT3_LINK_MAX         32000
-+#define EXT3_LINK_MAX         65000
- /*
-  * Macro-instructions used to manage several block sizes
-@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
-  */
- #ifdef CONFIG_EXT3_INDEX
--  #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
--                                            EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
-+                                          EXT3_FEATURE_COMPAT_DIR_INDEX) && \
-                     (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
--#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
--#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
-+#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
-+                                (is_dx(dir) && (dir)->i_nlink == 1))
- #else
-   #define is_dx(dir) 0
--#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
-+#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
- #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
- #endif
diff --git a/lustre/kernel_patches/patches/ext3-no-write-super.patch b/lustre/kernel_patches/patches/ext3-no-write-super.patch
deleted file mode 100644 (file)
index d2dcdae..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
- 0 files changed
-
---- linux-2.4.20/fs/ext3/super.c~ext3-no-write-super   2003-08-11 13:20:17.000000000 +0400
-+++ linux-2.4.20-alexey/fs/ext3/super.c        2003-08-11 13:31:35.000000000 +0400
-@@ -1849,7 +1849,6 @@ void ext3_write_super (struct super_bloc
-       if (down_trylock(&sb->s_lock) == 0)
-               BUG();          /* aviro detector */
-       sb->s_dirt = 0;
--      target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-       /*
-        * Tricky --- if we are unmounting, the write really does need
-@@ -1857,6 +1856,7 @@ void ext3_write_super (struct super_bloc
-        * sb->s_root.
-        */
-       if (do_sync_supers || !sb->s_root) {
-+              target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-               unlock_super(sb);
-               log_wait_commit(EXT3_SB(sb)->s_journal, target);
-               lock_super(sb);
-
-_
diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch b/lustre/kernel_patches/patches/ext3-orphan_lock-2.4.19-suse.patch
deleted file mode 100644 (file)
index 4c16fe6..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-Index: linux-2.4.19/fs/ext3/namei.c
-===================================================================
---- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:36:03.000000000 -0400
-+++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:37:37.000000000 -0400
-@@ -1751,8 +1751,8 @@
-       struct super_block *sb = inode->i_sb;
-       struct ext3_iloc iloc;
-       int err = 0, rc;
--      
--      lock_super(sb);
-+
-+      down(&EXT3_SB(sb)->s_orphan_lock);
-       if (!list_empty(&EXT3_I(inode)->i_orphan))
-               goto out_unlock;
-@@ -1800,7 +1800,7 @@
-       jbd_debug(4, "orphan inode %ld will point to %d\n",
-                       inode->i_ino, NEXT_ORPHAN(inode));
- out_unlock:
--      unlock_super(sb);
-+      up(&EXT3_SB(sb)->s_orphan_lock);
-       ext3_std_error(inode->i_sb, err);
-       return err;
- }
-@@ -1813,20 +1813,19 @@
- {
-       struct list_head *prev;
-       struct ext3_inode_info *ei = EXT3_I(inode);
--      struct ext3_sb_info *sbi;
-+      struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
-       unsigned long ino_next;
-       struct ext3_iloc iloc;
-       int err = 0;
--      lock_super(inode->i_sb);
-+      down(&sbi->s_orphan_lock);
-       if (list_empty(&ei->i_orphan)) {
--              unlock_super(inode->i_sb);
-+              up(&sbi->s_orphan_lock);
-               return 0;
-       }
-       ino_next = NEXT_ORPHAN(inode);
-       prev = ei->i_orphan.prev;
--      sbi = EXT3_SB(inode->i_sb);
-       jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
-@@ -1872,10 +1871,10 @@
-       if (err)
-               goto out_brelse;
--out_err: 
-+out_err:
-       ext3_std_error(inode->i_sb, err);
- out:
--      unlock_super(inode->i_sb);
-+      up(&sbi->s_orphan_lock);
-       return err;
- out_brelse:
-Index: linux-2.4.19/fs/ext3/super.c
-===================================================================
---- linux-2.4.19.orig/fs/ext3/super.c  2004-04-23 22:30:41.000000000 -0400
-+++ linux-2.4.19/fs/ext3/super.c       2004-04-23 22:36:22.000000000 -0400
-@@ -1179,6 +1179,7 @@
-        */
-       sb->s_op = &ext3_sops;
-       INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
-+      sema_init(&sbi->s_orphan_lock, 1);
-       sb->s_root = 0;
-Index: linux-2.4.19/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.19.orig/include/linux/ext3_fs_sb.h       2004-04-23 18:26:27.000000000 -0400
-+++ linux-2.4.19/include/linux/ext3_fs_sb.h    2004-04-23 22:36:22.000000000 -0400
-@@ -69,6 +69,7 @@
-       struct inode * s_journal_inode;
-       struct journal_s * s_journal;
-       struct list_head s_orphan;
-+      struct semaphore s_orphan_lock;
-       struct block_device *journal_bdev;
- #ifdef CONFIG_JBD_DEBUG
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
diff --git a/lustre/kernel_patches/patches/ext3-unmount_sync.patch b/lustre/kernel_patches/patches/ext3-unmount_sync.patch
deleted file mode 100644 (file)
index c57903c..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
- fs/ext3/super.c |    7 ++++++-
- 1 files changed, 6 insertions(+), 1 deletion(-)
-
---- linux-2.4.20/fs/ext3/super.c~ext3-unmount_sync     2003-04-08 23:35:44.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/super.c 2003-04-08 23:35:44.000000000 -0600
-@@ -1612,7 +1612,12 @@ void ext3_write_super (struct super_bloc
-       sb->s_dirt = 0;
-       target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
--      if (do_sync_supers) {
-+      /*
-+       * Tricky --- if we are unmounting, the write really does need
-+       * to be synchronous.  We can detect that by looking for NULL in
-+       * sb->s_root.
-+       */
-+      if (do_sync_supers || !sb->s_root) {
-               unlock_super(sb);
-               log_wait_commit(EXT3_SB(sb)->s_journal, target);
-               lock_super(sb);
-
-_
diff --git a/lustre/kernel_patches/patches/ext3-use-after-free-2.4.19-pre1.patch b/lustre/kernel_patches/patches/ext3-use-after-free-2.4.19-pre1.patch
deleted file mode 100644 (file)
index 595db54..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
- ./fs/ext3/namei.c |   11 +++++------
- 1 files changed, 5 insertions(+), 6 deletions(-)
-
-Index: linux-2.4.19-pre1/./fs/ext3/namei.c
-===================================================================
---- linux-2.4.19-pre1.orig/./fs/ext3/namei.c   2003-11-21 01:52:06.000000000 +0300
-+++ linux-2.4.19-pre1/./fs/ext3/namei.c        2003-11-21 01:58:15.000000000 +0300
-@@ -1522,8 +1522,11 @@
- {
-       int err = ext3_add_entry(handle, dentry, inode);
-       if (!err) {
--              d_instantiate(dentry, inode);
--              return 0;
-+              err = ext3_mark_inode_dirty(handle, inode);
-+              if (err == 0) {
-+                      d_instantiate(dentry, inode);
-+                      return 0;
-+              }
-       }
-       ext3_dec_count(handle, inode);
-       iput(inode);
-@@ -1559,7 +1562,6 @@
-               inode->i_op = &ext3_file_inode_operations;
-               inode->i_fop = &ext3_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
--              ext3_mark_inode_dirty(handle, inode);
-               err = ext3_add_nondir(handle, dentry, inode);
-       }
-       ext3_journal_stop(handle, dir);
-@@ -1586,7 +1588,6 @@
-       err = PTR_ERR(inode);
-       if (!IS_ERR(inode)) {
-               init_special_inode(inode, mode, rdev);
--              ext3_mark_inode_dirty(handle, inode);
-               err = ext3_add_nondir(handle, dentry, inode);
-       }
-       ext3_journal_stop(handle, dir);
-@@ -2035,7 +2036,6 @@
-               inode->i_size = l-1;
-       }
-       inode->u.ext3_i.i_disksize = inode->i_size;
--      ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_nondir(handle, dentry, inode);
- out_stop:
-       ext3_journal_stop(handle, dir);
-@@ -2069,7 +2069,6 @@
-       ext3_inc_count(handle, inode);
-       atomic_inc(&inode->i_count);
--      ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_nondir(handle, dentry, inode);
-       ext3_journal_stop(handle, dir);
-       return err;
diff --git a/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch b/lustre/kernel_patches/patches/ext3-use-after-free-suse.patch
deleted file mode 100644 (file)
index 7899354..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
- ./fs/ext3/namei.c |   11 +++++------
- 1 files changed, 5 insertions(+), 6 deletions(-)
-
-Index: linux-2.4.19/fs/ext3/namei.c
-===================================================================
---- linux-2.4.19.orig/fs/ext3/namei.c  2004-04-23 22:30:41.000000000 -0400
-+++ linux-2.4.19/fs/ext3/namei.c       2004-04-23 22:36:03.000000000 -0400
-@@ -1522,8 +1522,11 @@
- {
-       int err = ext3_add_entry(handle, dentry, inode);
-       if (!err) {
--              d_instantiate(dentry, inode);
--              return 0;
-+              err = ext3_mark_inode_dirty(handle, inode);
-+              if (err == 0) {
-+                      d_instantiate(dentry, inode);
-+                      return 0;
-+              }
-       }
-       ext3_dec_count(handle, inode);
-       iput(inode);
-@@ -1559,7 +1562,6 @@
-               inode->i_op = &ext3_file_inode_operations;
-               inode->i_fop = &ext3_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
--              ext3_mark_inode_dirty(handle, inode);
-               err = ext3_add_nondir(handle, dentry, inode);
-       }
-       ext3_journal_stop(handle, dir);
-@@ -1589,7 +1591,6 @@
- #ifdef CONFIG_EXT3_FS_XATTR
-               inode->i_op = &ext3_special_inode_operations;
- #endif
--              ext3_mark_inode_dirty(handle, inode);
-               err = ext3_add_nondir(handle, dentry, inode);
-       }
-       ext3_journal_stop(handle, dir);
-@@ -2039,7 +2040,6 @@
-               inode->i_size = l-1;
-       }
-       EXT3_I(inode)->i_disksize = inode->i_size;
--      ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_nondir(handle, dentry, inode);
- out_stop:
-       ext3_journal_stop(handle, dir);
-@@ -2073,7 +2073,6 @@
-       ext3_inc_count(handle, inode);
-       atomic_inc(&inode->i_count);
--      ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_nondir(handle, dentry, inode);
-       ext3_journal_stop(handle, dir);
-       return err;
diff --git a/lustre/kernel_patches/patches/extN-wantedi-2.4.19-suse.patch b/lustre/kernel_patches/patches/extN-wantedi-2.4.19-suse.patch
deleted file mode 100644 (file)
index 02cfef1..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
- fs/ext3/ialloc.c        |   40 ++++++++++++++++++++++++++++++++++++++--
- fs/ext3/inode.c         |    2 +-
- fs/ext3/ioctl.c         |   25 +++++++++++++++++++++++++
- fs/ext3/namei.c         |   21 +++++++++++++++++----
- include/linux/dcache.h  |    5 +++++
- include/linux/ext3_fs.h |    5 ++++-
- 6 files changed, 90 insertions(+), 8 deletions(-)
-
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:18:04 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:23:20 2003
-@@ -1534,6 +1534,19 @@
-       return err;
- }
-+static struct inode * ext3_new_inode_wantedi(handle_t *handle, struct inode *dir,
-+                                              int mode, struct dentry *dentry)
-+{
-+      unsigned long inum = 0;
-+
-+      if (dentry->d_fsdata != NULL) {
-+              struct dentry_params *param =
-+                      (struct dentry_params *) dentry->d_fsdata;
-+              inum = param->p_inum;
-+      }
-+      return ext3_new_inode(handle, dir, mode, inum);
-+}
-+
- /*
-  * By the time this is called, we already have created
-  * the directory cache entry for the new file, but it
-@@ -1557,7 +1570,7 @@
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
--      inode = ext3_new_inode (handle, dir, mode);
-+      inode = ext3_new_inode_wantedi (handle, dir, mode, dentry);
-       err = PTR_ERR(inode);
-       if (!IS_ERR(inode)) {
-               inode->i_op = &ext3_file_inode_operations;
-@@ -1585,7 +1598,7 @@
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
--      inode = ext3_new_inode (handle, dir, mode);
-+      inode = ext3_new_inode_wantedi (handle, dir, mode, dentry);
-       err = PTR_ERR(inode);
-       if (!IS_ERR(inode)) {
-               init_special_inode(inode, inode->i_mode, rdev);
-@@ -1618,7 +1631,7 @@
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
--      inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
-+      inode = ext3_new_inode_wantedi (handle, dir, S_IFDIR | mode, dentry);
-       err = PTR_ERR(inode);
-       if (IS_ERR(inode))
-               goto out_stop;
-@@ -2013,7 +2026,7 @@
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
--      inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
-+      inode = ext3_new_inode_wantedi (handle, dir, S_IFLNK|S_IRWXUGO, dentry);
-       err = PTR_ERR(inode);
-       if (IS_ERR(inode))
-               goto out_stop;
-Index: linux-2.4.19.SuSE/fs/ext3/ialloc.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ialloc.c    Sun Nov 16 01:20:17 2003
-+++ linux-2.4.19.SuSE/fs/ext3/ialloc.c Sun Nov 16 01:24:49 2003
-@@ -330,7 +330,8 @@
-  * For other inodes, search forward from the parent directory's block
-  * group to find a free inode.
-  */
--struct inode * ext3_new_inode (handle_t *handle, struct inode * dir, int mode)
-+struct inode * ext3_new_inode(handle_t *handle, const struct inode * dir,
-+                            int mode, unsigned long goal)
- {
-       struct super_block * sb;
-       struct buffer_head * bh;
-@@ -355,7 +356,41 @@
-       init_rwsem(&inode->u.ext3_i.truncate_sem);
-       lock_super (sb);
--      es = sb->u.ext3_sb.s_es;
-+      es = EXT3_SB(sb)->s_es;
-+
-+      if (goal) {
-+              i = (goal - 1) / EXT3_INODES_PER_GROUP(sb);
-+              j = (goal - 1) % EXT3_INODES_PER_GROUP(sb);
-+              gdp = ext3_get_group_desc(sb, i, &bh2);
-+
-+              bitmap_nr = load_inode_bitmap (sb, i);
-+              if (bitmap_nr < 0) {
-+                      err = bitmap_nr;
-+                      goto fail;
-+              }
-+
-+              bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
-+
-+              BUFFER_TRACE(bh, "get_write_access");
-+              err = ext3_journal_get_write_access(handle, bh);
-+              if (err) goto fail;
-+
-+              if (ext3_set_bit(j, bh->b_data)) {
-+                      printk(KERN_ERR "goal inode %lu unavailable\n", goal);
-+                      /* Oh well, we tried. */
-+                      goto repeat;
-+              }
-+
-+              BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
-+              err = ext3_journal_dirty_metadata(handle, bh);
-+              if (err) goto fail;
-+
-+              /* We've shortcircuited the allocation system successfully,
-+               * now finish filling in the inode.
-+               */
-+              goto have_bit_and_group;
-+      }
-+
- repeat:
-       gdp = NULL;
-       i = 0;
-@@ -470,6 +505,7 @@
-               }
-               goto repeat;
-       }
-+ have_bit_and_group:
-       j += i * EXT3_INODES_PER_GROUP(sb) + 1;
-       if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
-               ext3_error (sb, "ext3_new_inode",
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     Sun Nov 16 01:20:17 2003
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  Sun Nov 16 01:23:20 2003
-@@ -2168,7 +2168,7 @@
-       if (IS_ERR(handle))
-               goto out_truncate;
--      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
-+      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
-       if (IS_ERR(new_inode)) {
-               ext3_debug("truncate inode %lu directly (no new inodes)\n",
-                          old_inode->i_ino);
-Index: linux-2.4.19.SuSE/fs/ext3/ioctl.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ioctl.c     Fri Nov  9 14:25:04 2001
-+++ linux-2.4.19.SuSE/fs/ext3/ioctl.c  Sun Nov 16 01:23:20 2003
-@@ -23,6 +23,31 @@
-       ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
-       switch (cmd) {
-+      case EXT3_IOC_CREATE_INUM: {
-+              char name[32];
-+              struct dentry *dchild, *dparent;
-+              int rc = 0;
-+
-+              dparent = list_entry(inode->i_dentry.next, struct dentry,
-+                                   d_alias);
-+              snprintf(name, sizeof name, "%lu", arg);
-+              dchild = lookup_one_len(name, dparent, strlen(name));
-+              if (dchild->d_inode) {
-+                      printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n",
-+                             dparent->d_name.len, dparent->d_name.name, arg,
-+                             dchild->d_inode->i_ino);
-+                      rc = -EEXIST;
-+              } else {
-+                      dchild->d_fsdata = (void *)arg;
-+                      rc = vfs_create(inode, dchild, 0644);
-+                      if (rc)
-+                              printk(KERN_ERR "vfs_create: %d\n", rc);
-+                      else if (dchild->d_inode->i_ino != arg)
-+                              rc = -EEXIST;
-+              }
-+              dput(dchild);
-+              return rc;
-+      }
-       case EXT3_IOC_GETFLAGS:
-               flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE;
-               return put_user(flags, (int *) arg);
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     Sun Nov 16 01:20:17 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  Sun Nov 16 01:25:42 2003
-@@ -202,6 +202,7 @@
- #define       EXT3_IOC_SETFLAGS               _IOW('f', 2, long)
- #define       EXT3_IOC_GETVERSION             _IOR('f', 3, long)
- #define       EXT3_IOC_SETVERSION             _IOW('f', 4, long)
-+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
- #define       EXT3_IOC_GETVERSION_OLD         _IOR('v', 1, long)
- #define       EXT3_IOC_SETVERSION_OLD         _IOW('v', 2, long)
- #ifdef CONFIG_JBD_DEBUG
-@@ -674,7 +675,8 @@
-                         dx_hash_info *hinfo);
- /* ialloc.c */
--extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
-+extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int,
-+                                    unsigned long);
- extern void ext3_free_inode (handle_t *, struct inode *);
- extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
- extern unsigned long ext3_count_free_inodes (struct super_block *);
-@@ -765,4 +767,5 @@
- #endif        /* __KERNEL__ */
-+#define EXT3_IOC_CREATE_INUM                  _IOW('f', 5, long)
- #endif        /* _LINUX_EXT3_FS_H */
-Index: linux-2.4.19.SuSE/include/linux/dcache.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/dcache.h      Sat Nov 15 17:35:46 2003
-+++ linux-2.4.19.SuSE/include/linux/dcache.h   Sun Nov 16 01:23:20 2003
-@@ -62,6 +62,11 @@
- #define IS_ROOT(x) ((x) == (x)->d_parent)
-+struct dentry_params {
-+      unsigned long   p_inum;
-+      void            *p_ptr;
-+};
-+
- /*
-  * "quick string" -- eases parameter passing, but more importantly
-  * saves "metadata" about the string (ie length and the hash).
diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.19-bgl.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 85bdf9e..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-
-
-
- fs/inode.c         |   21 ++++++++++++++-------
- fs/smbfs/inode.c   |    2 +-
- fs/super.c         |    4 ++--
- include/linux/fs.h |    2 +-
- 4 files changed, 18 insertions(+), 11 deletions(-)
-
-Index: linux.mcp2/fs/inode.c
-===================================================================
---- linux.mcp2.orig/fs/inode.c 2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/inode.c      2004-05-05 14:31:31.000000000 -0700
-@@ -553,7 +553,8 @@
- /*
-  * Invalidate all inodes for a device.
-  */
--static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
-+static int invalidate_list(struct list_head *head, struct super_block * sb,
-+                         struct list_head * dispose, int show)
- {
-       struct list_head *next;
-       int busy = 0, count = 0;
-@@ -578,6 +579,11 @@
-                       count++;
-                       continue;
-               }
-+              if (show)
-+                      printk(KERN_ERR
-+                             "inode busy: dev %s:%lu (%p) mode %o count %u\n",
-+                             kdevname(sb->s_dev), inode->i_ino, inode,
-+                             inode->i_mode, atomic_read(&inode->i_count));
-               busy = 1;
-       }
-       /* only unused inodes may be cached with i_count zero */
-@@ -596,22 +602,23 @@
- /**
-  *    invalidate_inodes       - discard the inodes on a device
-  *    @sb: superblock
-+ *    @show: whether we should display any busy inodes found
-  *
-  *    Discard all of the inodes for a given superblock. If the discard
-  *    fails because there are busy inodes then a non zero value is returned.
-  *    If the discard is successful all the inodes have been discarded.
-  */
-  
--int invalidate_inodes(struct super_block * sb)
-+int invalidate_inodes(struct super_block * sb, int show)
- {
-       int busy;
-       LIST_HEAD(throw_away);
-       spin_lock(&inode_lock);
--      busy = invalidate_list(&inode_in_use, sb, &throw_away);
--      busy |= invalidate_list(&inode_unused, sb, &throw_away);
--      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
--      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
-+      busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
-+      busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
-+      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
-+      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
-       spin_unlock(&inode_lock);
-       dispose_list(&throw_away);
-@@ -637,7 +644,7 @@
-                * hold).
-                */
-               shrink_dcache_sb(sb);
--              res = invalidate_inodes(sb);
-+              res = invalidate_inodes(sb, 0);
-               drop_super(sb);
-       }
-       invalidate_buffers(dev);
-Index: linux.mcp2/fs/super.c
-===================================================================
---- linux.mcp2.orig/fs/super.c 2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/super.c      2004-05-05 14:32:06.000000000 -0700
-@@ -838,7 +838,7 @@
-       lock_super(sb);
-       lock_kernel();
-       sb->s_flags &= ~MS_ACTIVE;
--      invalidate_inodes(sb);  /* bad name - it should be evict_inodes() */
-+      invalidate_inodes(sb, 0);  /* bad name - it should be evict_inodes() */
-       if (sop) {
-               if (sop->write_super && sb->s_dirt)
-                       sop->write_super(sb);
-@@ -847,7 +847,7 @@
-       }
-       /* Forget any remaining inodes */
--      if (invalidate_inodes(sb)) {
-+      if (invalidate_inodes(sb, 1)) {
-               printk(KERN_ERR "VFS: Busy inodes after unmount. "
-                       "Self-destruct in 5 seconds.  Have a nice day...\n");
-       }
-Index: linux.mcp2/fs/smbfs/inode.c
-===================================================================
---- linux.mcp2.orig/fs/smbfs/inode.c   2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/smbfs/inode.c        2004-05-05 14:31:31.000000000 -0700
-@@ -166,7 +166,7 @@
- {
-       VERBOSE("\n");
-       shrink_dcache_sb(SB_of(server));
--      invalidate_inodes(SB_of(server));
-+      invalidate_inodes(SB_of(server), 0);
- }
- /*
-Index: linux.mcp2/include/linux/fs.h
-===================================================================
---- linux.mcp2.orig/include/linux/fs.h 2004-05-05 14:31:06.000000000 -0700
-+++ linux.mcp2/include/linux/fs.h      2004-05-05 14:31:31.000000000 -0700
-@@ -1283,7 +1283,7 @@
- extern void set_buffer_flushtime(struct buffer_head *);
- extern void balance_dirty(void);
- extern int check_disk_change(kdev_t);
--extern int invalidate_inodes(struct super_block *);
-+extern int invalidate_inodes(struct super_block *, int);
- extern int invalidate_device(kdev_t, int);
- extern void invalidate_inode_pages(struct inode *);
- extern void invalidate_inode_pages2(struct address_space *);
diff --git a/lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-bgl.patch b/lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 2466af6..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
- fs/Makefile     |    2 +-
- fs/inode.c      |    4 +++-
- mm/page_alloc.c |    1 +
- 3 files changed, 5 insertions(+), 2 deletions(-)
-
-Index: linux-ion/fs/inode.c
-===================================================================
---- linux-ion.orig/fs/inode.c  2004-09-27 14:58:03.000000000 -0700
-+++ linux-ion/fs/inode.c       2004-09-27 14:58:34.000000000 -0700
-@@ -5,6 +5,7 @@
-  */
- #include <linux/config.h>
-+#include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/string.h>
- #include <linux/mm.h>
-@@ -66,7 +67,8 @@
-  * NOTE! You also have to own the lock if you change
-  * the i_state of an inode while it is in use..
-  */
--static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
-+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
-+EXPORT_SYMBOL(inode_lock);
- /*
-  * Statistics gathering..
-Index: linux-ion/fs/Makefile
-===================================================================
---- linux-ion.orig/fs/Makefile 2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/fs/Makefile      2004-09-27 14:59:37.000000000 -0700
-@@ -7,7 +7,7 @@
- O_TARGET := fs.o
--export-objs :=        filesystems.o open.o dcache.o buffer.o
-+export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
- mod-subdirs :=        nls
- obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
-Index: linux-ion/mm/page_alloc.c
-===================================================================
---- linux-ion.orig/mm/page_alloc.c     2004-07-28 14:34:57.000000000 -0700
-+++ linux-ion/mm/page_alloc.c  2004-09-27 14:58:34.000000000 -0700
-@@ -28,6 +28,7 @@
- LIST_HEAD(inactive_list);
- LIST_HEAD(active_list);
- pg_data_t *pgdat_list;
-+EXPORT_SYMBOL(pgdat_list);
- /* Used to look up the address of the struct zone encoded in page->zone */
- zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
diff --git a/lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-suse.patch b/lustre/kernel_patches/patches/iod-stock-24-exports-2.4.19-suse.patch
deleted file mode 100644 (file)
index 2040fcd..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
- fs/Makefile     |    2 +-
- fs/inode.c      |    4 +++-
- mm/page_alloc.c |    1 +
- 3 files changed, 5 insertions(+), 2 deletions(-)
-
-Index: linux-2.4.19.SuSE/fs/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/inode.c  Sat Nov 15 18:02:13 2003
-+++ linux-2.4.19.SuSE/fs/inode.c       Sat Nov 15 18:03:04 2003
-@@ -5,6 +5,7 @@
-  */
- #include <linux/config.h>
-+#include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/string.h>
- #include <linux/mm.h>
-@@ -67,7 +68,8 @@
-  * NOTE! You also have to own the lock if you change
-  * the i_state of an inode while it is in use..
-  */
--static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
-+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
-+EXPORT_SYMBOL(inode_lock);
- /*
-  * Statistics gathering..
-Index: linux-2.4.19.SuSE/fs/Makefile
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/Makefile Mon Jan 27 05:08:56 2003
-+++ linux-2.4.19.SuSE/fs/Makefile      Sat Nov 15 18:03:54 2003
-@@ -7,7 +7,7 @@
- O_TARGET := fs.o
--export-objs :=        filesystems.o open.o dcache.o buffer.o
-+export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
- mod-subdirs :=        nls
- obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
-Index: linux-2.4.19.SuSE/mm/page_alloc.c
-===================================================================
---- linux-2.4.19.SuSE.orig/mm/page_alloc.c     Mon Jan 27 05:08:55 2003
-+++ linux-2.4.19.SuSE/mm/page_alloc.c  Sat Nov 15 18:03:04 2003
-@@ -32,6 +32,7 @@
- LIST_HEAD(inactive_list);
- LIST_HEAD(active_list);
- pg_data_t *pgdat_list;
-+EXPORT_SYMBOL(pgdat_list);
- /* Used to look up the address of the struct zone encoded in page->zone */
- zone_t *zone_table[MAX_NR_ZONES*MAX_NR_NODES];
diff --git a/lustre/kernel_patches/patches/iopen-2.4.19-bgl.patch b/lustre/kernel_patches/patches/iopen-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 8ab05f9..0000000
+++ /dev/null
@@ -1,497 +0,0 @@
- Documentation/filesystems/ext2.txt |   16 ++
- fs/ext3/Makefile                   |    2 
- fs/ext3/inode.c                    |    4 
- fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   13 +
- fs/ext3/namei.c                    |   13 +
- fs/ext3/super.c                    |   11 +
- include/linux/ext3_fs.h            |    2 
- 8 files changed, 318 insertions(+), 2 deletions(-)
-
-Index: linux-2.4.19/Documentation/filesystems/ext2.txt
-===================================================================
---- linux-2.4.19.orig/Documentation/filesystems/ext2.txt       2001-07-11 18:44:45.000000000 -0400
-+++ linux-2.4.19/Documentation/filesystems/ext2.txt    2004-04-23 22:37:48.000000000 -0400
-@@ -35,6 +35,22 @@
- sb=n                          Use alternate superblock at this location.
-+iopen                         Makes an invisible pseudo-directory called
-+                              __iopen__ available in the root directory
-+                              of the filesystem.  Allows open-by-inode-
-+                              number.  i.e., inode 3145 can be accessed
-+                              via /mntpt/__iopen__/3145
-+
-+iopen_nopriv                  This option makes the iopen directory be
-+                              world-readable.  This may be safer since it
-+                              allows daemons to run as an unprivileged user,
-+                              however it significantly changes the security
-+                              model of a Unix filesystem, since previously
-+                              all files under a mode 700 directory were not
-+                              generally available even if the
-+                              permissions on the file itself are
-+                              world-readable.
-+
- grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
-Index: linux.mcp2/fs/ext3/Makefile
-===================================================================
---- linux.mcp2.orig/fs/ext3/Makefile   2004-05-17 15:20:52.000000000 -0700
-+++ linux.mcp2/fs/ext3/Makefile        2004-05-17 15:21:55.000000000 -0700
-@@ -11,7 +11,7 @@
- export-objs := ext3-exports.o
--obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
- obj-m    := $(O_TARGET)
-Index: linux.mcp2/fs/ext3/inode.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/inode.c    2004-05-17 15:20:59.000000000 -0700
-+++ linux.mcp2/fs/ext3/inode.c 2004-05-17 15:21:55.000000000 -0700
-@@ -31,6 +31,7 @@
- #include <linux/highuid.h>
- #include <linux/quotaops.h>
- #include <linux/module.h>
-+#include "iopen.h"
- /*
-  * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2125,6 +2126,9 @@
-       struct buffer_head *bh;
-       int block;
-       
-+      if (ext3_iopen_get_inode(inode))
-+              return;
-+
-       if(ext3_get_inode_loc(inode, &iloc))
-               goto bad_inode;
-       bh = iloc.bh;
-Index: linux.mcp2/fs/ext3/iopen.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/iopen.c    2002-04-11 07:25:15.000000000 -0700
-+++ linux.mcp2/fs/ext3/iopen.c 2004-05-17 15:21:55.000000000 -0700
-@@ -0,0 +1,285 @@
-+/*
-+ * linux/fs/ext3/iopen.c
-+ *
-+ * Special support for open by inode number
-+ *
-+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
-+ *
-+ * This file may be redistributed under the terms of the GNU General
-+ * Public License.
-+ *
-+ *
-+ * Invariants:
-+ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
-+ *     for an inode at one time.
-+ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
-+ *     aliases on an inode at the same time.
-+ *
-+ * If we have any connected dentry aliases for an inode, use one of those
-+ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
-+ * dentry for this inode, which thereafter will be found by the dcache
-+ * when looking up this inode number in __iopen__, so we don't return here
-+ * until it is gone.
-+ *
-+ * If we get an inode via a regular name lookup, then we "rename" the
-+ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
-+ * existing users of the disconnected dentry will continue to use the same
-+ * dentry as the connected users, and there will never be both kinds of
-+ * dentry aliases at one time.
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/locks.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/smp_lock.h>
-+#include "iopen.h"
-+
-+#ifndef assert
-+#define assert(test) J_ASSERT(test)
-+#endif
-+
-+#define IOPEN_NAME_LEN        32
-+
-+/*
-+ * This implements looking up an inode by number.
-+ */
-+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
-+{
-+      struct inode *inode;
-+      unsigned long ino;
-+      struct list_head *lp;
-+      struct dentry *alternate;
-+      char buf[IOPEN_NAME_LEN];
-+
-+      if (dentry->d_name.len >= IOPEN_NAME_LEN)
-+              return ERR_PTR(-ENAMETOOLONG);
-+
-+      memcpy(buf, dentry->d_name.name, dentry->d_name.len);
-+      buf[dentry->d_name.len] = 0;
-+
-+      if (strcmp(buf, ".") == 0)
-+              ino = dir->i_ino;
-+      else if (strcmp(buf, "..") == 0)
-+              ino = EXT3_ROOT_INO;
-+      else
-+              ino = simple_strtoul(buf, 0, 0);
-+
-+      if ((ino != EXT3_ROOT_INO &&
-+           //ino != EXT3_ACL_IDX_INO &&
-+           //ino != EXT3_ACL_DATA_INO &&
-+           ino < EXT3_FIRST_INO(dir->i_sb)) ||
-+          ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
-+              return ERR_PTR(-ENOENT);
-+
-+      inode = iget(dir->i_sb, ino);
-+      if (!inode)
-+              return ERR_PTR(-EACCES);
-+      if (is_bad_inode(inode)) {
-+              iput(inode);
-+              return ERR_PTR(-ENOENT);
-+      }
-+
-+      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(list_empty(&dentry->d_hash));            /* d_rehash */
-+
-+      /* preferably return a connected dentry */
-+      spin_lock(&dcache_lock);
-+      list_for_each(lp, &inode->i_dentry) {
-+              alternate = list_entry(lp, struct dentry, d_alias);
-+              assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
-+      }
-+
-+      if (!list_empty(&inode->i_dentry)) {
-+              alternate = list_entry(inode->i_dentry.next,
-+                                     struct dentry, d_alias);
-+              dget_locked(alternate);
-+              alternate->d_vfs_flags |= DCACHE_REFERENCED;
-+              iput(inode);
-+              spin_unlock(&dcache_lock);
-+              return alternate;
-+      }
-+      dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
-+
-+      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
-+      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
-+      dentry->d_inode = inode;
-+
-+      __d_rehash(dentry, 0);                          /* d_rehash */
-+      spin_unlock(&dcache_lock);
-+
-+      return NULL;
-+}
-+
-+#define do_switch(x,y) do { \
-+      __typeof__ (x) __tmp = x; \
-+      x = y; y = __tmp; } while (0)
-+
-+static inline void switch_names(struct dentry *dentry, struct dentry *target)
-+{
-+      const unsigned char *old_name, *new_name;
-+
-+      memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
-+      old_name = target->d_name.name;
-+      new_name = dentry->d_name.name;
-+      if (old_name == target->d_iname)
-+              old_name = dentry->d_iname;
-+      if (new_name == dentry->d_iname)
-+              new_name = target->d_iname;
-+      target->d_name.name = new_name;
-+      dentry->d_name.name = old_name;
-+}
-+
-+/* This function is spliced into ext3_lookup and does the move of a
-+ * disconnected dentry (if it exists) to a connected dentry.
-+ */
-+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
-+                                  int rehash)
-+{
-+      struct dentry *tmp, *goal = NULL;
-+      struct list_head *lp;
-+
-+      /* verify this dentry is really new */
-+      assert(dentry->d_inode == NULL);
-+      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      if (rehash)
-+              assert(list_empty(&dentry->d_hash));    /* d_rehash */
-+      assert(list_empty(&dentry->d_subdirs));
-+
-+      spin_lock(&dcache_lock);
-+      if (!inode)
-+              goto do_rehash;
-+
-+      if (!test_opt(inode->i_sb, IOPEN))
-+              goto do_instantiate;
-+
-+      /* preferably return a connected dentry */
-+      list_for_each(lp, &inode->i_dentry) {
-+              tmp = list_entry(lp, struct dentry, d_alias);
-+              if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
-+                      assert(tmp->d_alias.next == &inode->i_dentry);
-+                      assert(tmp->d_alias.prev == &inode->i_dentry);
-+                      goal = tmp;
-+                      dget_locked(goal);
-+                      break;
-+              }
-+      }
-+
-+      if (!goal)
-+              goto do_instantiate;
-+
-+      /* Move the goal to the de hash queue - like d_move() */
-+      goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
-+      list_del_init(&goal->d_hash);
-+
-+      list_del(&goal->d_child);
-+      list_del(&dentry->d_child);
-+
-+      /* Switch the parents and the names.. */
-+      switch_names(goal, dentry);
-+      do_switch(goal->d_parent, dentry->d_parent);
-+      do_switch(goal->d_name.len, dentry->d_name.len);
-+      do_switch(goal->d_name.hash, dentry->d_name.hash);
-+
-+      /* And add them back to the (new) parent lists */
-+      list_add(&goal->d_child, &goal->d_parent->d_subdirs);
-+      list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
-+      __d_rehash(goal, 0);
-+      spin_unlock(&dcache_lock);
-+      iput(inode);
-+
-+      return goal;
-+
-+      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
-+do_instantiate:
-+      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
-+      dentry->d_inode = inode;
-+do_rehash:
-+      if (rehash)
-+              __d_rehash(dentry, 0);                  /* d_rehash */
-+      spin_unlock(&dcache_lock);
-+
-+      return NULL;
-+}
-+
-+/*
-+ * These are the special structures for the iopen pseudo directory.
-+ */
-+
-+static struct inode_operations iopen_inode_operations = {
-+      lookup:         iopen_lookup,           /* BKL held */
-+};
-+
-+static struct file_operations iopen_file_operations = {
-+      read:           generic_read_dir,
-+};
-+
-+static int match_dentry(struct dentry *dentry, const char *name)
-+{
-+      int     len;
-+
-+      len = strlen(name);
-+      if (dentry->d_name.len != len)
-+              return 0;
-+      if (strncmp(dentry->d_name.name, name, len))
-+              return 0;
-+      return 1;
-+}
-+
-+/*
-+ * This function is spliced into ext3_lookup and returns 1 if the file
-+ * name is __iopen__ and dentry has been filled in appropriately.
-+ */
-+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
-+{
-+      struct inode *inode;
-+
-+      if (dir->i_ino != EXT3_ROOT_INO ||
-+          !test_opt(dir->i_sb, IOPEN) ||
-+          !match_dentry(dentry, "__iopen__"))
-+              return 0;
-+
-+      inode = iget(dir->i_sb, EXT3_BAD_INO);
-+
-+      if (!inode)
-+              return 0;
-+      d_add(dentry, inode);
-+      return 1;
-+}
-+
-+/*
-+ * This function is spliced into read_inode; it returns 1 if inode
-+ * number is the one for /__iopen__, in which case the inode is filled
-+ * in appropriately.  Otherwise, this function returns 0.
-+ */
-+int ext3_iopen_get_inode(struct inode *inode)
-+{
-+      if (inode->i_ino != EXT3_BAD_INO)
-+              return 0;
-+
-+      inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
-+      if (test_opt(inode->i_sb, IOPEN_NOPRIV))
-+              inode->i_mode |= 0777;
-+      inode->i_uid = 0;
-+      inode->i_gid = 0;
-+      inode->i_nlink = 1;
-+      inode->i_size = 4096;
-+      inode->i_atime = CURRENT_TIME;
-+      inode->i_ctime = CURRENT_TIME;
-+      inode->i_mtime = CURRENT_TIME;
-+      inode->u.ext3_i.i_dtime = 0;
-+      inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
-+                                       * (for stat), not the fs block
-+                                       * size */
-+      inode->i_blocks = 0;
-+      inode->i_version = 1;
-+      inode->i_generation = 0;
-+
-+      inode->i_op = &iopen_inode_operations;
-+      inode->i_fop = &iopen_file_operations;
-+      inode->i_mapping->a_ops = 0;
-+
-+      return 1;
-+}
-Index: linux.mcp2/fs/ext3/iopen.h
-===================================================================
---- linux.mcp2.orig/fs/ext3/iopen.h    2002-04-11 07:25:15.000000000 -0700
-+++ linux.mcp2/fs/ext3/iopen.h 2004-05-17 15:21:55.000000000 -0700
-@@ -0,0 +1,15 @@
-+/*
-+ * iopen.h
-+ *
-+ * Special support for opening files by inode number.
-+ *
-+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
-+ *
-+ * This file may be redistributed under the terms of the GNU General
-+ * Public License.
-+ */
-+
-+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
-+extern int ext3_iopen_get_inode(struct inode *inode);
-+extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
-+                                         struct inode *inode, int rehash);
-Index: linux.mcp2/fs/ext3/namei.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/namei.c    2004-05-17 15:20:59.000000000 -0700
-+++ linux.mcp2/fs/ext3/namei.c 2004-05-17 15:21:55.000000000 -0700
-@@ -35,7 +35,7 @@
- #include <linux/string.h>
- #include <linux/locks.h>
- #include <linux/quotaops.h>
--
-+#include "iopen.h"
- /*
-  * define how far ahead to read directories while searching them.
-@@ -931,6 +931,9 @@
-       if (dentry->d_name.len > EXT3_NAME_LEN)
-               return ERR_PTR(-ENAMETOOLONG);
-+      if (ext3_check_for_iopen(dir, dentry))
-+              return NULL;
-+
-       bh = ext3_find_entry(dentry, &de);
-       inode = NULL;
-       if (bh) {
-@@ -942,8 +945,8 @@
-                       return ERR_PTR(-EACCES);
-               }
-       }
--      d_add(dentry, inode);
--      return NULL;
-+
-+      return iopen_connect_dentry(dentry, inode, 1);
- }
- #define S_SHIFT 12
-@@ -1932,10 +1935,6 @@
-                             inode->i_nlink);
-       inode->i_version = ++event;
-       inode->i_nlink = 0;
--      /* There's no need to set i_disksize: the fact that i_nlink is
--       * zero will ensure that the right thing happens during any
--       * recovery. */
--      inode->i_size = 0;
-       ext3_orphan_add(handle, inode);
-       ext3_mark_inode_dirty(handle, inode);
-       dir->i_nlink--;
-@@ -2054,6 +2053,23 @@
-       return err;
- }
-+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */
-+static int ext3_add_link(handle_t *handle, struct dentry *dentry,
-+                       struct inode *inode)
-+{
-+      int err = ext3_add_entry(handle, dentry, inode);
-+      if (!err) {
-+              err = ext3_mark_inode_dirty(handle, inode);
-+              if (err == 0) {
-+                      dput(iopen_connect_dentry(dentry, inode, 0));
-+                      return 0;
-+              }
-+      }
-+      ext3_dec_count(handle, inode);
-+      iput(inode);
-+      return err;
-+}
-+
- static int ext3_link (struct dentry * old_dentry,
-               struct inode * dir, struct dentry *dentry)
- {
-@@ -2081,7 +2097,8 @@
-       ext3_inc_count(handle, inode);
-       atomic_inc(&inode->i_count);
--      err = ext3_add_nondir(handle, dentry, inode);
-+      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle, inode);
-       ext3_journal_stop(handle, dir);
-       return err;
- }
-Index: linux.mcp2/fs/ext3/super.c
-===================================================================
---- linux.mcp2.orig/fs/ext3/super.c    2004-05-17 15:20:59.000000000 -0700
-+++ linux.mcp2/fs/ext3/super.c 2004-05-17 15:21:55.000000000 -0700
-@@ -836,6 +836,18 @@
-                        || !strcmp (this_char, "quota")
-                        || !strcmp (this_char, "usrquota"))
-                       /* Don't do anything ;-) */ ;
-+              else if (!strcmp (this_char, "iopen")) {
-+                      set_opt (sbi->s_mount_opt, IOPEN);
-+                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-+              else if (!strcmp (this_char, "noiopen")) {
-+                      clear_opt (sbi->s_mount_opt, IOPEN);
-+                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-+              else if (!strcmp (this_char, "iopen_nopriv")) {
-+                      set_opt (sbi->s_mount_opt, IOPEN);
-+                      set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-               else if (!strcmp (this_char, "journal")) {
-                       /* @@@ FIXME */
-                       /* Eventually we will want to be able to create
-Index: linux.mcp2/include/linux/ext3_fs.h
-===================================================================
---- linux.mcp2.orig/include/linux/ext3_fs.h    2004-05-17 15:20:59.000000000 -0700
-+++ linux.mcp2/include/linux/ext3_fs.h 2004-05-17 15:21:55.000000000 -0700
-@@ -323,6 +323,8 @@
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
- #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
diff --git a/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch b/lustre/kernel_patches/patches/iopen-2.4.19-suse.patch
deleted file mode 100644 (file)
index 3c10d3d..0000000
+++ /dev/null
@@ -1,497 +0,0 @@
- Documentation/filesystems/ext2.txt |   16 ++
- fs/ext3/Makefile                   |    2 
- fs/ext3/inode.c                    |    4 
- fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   13 +
- fs/ext3/namei.c                    |   13 +
- fs/ext3/super.c                    |   11 +
- include/linux/ext3_fs.h            |    2 
- 8 files changed, 318 insertions(+), 2 deletions(-)
-
-Index: linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt
-===================================================================
---- linux-2.4.19.SuSE.orig/Documentation/filesystems/ext2.txt  Wed Jul 11 15:44:45 2001
-+++ linux-2.4.19.SuSE/Documentation/filesystems/ext2.txt       Sun Nov 16 01:27:31 2003
-@@ -35,6 +35,22 @@
- sb=n                          Use alternate superblock at this location.
-+iopen                         Makes an invisible pseudo-directory called
-+                              __iopen__ available in the root directory
-+                              of the filesystem.  Allows open-by-inode-
-+                              number.  i.e., inode 3145 can be accessed
-+                              via /mntpt/__iopen__/3145
-+
-+iopen_nopriv                  This option makes the iopen directory be
-+                              world-readable.  This may be safer since it
-+                              allows daemons to run as an unprivileged user,
-+                              however it significantly changes the security
-+                              model of a Unix filesystem, since previously
-+                              all files under a mode 700 directory were not
-+                              generally available even if the
-+                              permissions on the file itself are
-+                              world-readable.
-+
- grpquota,noquota,quota,usrquota       Quota options are silently ignored by ext2.
-Index: linux-2.4.19.SuSE/fs/ext3/Makefile
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/Makefile    Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/fs/ext3/Makefile Sun Nov 16 01:27:31 2003
-@@ -11,7 +11,7 @@
- export-objs := ext3-exports.o
--obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
- obj-m    := $(O_TARGET)
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     Sun Nov 16 01:26:04 2003
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  Sun Nov 16 01:27:31 2003
-@@ -34,6 +34,7 @@
- #include <linux/highuid.h>
- #include <linux/quotaops.h>
- #include <linux/module.h>
-+#include "iopen.h"
- /*
-  * SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2350,6 +2351,9 @@
-       struct buffer_head *bh;
-       int block;
-       
-+      if (ext3_iopen_get_inode(inode))
-+              return;
-+
-       if(ext3_get_inode_loc(inode, &iloc))
-               goto bad_inode;
-       bh = iloc.bh;
-Index: lum/fs/ext3/iopen.c
-===================================================================
---- lum.orig/fs/ext3/iopen.c   2004-03-09 16:46:37.000000000 -0700
-+++ lum/fs/ext3/iopen.c        2004-03-09 16:48:03.000000000 -0700
-@@ -0,0 +1,285 @@
-+/*
-+ * linux/fs/ext3/iopen.c
-+ *
-+ * Special support for open by inode number
-+ *
-+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
-+ *
-+ * This file may be redistributed under the terms of the GNU General
-+ * Public License.
-+ *
-+ *
-+ * Invariants:
-+ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
-+ *     for an inode at one time.
-+ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
-+ *     aliases on an inode at the same time.
-+ *
-+ * If we have any connected dentry aliases for an inode, use one of those
-+ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
-+ * dentry for this inode, which thereafter will be found by the dcache
-+ * when looking up this inode number in __iopen__, so we don't return here
-+ * until it is gone.
-+ *
-+ * If we get an inode via a regular name lookup, then we "rename" the
-+ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
-+ * existing users of the disconnected dentry will continue to use the same
-+ * dentry as the connected users, and there will never be both kinds of
-+ * dentry aliases at one time.
-+ */
-+
-+#include <linux/sched.h>
-+#include <linux/fs.h>
-+#include <linux/locks.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/smp_lock.h>
-+#include "iopen.h"
-+
-+#ifndef assert
-+#define assert(test) J_ASSERT(test)
-+#endif
-+
-+#define IOPEN_NAME_LEN        32
-+
-+/*
-+ * This implements looking up an inode by number.
-+ */
-+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
-+{
-+      struct inode *inode;
-+      unsigned long ino;
-+      struct list_head *lp;
-+      struct dentry *alternate;
-+      char buf[IOPEN_NAME_LEN];
-+
-+      if (dentry->d_name.len >= IOPEN_NAME_LEN)
-+              return ERR_PTR(-ENAMETOOLONG);
-+
-+      memcpy(buf, dentry->d_name.name, dentry->d_name.len);
-+      buf[dentry->d_name.len] = 0;
-+
-+      if (strcmp(buf, ".") == 0)
-+              ino = dir->i_ino;
-+      else if (strcmp(buf, "..") == 0)
-+              ino = EXT3_ROOT_INO;
-+      else
-+              ino = simple_strtoul(buf, 0, 0);
-+
-+      if ((ino != EXT3_ROOT_INO &&
-+           //ino != EXT3_ACL_IDX_INO &&
-+           //ino != EXT3_ACL_DATA_INO &&
-+           ino < EXT3_FIRST_INO(dir->i_sb)) ||
-+          ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
-+              return ERR_PTR(-ENOENT);
-+
-+      inode = iget(dir->i_sb, ino);
-+      if (!inode)
-+              return ERR_PTR(-EACCES);
-+      if (is_bad_inode(inode)) {
-+              iput(inode);
-+              return ERR_PTR(-ENOENT);
-+      }
-+
-+      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(list_empty(&dentry->d_hash));            /* d_rehash */
-+
-+      /* preferably return a connected dentry */
-+      spin_lock(&dcache_lock);
-+      list_for_each(lp, &inode->i_dentry) {
-+              alternate = list_entry(lp, struct dentry, d_alias);
-+              assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
-+      }
-+
-+      if (!list_empty(&inode->i_dentry)) {
-+              alternate = list_entry(inode->i_dentry.next,
-+                                     struct dentry, d_alias);
-+              dget_locked(alternate);
-+              alternate->d_vfs_flags |= DCACHE_REFERENCED;
-+              iput(inode);
-+              spin_unlock(&dcache_lock);
-+              return alternate;
-+      }
-+      dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
-+
-+      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
-+      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
-+      dentry->d_inode = inode;
-+
-+      __d_rehash(dentry, 0);                          /* d_rehash */
-+      spin_unlock(&dcache_lock);
-+
-+      return NULL;
-+}
-+
-+#define do_switch(x,y) do { \
-+      __typeof__ (x) __tmp = x; \
-+      x = y; y = __tmp; } while (0)
-+
-+static inline void switch_names(struct dentry *dentry, struct dentry *target)
-+{
-+      const unsigned char *old_name, *new_name;
-+
-+      memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
-+      old_name = target->d_name.name;
-+      new_name = dentry->d_name.name;
-+      if (old_name == target->d_iname)
-+              old_name = dentry->d_iname;
-+      if (new_name == dentry->d_iname)
-+              new_name = target->d_iname;
-+      target->d_name.name = new_name;
-+      dentry->d_name.name = old_name;
-+}
-+
-+/* This function is spliced into ext3_lookup and does the move of a
-+ * disconnected dentry (if it exists) to a connected dentry.
-+ */
-+struct dentry *iopen_connect_dentry(struct dentry *dentry, struct inode *inode,
-+                                  int rehash)
-+{
-+      struct dentry *tmp, *goal = NULL;
-+      struct list_head *lp;
-+
-+      /* verify this dentry is really new */
-+      assert(dentry->d_inode == NULL);
-+      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      if (rehash)
-+              assert(list_empty(&dentry->d_hash));    /* d_rehash */
-+      assert(list_empty(&dentry->d_subdirs));
-+
-+      spin_lock(&dcache_lock);
-+      if (!inode)
-+              goto do_rehash;
-+
-+      if (!test_opt(inode->i_sb, IOPEN))
-+              goto do_instantiate;
-+
-+      /* preferably return a connected dentry */
-+      list_for_each(lp, &inode->i_dentry) {
-+              tmp = list_entry(lp, struct dentry, d_alias);
-+              if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
-+                      assert(tmp->d_alias.next == &inode->i_dentry);
-+                      assert(tmp->d_alias.prev == &inode->i_dentry);
-+                      goal = tmp;
-+                      dget_locked(goal);
-+                      break;
-+              }
-+      }
-+
-+      if (!goal)
-+              goto do_instantiate;
-+
-+      /* Move the goal to the de hash queue - like d_move() */
-+      goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
-+      list_del_init(&goal->d_hash);
-+
-+      list_del(&goal->d_child);
-+      list_del(&dentry->d_child);
-+
-+      /* Switch the parents and the names.. */
-+      switch_names(goal, dentry);
-+      do_switch(goal->d_parent, dentry->d_parent);
-+      do_switch(goal->d_name.len, dentry->d_name.len);
-+      do_switch(goal->d_name.hash, dentry->d_name.hash);
-+
-+      /* And add them back to the (new) parent lists */
-+      list_add(&goal->d_child, &goal->d_parent->d_subdirs);
-+      list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
-+      __d_rehash(goal, 0);
-+      spin_unlock(&dcache_lock);
-+      iput(inode);
-+
-+      return goal;
-+
-+      /* d_add(), but don't drop dcache_lock before adding dentry to inode */
-+do_instantiate:
-+      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
-+      dentry->d_inode = inode;
-+do_rehash:
-+      if (rehash)
-+              __d_rehash(dentry, 0);                  /* d_rehash */
-+      spin_unlock(&dcache_lock);
-+
-+      return NULL;
-+}
-+
-+/*
-+ * These are the special structures for the iopen pseudo directory.
-+ */
-+
-+static struct inode_operations iopen_inode_operations = {
-+      lookup:         iopen_lookup,           /* BKL held */
-+};
-+
-+static struct file_operations iopen_file_operations = {
-+      read:           generic_read_dir,
-+};
-+
-+static int match_dentry(struct dentry *dentry, const char *name)
-+{
-+      int     len;
-+
-+      len = strlen(name);
-+      if (dentry->d_name.len != len)
-+              return 0;
-+      if (strncmp(dentry->d_name.name, name, len))
-+              return 0;
-+      return 1;
-+}
-+
-+/*
-+ * This function is spliced into ext3_lookup and returns 1 if the file
-+ * name is __iopen__ and dentry has been filled in appropriately.
-+ */
-+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
-+{
-+      struct inode *inode;
-+
-+      if (dir->i_ino != EXT3_ROOT_INO ||
-+          !test_opt(dir->i_sb, IOPEN) ||
-+          !match_dentry(dentry, "__iopen__"))
-+              return 0;
-+
-+      inode = iget(dir->i_sb, EXT3_BAD_INO);
-+
-+      if (!inode)
-+              return 0;
-+      d_add(dentry, inode);
-+      return 1;
-+}
-+
-+/*
-+ * This function is spliced into read_inode; it returns 1 if inode
-+ * number is the one for /__iopen__, in which case the inode is filled
-+ * in appropriately.  Otherwise, this function returns 0.
-+ */
-+int ext3_iopen_get_inode(struct inode *inode)
-+{
-+      if (inode->i_ino != EXT3_BAD_INO)
-+              return 0;
-+
-+      inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
-+      if (test_opt(inode->i_sb, IOPEN_NOPRIV))
-+              inode->i_mode |= 0777;
-+      inode->i_uid = 0;
-+      inode->i_gid = 0;
-+      inode->i_nlink = 1;
-+      inode->i_size = 4096;
-+      inode->i_atime = CURRENT_TIME;
-+      inode->i_ctime = CURRENT_TIME;
-+      inode->i_mtime = CURRENT_TIME;
-+      inode->u.ext3_i.i_dtime = 0;
-+      inode->i_blksize = PAGE_SIZE;   /* This is the optimal IO size
-+                                       * (for stat), not the fs block
-+                                       * size */
-+      inode->i_blocks = 0;
-+      inode->i_version = 1;
-+      inode->i_generation = 0;
-+
-+      inode->i_op = &iopen_inode_operations;
-+      inode->i_fop = &iopen_file_operations;
-+      inode->i_mapping->a_ops = 0;
-+
-+      return 1;
-+}
-Index: lum/fs/ext3/iopen.h
-===================================================================
---- lum.orig/fs/ext3/iopen.h   2004-03-09 16:46:37.000000000 -0700
-+++ lum/fs/ext3/iopen.h        2004-03-09 16:48:03.000000000 -0700
-@@ -0,0 +1,15 @@
-+/*
-+ * iopen.h
-+ *
-+ * Special support for opening files by inode number.
-+ *
-+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
-+ *
-+ * This file may be redistributed under the terms of the GNU General
-+ * Public License.
-+ */
-+
-+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
-+extern int ext3_iopen_get_inode(struct inode *inode);
-+extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
-+                                         struct inode *inode, int rehash);
-Index: linux-2.4.19.SuSE/fs/ext3/namei.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/namei.c     Sun Nov 16 01:23:20 2003
-+++ linux-2.4.19.SuSE/fs/ext3/namei.c  Sun Nov 16 01:27:31 2003
-@@ -36,7 +36,7 @@
- #include <linux/string.h>
- #include <linux/locks.h>
- #include <linux/quotaops.h>
--
-+#include "iopen.h"
- /*
-  * define how far ahead to read directories while searching them.
-@@ -926,6 +927,9 @@
-       if (dentry->d_name.len > EXT3_NAME_LEN)
-               return ERR_PTR(-ENAMETOOLONG);
-+      if (ext3_check_for_iopen(dir, dentry))
-+              return NULL;
-+
-       bh = ext3_find_entry(dentry, &de);
-       inode = NULL;
-       if (bh) {
-@@ -943,8 +948,8 @@
-                       return ERR_PTR(-EACCES);
-               }
-       }
--      d_add(dentry, inode);
--      return NULL;
-+
-+      return iopen_connect_dentry(dentry, inode, 1);
- }
- #define S_SHIFT 12
-@@ -1932,10 +1935,6 @@
-                             inode->i_nlink);
-       inode->i_version = ++event;
-       inode->i_nlink = 0;
--      /* There's no need to set i_disksize: the fact that i_nlink is
--       * zero will ensure that the right thing happens during any
--       * recovery. */
--      inode->i_size = 0;
-       ext3_orphan_add(handle, inode);
-       dir->i_nlink--;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-@@ -2086,6 +2085,23 @@
-       return err;
- }
-+/* Like ext3_add_nondir() except for call to iopen_connect_dentry */
-+static int ext3_add_link(handle_t *handle, struct dentry *dentry,
-+                       struct inode *inode)
-+{
-+      int err = ext3_add_entry(handle, dentry, inode);
-+      if (!err) {
-+              err = ext3_mark_inode_dirty(handle, inode);
-+              if (err == 0) {
-+                      dput(iopen_connect_dentry(dentry, inode, 0));
-+                      return 0;
-+              }
-+      }
-+      ext3_dec_count(handle, inode);
-+      iput(inode);
-+      return err;
-+}
-+
- static int ext3_link (struct dentry * old_dentry,
-               struct inode * dir, struct dentry *dentry)
- {
-@@ -2113,7 +2129,8 @@
-       ext3_inc_count(handle, inode);
-       atomic_inc(&inode->i_count);
--      err = ext3_add_nondir(handle, dentry, inode);
-+      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle, inode);
-       ext3_journal_stop(handle, dir);
-       return err;
- }
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Sun Nov 16 01:19:22 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 01:27:31 2003
-@@ -864,6 +864,18 @@
-                        || !strcmp (this_char, "quota")
-                        || !strcmp (this_char, "usrquota"))
-                       /* Don't do anything ;-) */ ;
-+              else if (!strcmp (this_char, "iopen")) {
-+                      set_opt (sbi->s_mount_opt, IOPEN);
-+                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-+              else if (!strcmp (this_char, "noiopen")) {
-+                      clear_opt (sbi->s_mount_opt, IOPEN);
-+                      clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-+              else if (!strcmp (this_char, "iopen_nopriv")) {
-+                      set_opt (sbi->s_mount_opt, IOPEN);
-+                      set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
-+              }
-               else if (!strcmp (this_char, "journal")) {
-                       /* @@@ FIXME */
-                       /* Eventually we will want to be able to create
-Index: linux-2.4.19.SuSE/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/ext3_fs.h     Sun Nov 16 01:25:42 2003
-+++ linux-2.4.19.SuSE/include/linux/ext3_fs.h  Sun Nov 16 01:30:05 2003
-@@ -324,6 +324,8 @@
- #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
- #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
index 4b869a6..d5a28e6 100644 (file)
@@ -489,7 +489,7 @@ Index: lum/include/linux/ext3_fs.h
 @@ -324,4 +324,6 @@
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
index 94d8ab9..1510c9b 100644 (file)
@@ -490,8 +490,8 @@ Index: linux-ia64/include/linux/ext3_fs.h
  #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
  #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
  #define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 53a293f..98dbca4 100644 (file)
@@ -7,7 +7,7 @@ Index: linux-stage/fs/ext3/Makefile
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
 -ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
++ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
@@ -124,7 +124,7 @@ Index: linux-stage/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -188,7 +188,7 @@ Index: linux-stage/fs/ext3/iopen.c
 +      assert(dentry->d_inode == NULL);
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
 +      if (rehash)
-+              assert(d_unhashed(dentry));     /* d_rehash */
++              assert(d_unhashed(dentry));             /* d_rehash */
 +      assert(list_empty(&dentry->d_subdirs));
 +
 +      spin_lock(&dcache_lock);
@@ -411,7 +411,7 @@ Index: linux-stage/fs/ext3/namei.c
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
@@ -420,20 +420,20 @@ Index: linux-stage/fs/ext3/super.c
 --- linux-stage.orig/fs/ext3/super.c   2005-02-25 14:37:30.987717392 +0200
 +++ linux-stage/fs/ext3/super.c        2005-02-25 14:44:50.495901992 +0200
 @@ -586,6 +586,7 @@
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
+ static match_table_t tokens = {
 @@ -633,6 +634,9 @@
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
@@ -464,8 +464,8 @@ Index: linux-stage/include/linux/ext3_fs.h
  #define EXT3_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */
  #define EXT3_MOUNT_BARRIER            0x10000 /* Use block barriers */
  #define EXT3_MOUNT_RESERVATION                0x20000 /* Preallocation */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index bd133cb..1c5e900 100644 (file)
@@ -1,15 +1,7 @@
- fs/ext3/inode.c                    |    3 
- fs/ext3/iopen.c                    |  239 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h                    |   15 ++
- fs/ext3/namei.c                    |   13 ++
- fs/ext3/super.c                    |   17 ++
- include/linux/ext3_fs.h            |    2 
- 7 files changed, 304 insertions(+), 1 deletion(-)
-
-Index: linux-2.6.5-sles9/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/Makefile    2004-04-04 07:36:18.000000000 +0400
-+++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300
+--- linux-stage.orig/fs/ext3/Makefile  2005-02-25 14:31:53.151076368 +0200
++++ linux-stage/fs/ext3/Makefile       2005-02-25 14:41:51.259150120 +0200
 @@ -4,7 +4,7 @@
  
  obj-$(CONFIG_EXT3_FS) += ext3.o
@@ -19,10 +11,10 @@ Index: linux-2.6.5-sles9/fs/ext3/Makefile
           ioctl.o namei.o super.o symlink.o hash.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
-Index: linux-2.6.5-sles9/fs/ext3/inode.c
+Index: linux-stage/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/inode.c     2004-11-09 02:15:44.739673656 +0300
-+++ linux-2.6.5-sles9/fs/ext3/inode.c  2004-11-09 02:18:27.608913768 +0300
+--- linux-stage.orig/fs/ext3/inode.c   2005-02-25 14:37:30.983718000 +0200
++++ linux-stage/fs/ext3/inode.c        2005-02-25 14:47:42.069818792 +0200
 @@ -37,6 +37,7 @@
  #include <linux/mpage.h>
  #include <linux/uio.h>
@@ -31,7 +23,7 @@ Index: linux-2.6.5-sles9/fs/ext3/inode.c
  #include "acl.h"
  
  /*
-@@ -2402,6 +2403,9 @@
+@@ -2408,6 +2409,9 @@
  #endif
        ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
  
@@ -41,7 +33,7 @@ Index: linux-2.6.5-sles9/fs/ext3/inode.c
        if (ext3_get_inode_loc(inode, &iloc, 0))
                goto bad_inode;
        bh = iloc.bh;
-Index: linux-2.6.5-sles9/fs/ext3/iopen.c
+Index: linux-stage/fs/ext3/iopen.c
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/iopen.c     2003-01-30 13:24:37.000000000 +0300
 +++ linux-2.6.5-sles9/fs/ext3/iopen.c  2004-11-09 02:18:27.611913312 +0300
@@ -133,7 +125,7 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -324,10 +316,10 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.c
 +
 +      return 1;
 +}
-Index: linux-2.6.5-sles9/fs/ext3/iopen.h
+Index: linux-stage/fs/ext3/iopen.h
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/iopen.h     2003-01-30 13:24:37.000000000 +0300
-+++ linux-2.6.5-sles9/fs/ext3/iopen.h  2004-11-09 02:18:27.613913008 +0300
+--- linux-stage.orig/fs/ext3/iopen.h   2005-02-25 14:41:01.017787968 +0200
++++ linux-stage/fs/ext3/iopen.h        2005-02-25 14:41:01.045783712 +0200
 @@ -0,0 +1,15 @@
 +/*
 + * iopen.h
@@ -344,10 +336,10 @@ Index: linux-2.6.5-sles9/fs/ext3/iopen.h
 +extern int ext3_iopen_get_inode(struct inode *inode);
 +extern struct dentry *iopen_connect_dentry(struct dentry *dentry,
 +                                         struct inode *inode, int rehash);
-Index: linux-2.6.5-sles9/fs/ext3/namei.c
+Index: linux-stage/fs/ext3/namei.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/namei.c     2004-11-09 02:15:44.614692656 +0300
-+++ linux-2.6.5-sles9/fs/ext3/namei.c  2004-11-09 02:18:27.616912552 +0300
+--- linux-stage.orig/fs/ext3/namei.c   2005-02-25 14:37:28.975023368 +0200
++++ linux-stage/fs/ext3/namei.c        2005-02-25 14:46:43.090784968 +0200
 @@ -37,6 +37,7 @@
  #include <linux/buffer_head.h>
  #include <linux/smp_lock.h>
@@ -356,7 +348,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  #include "acl.h"
  
  /*
-@@ -979,6 +980,9 @@
+@@ -980,6 +981,9 @@
        if (dentry->d_name.len > EXT3_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
  
@@ -366,7 +358,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
        bh = ext3_find_entry(dentry, &de);
        inode = NULL;
        if (bh) {
-@@ -989,10 +993,8 @@
+@@ -990,10 +994,8 @@
                if (!inode)
                        return ERR_PTR(-EACCES);
        }
@@ -379,7 +371,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  }
  
  
-@@ -2029,10 +2031,6 @@
+@@ -2037,10 +2039,6 @@
                              inode->i_nlink);
        inode->i_version++;
        inode->i_nlink = 0;
@@ -390,7 +382,7 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
        ext3_orphan_add(handle, inode);
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        ext3_mark_inode_dirty(handle, inode);
-@@ -2152,6 +2150,23 @@
+@@ -2163,6 +2161,23 @@
        return err;
  }
  
@@ -414,40 +406,39 @@ Index: linux-2.6.5-sles9/fs/ext3/namei.c
  static int ext3_link (struct dentry * old_dentry,
                struct inode * dir, struct dentry *dentry)
  {
-@@ -2175,7 +2190,8 @@
+@@ -2186,7 +2201,8 @@
        ext3_inc_count(handle, inode);
        atomic_inc(&inode->i_count);
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
-Index: linux-2.6.5-sles9/fs/ext3/super.c
+Index: linux-stage/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-sles9.orig/fs/ext3/super.c     2004-11-09 02:15:44.743673048 +0300
-+++ linux-2.6.5-sles9/fs/ext3/super.c  2004-11-09 02:18:27.620911944 +0300
-@@ -534,7 +534,7 @@
-       Opt_reservation, Opt_noreservation, Opt_noload,
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
+--- linux-stage.orig/fs/ext3/super.c   2005-02-25 14:37:30.987717392 +0200
++++ linux-stage/fs/ext3/super.c        2005-02-25 14:44:50.495901992 +0200
+@@ -586,6 +586,7 @@
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
--      Opt_ignore, Opt_barrier,
-+      Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_ignore, Opt_barrier,
        Opt_err,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
-@@ -577,6 +577,9 @@
+ static match_table_t tokens = {
+@@ -633,6 +634,9 @@
+       {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-       {Opt_barrier, "barrier=%u"},
 +      {Opt_iopen, "iopen"},
 +      {Opt_noiopen, "noiopen"},
 +      {Opt_iopen_nopriv, "iopen_nopriv"},
+       {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -778,6 +781,18 @@
+@@ -914,6 +918,18 @@
                        else
                                clear_opt(sbi->s_mount_opt, BARRIER);
                        break;
@@ -466,16 +457,16 @@ Index: linux-2.6.5-sles9/fs/ext3/super.c
                case Opt_ignore:
                        break;
                default:
-Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+Index: linux-stage/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h     2004-11-09 02:15:44.616692352 +0300
-+++ linux-2.6.5-sles9/include/linux/ext3_fs.h  2004-11-09 02:18:27.622911640 +0300
-@@ -329,6 +329,8 @@
+--- linux-stage.orig/include/linux/ext3_fs.h   2005-02-25 14:37:28.977023064 +0200
++++ linux-stage/include/linux/ext3_fs.h        2005-02-25 14:49:00.569884968 +0200
+@@ -355,6 +355,8 @@
  #define EXT3_MOUNT_POSIX_ACL          0x08000 /* POSIX Access Control Lists */
  #define EXT3_MOUNT_RESERVATION                0x10000 /* Preallocation */
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
-+#define EXT3_MOUNT_IOPEN              0x40000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
index 5141bbc..8d456ac 100644 (file)
@@ -7,7 +7,7 @@ Index: linux-2.6.12-rc6/fs/ext3/Makefile
  obj-$(CONFIG_EXT3_FS) += ext3.o
  
 -ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o\
++ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
@@ -124,7 +124,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      }
 +
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
-+      assert(d_unhashed(dentry));             /* d_rehash */
++      assert(d_unhashed(dentry));                     /* d_rehash */
 +
 +      /* preferrably return a connected dentry */
 +      spin_lock(&dcache_lock);
@@ -150,7 +150,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      list_add(&dentry->d_alias, &inode->i_dentry);   /* d_instantiate */
 +      dentry->d_inode = inode;
 +
-+      d_rehash_cond(dentry, 0);                               /* d_rehash */
++      d_rehash_cond(dentry, 0);                       /* d_rehash */
 +      spin_unlock(&dcache_lock);
 +
 +      return NULL;
@@ -188,7 +188,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      assert(dentry->d_inode == NULL);
 +      assert(list_empty(&dentry->d_alias));           /* d_instantiate */
 +      if (rehash)
-+              assert(d_unhashed(dentry));     /* d_rehash */
++              assert(d_unhashed(dentry));             /* d_rehash */
 +      assert(list_empty(&dentry->d_subdirs));
 +
 +      spin_lock(&dcache_lock);
@@ -230,7 +230,7 @@ Index: linux-2.6.12-rc6/fs/ext3/iopen.c
 +      dentry->d_inode = inode;
 +do_rehash:
 +      if (rehash)
-+              d_rehash_cond(dentry, 0);                       /* d_rehash */
++              d_rehash_cond(dentry, 0);               /* d_rehash */
 +      spin_unlock(&dcache_lock);
 +
 +      return NULL;
@@ -411,7 +411,7 @@ Index: linux-2.6.12-rc6/fs/ext3/namei.c
  
 -      err = ext3_add_nondir(handle, dentry, inode);
 +      err = ext3_add_link(handle, dentry, inode);
-+      ext3_orphan_del(handle,inode);
++      ext3_orphan_del(handle, inode);
        ext3_journal_stop(handle);
        if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
@@ -420,20 +420,20 @@ Index: linux-2.6.12-rc6/fs/ext3/super.c
 --- linux-2.6.12-rc6.orig/fs/ext3/super.c      2005-06-14 16:01:16.287775299 +0200
 +++ linux-2.6.12-rc6/fs/ext3/super.c   2005-06-14 16:14:33.656906156 +0200
 @@ -590,6 +590,7 @@
-       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
++      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
+ static match_table_t tokens = {
 @@ -638,6 +639,9 @@
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
        {Opt_ignore, "usrquota"},
-+      {Opt_iopen,  "iopen"},
-+      {Opt_noiopen,  "noiopen"},
-+      {Opt_iopen_nopriv,  "iopen_nopriv"},
++      {Opt_iopen, "iopen"},
++      {Opt_noiopen, "noiopen"},
++      {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
@@ -464,8 +464,8 @@ Index: linux-2.6.12-rc6/include/linux/ext3_fs.h
  #define EXT3_MOUNT_RESERVATION                0x10000 /* Preallocation */
  #define EXT3_MOUNT_BARRIER            0x20000 /* Use block barriers */
  #define EXT3_MOUNT_NOBH                       0x40000 /* No bufferheads */
-+#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
-+#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000 /* Make iopen world-readable */
++#define EXT3_MOUNT_IOPEN              0x80000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
  
  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
  #ifndef _LINUX_EXT2_FS_H
diff --git a/lustre/kernel_patches/patches/jbd-2.4.18-jcberr.patch b/lustre/kernel_patches/patches/jbd-2.4.18-jcberr.patch
deleted file mode 100644 (file)
index 81b4136..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-Index: linux-2.4.19.SuSE/include/linux/jbd.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/jbd.h Sun Nov 16 13:51:03 2003
-+++ linux-2.4.19.SuSE/include/linux/jbd.h      Sun Nov 16 15:10:48 2003
-@@ -283,6 +283,13 @@
-       return bh->b_private;
- }
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+struct journal_callback {
-+      struct list_head jcb_list;
-+      void (*jcb_func)(struct journal_callback *jcb, int error);
-+      /* user data goes here */
-+};
-+
- struct jbd_revoke_table_s;
- /* The handle_t type represents a single atomic update being performed
-@@ -313,6 +320,12 @@
-          operations */
-       int                     h_err;
-+      /* List of application registered callbacks for this handle.
-+       * The function(s) will be called after the transaction that
-+       * this handle is part of has been committed to disk.
-+       */
-+      struct list_head        h_jcb;
-+
-       /* Flags */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-@@ -432,6 +445,10 @@
-       /* How many handles used this transaction? */
-       int t_handle_count;
-+
-+      /* List of registered callback functions for this transaction.
-+       * Called when the transaction is committed. */
-+      struct list_head        t_jcb;
- };
-@@ -676,6 +693,9 @@
- extern int     journal_try_to_free_buffers(journal_t *, struct page *, int);
- extern int     journal_stop(handle_t *);
- extern int     journal_flush (journal_t *);
-+extern void    journal_callback_set(handle_t *handle,
-+                                    void (*fn)(struct journal_callback *,int),
-+                                    struct journal_callback *jcb);
- extern void    journal_lock_updates (journal_t *);
- extern void    journal_unlock_updates (journal_t *);
-Index: linux-2.4.19.SuSE/fs/jbd/checkpoint.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/jbd/checkpoint.c Mon Feb 25 11:38:08 2002
-+++ linux-2.4.19.SuSE/fs/jbd/checkpoint.c      Sun Nov 16 15:10:48 2003
-@@ -594,7 +594,8 @@
-       J_ASSERT (transaction->t_log_list == NULL);
-       J_ASSERT (transaction->t_checkpoint_list == NULL);
-       J_ASSERT (transaction->t_updates == 0);
--      
-+      J_ASSERT (list_empty(&transaction->t_jcb));
-+
-       J_ASSERT (transaction->t_journal->j_committing_transaction !=
-                                       transaction);
-       
-Index: linux-2.4.19.SuSE/fs/jbd/commit.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/jbd/commit.c     Mon Jan 27 05:08:04 2003
-+++ linux-2.4.19.SuSE/fs/jbd/commit.c  Sun Nov 16 15:13:53 2003
-@@ -485,7 +485,7 @@
-            transaction's t_log_list queue, and metadata buffers are on
-            the t_iobuf_list queue.
--         Wait for the transactions in reverse order.  That way we are
-+         Wait for the buffers in reverse order.  That way we are
-          less likely to be woken up until all IOs have completed, and
-          so we incur less scheduling load.
-       */
-@@ -576,8 +576,10 @@
-       jbd_debug(3, "JBD: commit phase 6\n");
--      if (is_journal_aborted(journal))
-+      if (is_journal_aborted(journal)) {
-+              unlock_journal(journal);
-               goto skip_commit;
-+      }
-       /* Done it all: now write the commit record.  We should have
-        * cleaned up our previous buffers by now, so if we are in abort
-@@ -587,9 +589,10 @@
-       descriptor = journal_get_descriptor_buffer(journal);
-       if (!descriptor) {
-               __journal_abort_hard(journal);
-+              unlock_journal(journal);
-               goto skip_commit;
-       }
--      
-+
-       /* AKPM: buglet - add `i' to tmp! */
-       for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
-               journal_header_t *tmp =
-@@ -610,14 +614,32 @@
-               put_bh(bh);             /* One for getblk() */
-               journal_unlock_journal_head(descriptor);
-       }
--      lock_journal(journal);
-       /* End of a transaction!  Finally, we can do checkpoint
-            processing: any buffers committed as a result of this
-            transaction can be removed from any checkpoint list it was on
-            before. */
--skip_commit:
-+skip_commit: /* The journal should be unlocked by now. */
-+
-+      /* Call any callbacks that had been registered for handles in this
-+       * transaction.  It is up to the callback to free any allocated
-+       * memory.
-+       */
-+      if (!list_empty(&commit_transaction->t_jcb)) {
-+              struct list_head *p, *n;
-+              int error = is_journal_aborted(journal);
-+
-+              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+                      struct journal_callback *jcb;
-+
-+                      jcb = list_entry(p, struct journal_callback, jcb_list);
-+                      list_del(p);
-+                      jcb->jcb_func(jcb, error);
-+              }
-+      }
-+
-+      lock_journal(journal);
-       jbd_debug(3, "JBD: commit phase 7\n");
-Index: linux-2.4.19.SuSE/fs/jbd/journal.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/jbd/journal.c    Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/jbd/journal.c Sun Nov 16 15:10:48 2003
-@@ -59,6 +59,7 @@
- #endif
- EXPORT_SYMBOL(journal_flush);
- EXPORT_SYMBOL(journal_revoke);
-+EXPORT_SYMBOL(journal_callback_set);
- EXPORT_SYMBOL(journal_init_dev);
- EXPORT_SYMBOL(journal_init_inode);
-Index: linux-2.4.19.SuSE/fs/jbd/transaction.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/jbd/transaction.c        Sun Nov 16 01:45:26 2003
-+++ linux-2.4.19.SuSE/fs/jbd/transaction.c     Sun Nov 16 15:15:34 2003
-@@ -58,6 +58,7 @@
-       transaction->t_state = T_RUNNING;
-       transaction->t_tid = journal->j_transaction_sequence++;
-       transaction->t_expires = jiffies + bdflush_interval();
-+      INIT_LIST_HEAD(&transaction->t_jcb);
-       /* Set up the commit timer for the new transaction. */
-       J_ASSERT (!journal->j_commit_timer_active);
-@@ -91,7 +92,14 @@
-       transaction_t *transaction;
-       int needed;
-       int nblocks = handle->h_buffer_credits;
--      
-+
-+      if (nblocks > journal->j_max_transaction_buffers) {
-+              jbd_debug(1, "JBD: %s wants too many credits (%d > %d)\n",
-+                        current->comm, nblocks,
-+                        journal->j_max_transaction_buffers);
-+              return -ENOSPC;
-+      }
-+
-       jbd_debug(3, "New handle %p going live.\n", handle);
- repeat:
-@@ -202,6 +210,20 @@
-       return 0;
- }
-+/* Allocate a new handle.  This should probably be in a slab... */
-+static handle_t *new_handle(int nblocks)
-+{
-+      handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+      if (!handle)
-+              return NULL;
-+      memset(handle, 0, sizeof (handle_t));
-+      handle->h_buffer_credits = nblocks;
-+      handle->h_ref = 1;
-+      INIT_LIST_HEAD(&handle->h_jcb);
-+
-+      return handle;
-+}
-+
- /*
-  * Obtain a new handle.  
-  *
-@@ -228,14 +250,11 @@
-               handle->h_ref++;
-               return handle;
-       }
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = start_this_handle(journal, handle);
-@@ -334,14 +353,11 @@
-       
-       if (is_journal_aborted(journal))
-               return ERR_PTR(-EIO);
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = try_start_this_handle(journal, handle);
-@@ -1321,6 +1337,28 @@
- #endif
- /*
-+ * Register a callback function for this handle.  The function will be
-+ * called when the transaction that this handle is part of has been
-+ * committed to disk with the original callback data struct and the
-+ * error status of the journal as parameters.  There is no guarantee of
-+ * ordering between handles within a single transaction, nor between
-+ * callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations.  The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * and has the caller-specific data afterwards.
-+ */
-+void journal_callback_set(handle_t *handle,
-+                        void (*func)(struct journal_callback *jcb, int error),
-+                        struct journal_callback *jcb)
-+{
-+      list_add_tail(&jcb->jcb_list, &handle->h_jcb);
-+      jcb->jcb_func = func;
-+}
-+
-+/*
-  * All done for a particular handle.
-  *
-  * There is not much action needed here.  We just return any remaining
-@@ -1385,7 +1423,10 @@
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
--      /* 
-+      /* Move callbacks from the handle to the transaction. */
-+      list_splice(&handle->h_jcb, &transaction->t_jcb);
-+
-+      /*
-        * If the handle is marked SYNC, we need to set another commit
-        * going!  We also want to force a commit if the current
-        * transaction is occupying too much of the log, or if the
diff --git a/lustre/kernel_patches/patches/jbd-2.4.19-pre1-jcberr.patch b/lustre/kernel_patches/patches/jbd-2.4.19-pre1-jcberr.patch
deleted file mode 100644 (file)
index bbbf613..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-Index: linux-2.4.19-pre1/include/linux/jbd.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/jbd.h 2003-11-21 03:00:11.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/jbd.h      2003-11-21 03:04:47.000000000 +0300
-@@ -275,6 +275,13 @@
-       return bh->b_private;
- }
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+struct journal_callback {
-+      struct list_head jcb_list;
-+      void (*jcb_func)(struct journal_callback *jcb, int error);
-+      /* user data goes here */
-+};
-+
- struct jbd_revoke_table_s;
- /* The handle_t type represents a single atomic update being performed
-@@ -305,6 +312,12 @@
-          operations */
-       int                     h_err;
-+      /* List of application registered callbacks for this handle.
-+       * The function(s) will be called after the transaction that
-+       * this handle is part of has been committed to disk.
-+       */
-+      struct list_head        h_jcb;
-+
-       /* Flags */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-@@ -424,6 +437,10 @@
-       /* How many handles used this transaction? */
-       int t_handle_count;
-+
-+      /* List of registered callback functions for this transaction.
-+       * Called when the transaction is committed. */
-+      struct list_head        t_jcb;
- };
-@@ -672,6 +689,9 @@
- extern int     journal_try_to_free_buffers(journal_t *, struct page *, int);
- extern int     journal_stop(handle_t *);
- extern int     journal_flush (journal_t *);
-+extern void    journal_callback_set(handle_t *handle,
-+                                    void (*fn)(struct journal_callback *,int),
-+                                    struct journal_callback *jcb);
- extern void    journal_lock_updates (journal_t *);
- extern void    journal_unlock_updates (journal_t *);
-Index: linux-2.4.19-pre1/fs/jbd/checkpoint.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/jbd/checkpoint.c 2003-11-21 02:53:20.000000000 +0300
-+++ linux-2.4.19-pre1/fs/jbd/checkpoint.c      2003-11-21 03:04:47.000000000 +0300
-@@ -601,7 +601,8 @@
-       J_ASSERT (transaction->t_log_list == NULL);
-       J_ASSERT (transaction->t_checkpoint_list == NULL);
-       J_ASSERT (transaction->t_updates == 0);
--      
-+      J_ASSERT (list_empty(&transaction->t_jcb));
-+
-       J_ASSERT (transaction->t_journal->j_committing_transaction !=
-                                       transaction);
-       
-Index: linux-2.4.19-pre1/fs/jbd/commit.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/jbd/commit.c     2003-11-21 02:53:20.000000000 +0300
-+++ linux-2.4.19-pre1/fs/jbd/commit.c  2003-11-21 03:04:47.000000000 +0300
-@@ -480,7 +480,7 @@
-            transaction's t_log_list queue, and metadata buffers are on
-            the t_iobuf_list queue.
--         Wait for the transactions in reverse order.  That way we are
-+         Wait for the buffers in reverse order.  That way we are
-          less likely to be woken up until all IOs have completed, and
-          so we incur less scheduling load.
-       */
-@@ -571,8 +571,10 @@
-       jbd_debug(3, "JBD: commit phase 6\n");
--      if (is_journal_aborted(journal))
-+      if (is_journal_aborted(journal)) {
-+              unlock_journal(journal);
-               goto skip_commit;
-+      }
-       /* Done it all: now write the commit record.  We should have
-        * cleaned up our previous buffers by now, so if we are in abort
-@@ -582,9 +584,10 @@
-       descriptor = journal_get_descriptor_buffer(journal);
-       if (!descriptor) {
-               __journal_abort_hard(journal);
-+              unlock_journal(journal);
-               goto skip_commit;
-       }
--      
-+
-       /* AKPM: buglet - add `i' to tmp! */
-       for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
-               journal_header_t *tmp =
-@@ -605,14 +608,32 @@
-               put_bh(bh);             /* One for getblk() */
-               journal_unlock_journal_head(descriptor);
-       }
--      lock_journal(journal);
-       /* End of a transaction!  Finally, we can do checkpoint
-            processing: any buffers committed as a result of this
-            transaction can be removed from any checkpoint list it was on
-            before. */
--skip_commit:
-+skip_commit: /* The journal should be unlocked by now. */
-+
-+      /* Call any callbacks that had been registered for handles in this
-+       * transaction.  It is up to the callback to free any allocated
-+       * memory.
-+       */
-+      if (!list_empty(&commit_transaction->t_jcb)) {
-+              struct list_head *p, *n;
-+              int error = is_journal_aborted(journal);
-+
-+              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+                      struct journal_callback *jcb;
-+
-+                      jcb = list_entry(p, struct journal_callback, jcb_list);
-+                      list_del(p);
-+                      jcb->jcb_func(jcb, error);
-+              }
-+      }
-+
-+      lock_journal(journal);
-       jbd_debug(3, "JBD: commit phase 7\n");
-Index: linux-2.4.19-pre1/fs/jbd/journal.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/jbd/journal.c    2003-11-21 02:53:20.000000000 +0300
-+++ linux-2.4.19-pre1/fs/jbd/journal.c 2003-11-21 03:04:47.000000000 +0300
-@@ -58,6 +58,7 @@
- #endif
- EXPORT_SYMBOL(journal_flush);
- EXPORT_SYMBOL(journal_revoke);
-+EXPORT_SYMBOL(journal_callback_set);
- EXPORT_SYMBOL(journal_init_dev);
- EXPORT_SYMBOL(journal_init_inode);
-Index: linux-2.4.19-pre1/fs/jbd/transaction.c
-===================================================================
---- linux-2.4.19-pre1.orig/fs/jbd/transaction.c        2003-11-21 02:53:20.000000000 +0300
-+++ linux-2.4.19-pre1/fs/jbd/transaction.c     2003-11-21 03:05:14.000000000 +0300
-@@ -57,6 +57,7 @@
-       transaction->t_state = T_RUNNING;
-       transaction->t_tid = journal->j_transaction_sequence++;
-       transaction->t_expires = jiffies + journal->j_commit_interval;
-+      INIT_LIST_HEAD(&transaction->t_jcb);
-       /* Set up the commit timer for the new transaction. */
-       J_ASSERT (!journal->j_commit_timer_active);
-@@ -90,7 +91,14 @@
-       transaction_t *transaction;
-       int needed;
-       int nblocks = handle->h_buffer_credits;
--      
-+
-+      if (nblocks > journal->j_max_transaction_buffers) {
-+              jbd_debug(1, "JBD: %s wants too many credits (%d > %d)\n",
-+                        current->comm, nblocks,
-+                        journal->j_max_transaction_buffers);
-+              return -ENOSPC;
-+      }
-+
-       jbd_debug(3, "New handle %p going live.\n", handle);
- repeat:
-@@ -196,6 +204,20 @@
-       return 0;
- }
-+/* Allocate a new handle.  This should probably be in a slab... */
-+static handle_t *new_handle(int nblocks)
-+{
-+      handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+      if (!handle)
-+              return NULL;
-+      memset(handle, 0, sizeof (handle_t));
-+      handle->h_buffer_credits = nblocks;
-+      handle->h_ref = 1;
-+      INIT_LIST_HEAD(&handle->h_jcb);
-+
-+      return handle;
-+}
-+
- /*
-  * Obtain a new handle.  
-  *
-@@ -222,14 +244,11 @@
-               handle->h_ref++;
-               return handle;
-       }
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = start_this_handle(journal, handle);
-@@ -328,14 +347,11 @@
-       
-       if (is_journal_aborted(journal))
-               return ERR_PTR(-EIO);
--      
--      handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
-+
-+      handle = new_handle(nblocks);
-       if (!handle)
-               return ERR_PTR(-ENOMEM);
--      memset (handle, 0, sizeof (handle_t));
--      handle->h_buffer_credits = nblocks;
--      handle->h_ref = 1;
-       current->journal_info = handle;
-       err = try_start_this_handle(journal, handle);
-@@ -1324,6 +1340,28 @@
- #endif
- /*
-+ * Register a callback function for this handle.  The function will be
-+ * called when the transaction that this handle is part of has been
-+ * committed to disk with the original callback data struct and the
-+ * error status of the journal as parameters.  There is no guarantee of
-+ * ordering between handles within a single transaction, nor between
-+ * callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations.  The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * and has the caller-specific data afterwards.
-+ */
-+void journal_callback_set(handle_t *handle,
-+                        void (*func)(struct journal_callback *jcb, int error),
-+                        struct journal_callback *jcb)
-+{
-+      list_add_tail(&jcb->jcb_list, &handle->h_jcb);
-+      jcb->jcb_func = func;
-+}
-+
-+/*
-  * All done for a particular handle.
-  *
-  * There is not much action needed here.  We just return any remaining
-@@ -1389,7 +1427,10 @@
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
--      /* 
-+      /* Move callbacks from the handle to the transaction. */
-+      list_splice(&handle->h_jcb, &transaction->t_jcb);
-+
-+      /*
-        * If the handle is marked SYNC, we need to set another commit
-        * going!  We also want to force a commit if the current
-        * transaction is occupying too much of the log, or if the
diff --git a/lustre/kernel_patches/patches/jbd-flushtime-2.4.19-suse.patch b/lustre/kernel_patches/patches/jbd-flushtime-2.4.19-suse.patch
deleted file mode 100644 (file)
index 8411137..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-Index: linux-2.4.19.SuSE/fs/jbd/transaction.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/jbd/transaction.c        Sun Nov 16 01:38:25 2003
-+++ linux-2.4.19.SuSE/fs/jbd/transaction.c     Sun Nov 16 01:44:26 2003
-@@ -1094,7 +1094,6 @@
-       
-       spin_lock(&journal_datalist_lock);
-       set_bit(BH_JBDDirty, &bh->b_state);
--      set_buffer_flushtime(bh);
-       J_ASSERT_JH(jh, jh->b_transaction != NULL);
-       
-@@ -1995,6 +1994,13 @@
-       spin_unlock(&journal_datalist_lock);
- }
-+static void jbd_refile_buffer(struct buffer_head *bh)
-+{
-+      if (buffer_dirty(bh) && (bh->b_list != BUF_DIRTY))
-+              set_buffer_flushtime(bh);
-+      refile_buffer(bh);
-+}
-+
- /* 
-  * Remove a buffer from its current buffer list in preparation for
-  * dropping it from its current transaction entirely.  If the buffer has
-@@ -2022,7 +2028,7 @@
-               J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
-       } else {
-               /* Onto BUF_DIRTY for writeback */
--              refile_buffer(jh2bh(jh));
-+              jbd_refile_buffer(jh2bh(jh));
-       }
- }
diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch b/lustre/kernel_patches/patches/jbd-stats-2.6.13.4.patch
new file mode 100644 (file)
index 0000000..4db8dd3
--- /dev/null
@@ -0,0 +1,735 @@
+Index: linux-2.6.13.4/include/linux/jbd.h
+===================================================================
+--- linux-2.6.13.4.orig/include/linux/jbd.h    2005-10-10 22:54:29.000000000 +0400
++++ linux-2.6.13.4/include/linux/jbd.h 2005-11-20 01:35:08.000000000 +0300
+@@ -394,6 +394,16 @@
+ };
++/*
++ * Some stats for checkpoint phase
++ *
++ * cs_chp_time:        CURRENT_MSECS stamp taken the first time the checkpoint
++ *                     code picks this transaction; turned into an elapsed
++ *                     time when the transaction is dropped
++ * cs_forced_to_close: bumped each time checkpointing calls log_start_commit()
++ *                     for the transaction that still owns a buffer
++ * cs_written:         buffers queued for write-out by __flush_buffer()
++ * cs_dropped:         buffers taken off this transaction's checkpoint list
++ *                     by a later commit
++ */
++struct transaction_chp_stats_s {
++      unsigned long           cs_chp_time;
++      unsigned long           cs_forced_to_close;
++      unsigned long           cs_written;
++      unsigned long           cs_dropped;
++};
++
+ /* The transaction_t type is the guts of the journaling mechanism.  It
+  * tracks a compound transaction through its various states:
+  *
+@@ -523,6 +533,21 @@
+       spinlock_t              t_handle_lock;
+       /*
++       * Longest time some handle had to wait for running transaction
++       */
++      unsigned long           t_max_wait;
++
++      /*
++       * When transaction started
++       */
++      unsigned long           t_start;
++
++      /*
++       * Checkpointing stats [j_checkpoint_sem]
++       */
++      struct transaction_chp_stats_s t_chp_stats;
++
++      /*
+        * Number of outstanding updates running on this transaction
+        * [t_handle_lock]
+        */
+@@ -553,6 +578,57 @@
+ };
++/*
++ * Per-transaction figures gathered by journal_commit_transaction().
++ * The time fields are differences of CURRENT_MSECS samples.
++ *
++ * rs_wait:          copy of t_max_wait
++ * rs_running:       from t_start until the transaction is set to T_LOCKED
++ * rs_locked:        from T_LOCKED until the switch to T_FLUSH
++ * rs_flushing:      from the switch to T_FLUSH until T_COMMIT
++ * rs_logging:       from T_COMMIT until the record is filed
++ * rs_handle_count:  copy of t_handle_count
++ * rs_blocks:        copy of t_outstanding_credits
++ * rs_blocks_logged: sum of "bufs" over the submitted batches
++ */
++struct transaction_run_stats_s {
++      unsigned long           rs_wait;
++      unsigned long           rs_running;
++      unsigned long           rs_locked;
++      unsigned long           rs_flushing;
++      unsigned long           rs_logging;
++
++      unsigned long           rs_handle_count;
++      unsigned long           rs_blocks;
++      unsigned long           rs_blocks_logged;
++};
++
++/*
++ * One history record.  ts_type (JBD_STATS_RUN or JBD_STATS_CHECKPOINT)
++ * says which member of the union is valid; a ts_type of 0 marks a slot
++ * that has not been filled yet.  ts_tid is the transaction id, except in
++ * journal->j_stats where it is used as the count of summed transactions.
++ */
++struct transaction_stats_s
++{
++      int                     ts_type;
++      unsigned long           ts_tid;
++      union {
++              struct transaction_run_stats_s run;
++              struct transaction_chp_stats_s chp;
++      } u;
++};
++
++/* Values of transaction_stats_s.ts_type */
++#define JBD_STATS_RUN         1
++#define JBD_STATS_CHECKPOINT  2
++
++/* Shorthands for the "run" member of the transaction_stats_s union */
++#define ts_wait                       u.run.rs_wait
++#define ts_running            u.run.rs_running
++#define ts_locked             u.run.rs_locked
++#define ts_flushing           u.run.rs_flushing
++#define ts_logging            u.run.rs_logging
++#define ts_handle_count               u.run.rs_handle_count
++#define ts_blocks             u.run.rs_blocks
++#define ts_blocks_logged      u.run.rs_blocks_logged
++
++/* Shorthands for the "chp" member of the transaction_stats_s union */
++#define ts_chp_time           u.chp.cs_chp_time
++#define ts_forced_to_close    u.chp.cs_forced_to_close
++#define ts_written            u.chp.cs_written
++#define ts_dropped            u.chp.cs_dropped
++
++/* Current jiffies value converted to milliseconds */
++#define CURRENT_MSECS         (jiffies_to_msecs(jiffies))
++
++/*
++ * Milliseconds elapsed between two CURRENT_MSECS samples.
++ *
++ * @start: earlier sample
++ * @end:   later sample
++ *
++ * Unsigned subtraction is performed modulo 2^N, so "end - start" is already
++ * the right answer when the counter wrapped between the two samples; no
++ * special case is needed.  (The previous wrap branch, end + (~0UL - start),
++ * came out one millisecond short.)
++ */
++static inline unsigned int
++jbd_time_diff(unsigned int start, unsigned int end)
++{
++      return end - start;
++}
++
+ /**
+  * struct journal_s - The journal_s type is the concrete type associated with
+  *     journal_t.
+@@ -800,6 +876,16 @@
+       int                     j_wbufsize;
+       /*
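++       * Transaction statistics: j_history is a ring of j_history_max
++       * records, j_history_cur the next slot to fill, j_stats the running
++       * totals. [j_history_lock]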
++       */
++      struct transaction_stats_s *j_history;
++      int                     j_history_max;
++      int                     j_history_cur;
++      spinlock_t              j_history_lock;
++      struct proc_dir_entry   *j_proc_entry;
++      struct transaction_stats_s j_stats;
++      
++      /*
+        * An opaque pointer to fs-private information.  ext3 puts its
+        * superblock pointer here
+        */
+Index: linux-2.6.13.4/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.13.4.orig/fs/jbd/transaction.c   2005-10-10 22:54:29.000000000 +0400
++++ linux-2.6.13.4/fs/jbd/transaction.c        2005-11-20 01:31:23.000000000 +0300
+@@ -58,6 +58,8 @@
+       J_ASSERT(journal->j_running_transaction == NULL);
+       journal->j_running_transaction = transaction;
++      transaction->t_max_wait = 0;
++      transaction->t_start = CURRENT_MSECS;
+       return transaction;
+ }
+@@ -84,6 +86,7 @@
+       int nblocks = handle->h_buffer_credits;
+       transaction_t *new_transaction = NULL;
+       int ret = 0;
++      unsigned long ts = CURRENT_MSECS;
+       if (nblocks > journal->j_max_transaction_buffers) {
+               printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
+@@ -217,6 +220,12 @@
+       /* OK, account for the buffers that this operation expects to
+        * use and add the handle to the running transaction. */
++      if (time_after(transaction->t_start, ts)) {
++              ts = jbd_time_diff(ts, transaction->t_start);
++              if (ts > transaction->t_max_wait)
++                      transaction->t_max_wait= ts;
++      }
++
+       handle->h_transaction = transaction;
+       transaction->t_outstanding_credits += nblocks;
+       transaction->t_updates++;
+Index: linux-2.6.13.4/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.13.4.orig/fs/jbd/journal.c       2005-10-10 22:54:29.000000000 +0400
++++ linux-2.6.13.4/fs/jbd/journal.c    2005-11-20 02:07:44.000000000 +0300
+@@ -36,6 +36,7 @@
+ #include <asm/uaccess.h>
+ #include <asm/page.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ EXPORT_SYMBOL(journal_start);
+ EXPORT_SYMBOL(journal_restart);
+@@ -646,6 +647,300 @@
+       return journal_add_journal_head(bh);
+ }
++/*
++ * Private state of one open "history" or "info" proc file.  It is
++ * allocated by the open handler, hung off seq_file->private and freed by
++ * the release handler.
++ *
++ * journal: owning journal (read by the "info" file)
++ * stats:   private copy of the statistics, taken under j_history_lock
++ * start:   slot of the copy at which the history walk begins
++ * max:     number of slots in the copy
++ */
++struct jbd_stats_proc_session {
++      journal_t *journal;
++      struct transaction_stats_s *stats;
++      int start;
++      int max;
++};
++
++/*
++ * Move @ts forward to the next filled slot of the snapshot held in @s.
++ *
++ * The snapshot is walked as a ring: stepping past the last slot wraps to the
++ * first one, and arriving at slot s->start again means every slot has been
++ * visited.  Slots with ts_type == 0 were never filled and are skipped.
++ * @first is non-zero only on the opening call of a walk, where @ts is
++ * expected to sit on the starting slot.
++ *
++ * Returns the slot found, or NULL once the walk is complete.
++ */
++static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
++                                      struct transaction_stats_s *ts,
++                                      int first)
++{
++      struct transaction_stats_s *const ring_end = s->stats + s->max;
++      struct transaction_stats_s *const ring_origin = s->stats + s->start;
++
++      if (ts == ring_end)
++              ts = s->stats;
++      if (ts == ring_origin && !first)
++              return NULL;
++
++      while (ts->ts_type == 0) {
++              if (++ts == ring_end)
++                      ts = s->stats;
++              if (ts == ring_origin)
++                      return NULL;
++      }
++      return ts;
++}
++
++/*
++ * seq_file ->start for "history".  Position 0 is the header line; position
++ * N (N > 0) is the N-th filled record counted from slot s->start.
++ * Returns NULL when the snapshot holds fewer than N filled records.
++ */
++static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *ts;
++      int remaining = *pos;
++
++      if (remaining == 0)
++              return SEQ_START_TOKEN;
++
++      ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
++      while (ts != NULL && --remaining != 0)
++              ts = jbd_history_skip_empty(s, ts + 1, 0);
++      return ts;
++}
++
++/*
++ * seq_file ->next for "history": bump the position and return the record
++ * following @v.  After the header token the walk opens at slot s->start.
++ */
++static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *cur = v;
++      int opening = (v == SEQ_START_TOKEN);
++
++      *pos += 1;
++      return jbd_history_skip_empty(s,
++                      opening ? s->stats + s->start : cur + 1, opening);
++}
++
++/*
++ * seq_file ->show for "history": print the column header for the start
++ * token, otherwise one line for the record @v.  Run records ("R") fill the
++ * first ten columns, checkpoint records ("C") the tid plus the last four.
++ * Any other record type trips J_ASSERT.
++ */
++static int jbd_seq_history_show(struct seq_file *seq, void *v)
++{
++      struct transaction_stats_s *rec;
++
++      if (v == SEQ_START_TOKEN) {
++              seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
++                              "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
++                              "wait", "run", "lock", "flush", "log", "hndls",
++                              "block", "inlog", "ctime", "write", "drop",
++                              "close");
++              return 0;
++      }
++
++      rec = v;
++      switch (rec->ts_type) {
++      case JBD_STATS_RUN:
++              seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
++                              "%-6lu %-5lu %-5lu\n", "R", rec->ts_tid,
++                              rec->ts_wait, rec->ts_running, rec->ts_locked,
++                              rec->ts_flushing, rec->ts_logging,
++                              rec->ts_handle_count, rec->ts_blocks,
++                              rec->ts_blocks_logged);
++              break;
++      case JBD_STATS_CHECKPOINT:
++              seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
++                              "C", rec->ts_tid, " ", rec->ts_chp_time,
++                              rec->ts_written, rec->ts_dropped,
++                              rec->ts_forced_to_close);
++              break;
++      default:
++              J_ASSERT(0);
++      }
++      return 0;
++}
++
++/*
++ * seq_file ->stop for "history".  The walk runs over the private snapshot
++ * taken at open time, so there is nothing to release here.
++ */
++static void jbd_seq_history_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* Iterator callbacks for the "history" file; handed to seq_open() */
++static struct seq_operations jbd_seq_history_ops = {
++      .start  = jbd_seq_history_start,
++      .next   = jbd_seq_history_next,
++      .stop   = jbd_seq_history_stop,
++      .show   = jbd_seq_history_show,
++};
++
++/*
++ * Open handler for the "history" file.
++ *
++ * Takes a private snapshot of the journal's history ring under
++ * j_history_lock so that the seq_file walk never looks at live data, and
++ * hangs it off seq_file->private.  The snapshot is freed by
++ * jbd_seq_history_release().
++ *
++ * Returns 0 on success, -ENOMEM when the journal has no history buffer or
++ * an allocation fails, or the error returned by seq_open().
++ */
++static int jbd_seq_history_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      /* Without a history buffer there is nothing to copy, and the
++       * "% s->max" below would divide by zero. */
++      if (journal->j_history == NULL || journal->j_history_max <= 0)
++              return -ENOMEM;
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -ENOMEM;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      s->stats = kmalloc(size, GFP_KERNEL);
++      /* check the buffer just allocated, not the session again */
++      if (s->stats == NULL) {
++              kfree(s);
++              return -ENOMEM;
++      }
++      s->journal = journal;
++
++      spin_lock(&journal->j_history_lock);
++      memcpy(s->stats, journal->j_history, size);
++      s->max = journal->j_history_max;
++      s->start = journal->j_history_cur % s->max;
++      spin_unlock(&journal->j_history_lock);
++
++      rc = seq_open(file, &jbd_seq_history_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++/*
++ * Release handler for the "history" file: free the snapshot and the session
++ * set up by the open handler, then let seq_file tear itself down.
++ */
++static int jbd_seq_history_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *m = file->private_data;
++      struct jbd_stats_proc_session *session = m->private;
++
++      kfree(session->stats);
++      kfree(session);
++      return seq_release(inode, file);
++}
++
++/*
++ * File operations of the "history" proc file: open/release are local,
++ * reading and seeking go through the generic seq_file helpers.
++ */
++static struct file_operations jbd_seq_history_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_history_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_history_release,
++};
++
++/*
++ * seq_file ->start for "info".  The file consists of a single record, so
++ * only position 0 yields something to show.
++ */
++static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
++{
++      if (*pos != 0)
++              return NULL;
++      return SEQ_START_TOKEN;
++}
++
++/*
++ * seq_file ->next for "info".  There is no second record, so the walk ends
++ * here.  The position is still advanced, as the seq_file iterator contract
++ * requires of every ->next call; that way a later ->start is never asked
++ * for position 0 again.
++ */
++static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      ++*pos;
++      return NULL;
++}
++
++/*
++ * seq_file ->show for "info": print the number of transactions summed in
++ * the snapshot and, when there is at least one, each accumulated figure
++ * divided by that number.
++ */
++static int jbd_seq_info_show(struct seq_file *seq, void *v)
++{
++      struct jbd_stats_proc_session *s = seq->private;
++      struct transaction_stats_s *sum;
++      unsigned long ntrans;
++
++      if (v != SEQ_START_TOKEN)
++              return 0;
++
++      sum = s->stats;
++      ntrans = sum->ts_tid;
++      seq_printf(seq, "%lu transaction, each upto %u blocks\n",
++                      ntrans, s->journal->j_max_transaction_buffers);
++      if (ntrans == 0)
++              return 0;
++
++      seq_printf(seq, "average: \n  %lums waiting for transaction\n",
++                      sum->ts_wait / ntrans);
++      seq_printf(seq, "  %lums running transaction\n",
++                      sum->ts_running / ntrans);
++      seq_printf(seq, "  %lums transaction was being locked\n",
++                      sum->ts_locked / ntrans);
++      seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
++                      sum->ts_flushing / ntrans);
++      seq_printf(seq, "  %lums logging transaction\n",
++                      sum->ts_logging / ntrans);
++      seq_printf(seq, "  %lu handles per transaction\n",
++                      sum->ts_handle_count / ntrans);
++      seq_printf(seq, "  %lu blocks per transaction\n",
++                      sum->ts_blocks / ntrans);
++      seq_printf(seq, "  %lu logged blocks per transaction\n",
++                      sum->ts_blocks_logged / ntrans);
++      return 0;
++}
++
++/*
++ * seq_file ->stop for "info".  Nothing is acquired in ->start, so there is
++ * nothing to undo.
++ */
++static void jbd_seq_info_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* Iterator callbacks for the "info" file; handed to seq_open() */
++static struct seq_operations jbd_seq_info_ops = {
++      .start  = jbd_seq_info_start,
++      .next   = jbd_seq_info_next,
++      .stop   = jbd_seq_info_stop,
++      .show   = jbd_seq_info_show,
++};
++
++/*
++ * Open handler for the "info" file.
++ *
++ * Copies the journal's running totals (j_stats) under j_history_lock into
++ * a private session that is hung off seq_file->private and freed by
++ * jbd_seq_info_release().
++ *
++ * Returns 0 on success, -ENOMEM when an allocation fails, or the error
++ * returned by seq_open().
++ */
++static int jbd_seq_info_open(struct inode *inode, struct file *file)
++{
++      journal_t *journal = PDE(inode)->data;
++      struct jbd_stats_proc_session *s;
++      int rc, size;
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL)
++              return -ENOMEM;
++      size = sizeof(struct transaction_stats_s);
++      s->stats = kmalloc(size, GFP_KERNEL);
++      /* check the buffer just allocated, not the session again */
++      if (s->stats == NULL) {
++              kfree(s);
++              return -ENOMEM;
++      }
++
++      spin_lock(&journal->j_history_lock);
++      memcpy(s->stats, &journal->j_stats, size);
++      s->journal = journal;
++      spin_unlock(&journal->j_history_lock);
++
++      rc = seq_open(file, &jbd_seq_info_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = s;
++      } else {
++              kfree(s->stats);
++              kfree(s);
++      }
++      return rc;
++}
++
++/*
++ * Release handler for the "info" file: free the copied totals and the
++ * session set up by the open handler, then let seq_file tear itself down.
++ */
++static int jbd_seq_info_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *m = file->private_data;
++      struct jbd_stats_proc_session *session = m->private;
++
++      kfree(session->stats);
++      kfree(session);
++      return seq_release(inode, file);
++}
++
++/*
++ * File operations of the "info" proc file: open/release are local,
++ * reading and seeking go through the generic seq_file helpers.
++ */
++static struct file_operations jbd_seq_info_fops = {
++      .owner          = THIS_MODULE,
++      .open           = jbd_seq_info_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = jbd_seq_info_release,
++};
++
++/*
++ * Parent proc directory of the per-journal statistics directories.  It is
++ * assigned by create_jbd_stats_proc_entry() and stays NULL until then.
++ */
++static struct proc_dir_entry *proc_jbd_stats = NULL;
++
++/*
++ * Create the proc directory of @journal, named after its block device,
++ * below proc_jbd_stats and populate it with the "history" and "info"
++ * files.  Failures are not reported: the directory pointer is left in
++ * journal->j_proc_entry (NULL if the mkdir failed), and "info" is only
++ * attempted once "history" exists.
++ */
++static void jbd_stats_proc_init(journal_t *journal)
++{
++      char name[64];
++      struct proc_dir_entry *p;
++
++      /* bdevname() formats the device name straight into the buffer it is
++       * given; copying its result back onto that same buffer with
++       * snprintf() was an overlapping, redundant copy. */
++      bdevname(journal->j_dev, name);
++      journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
++      if (journal->j_proc_entry == NULL)
++              return;
++
++      p = create_proc_entry("history", S_IRUGO, journal->j_proc_entry);
++      if (p == NULL)
++              return;
++      p->proc_fops = &jbd_seq_history_fops;
++      p->data = journal;
++
++      p = create_proc_entry("info", S_IRUGO, journal->j_proc_entry);
++      if (p == NULL)
++              return;
++      p->proc_fops = &jbd_seq_info_fops;
++      p->data = journal;
++}
++
++/*
++ * Remove the "info" and "history" files and then the per-journal directory
++ * that jbd_stats_proc_init() created for @journal.
++ */
++static void jbd_stats_proc_exit(journal_t *journal)
++{
++      char name[64];
++
++      /* bdevname() fills the buffer itself; no second, self-overlapping
++       * snprintf() copy is needed. */
++      bdevname(journal->j_dev, name);
++      remove_proc_entry("info", journal->j_proc_entry);
++      remove_proc_entry("history", journal->j_proc_entry);
++      remove_proc_entry(name, proc_jbd_stats);
++}
++
++/*
++ * Set up the statistics state of a freshly created journal: the history
++ * lock and a zeroed ring of 100 history records.
++ *
++ * The commit and checkpoint paths take j_history_lock and store into
++ * j_history without checking whether the proc interface exists, so the
++ * lock and the ring are set up regardless of proc_jbd_stats, and the lock
++ * is initialised before anything can fail.
++ *
++ * NOTE(review): if the allocation fails, j_history stays NULL with
++ * j_history_max == 0 and the writers still do not check for that; they
++ * need a guard of their own.
++ */
++static void journal_init_stats(journal_t *journal)
++{
++      int size;
++
++      spin_lock_init(&journal->j_history_lock);
++
++      journal->j_history_max = 100;
++      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
++      journal->j_history = kmalloc(size, GFP_KERNEL);
++      if (journal->j_history == NULL) {
++              journal->j_history_max = 0;
++              return;
++      }
++      memset(journal->j_history, 0, size);
++}
++
+ /*
+  * Management for journal control blocks: functions to create and
+  * destroy journal_t structures, and to initialise and read existing
+@@ -688,6 +983,9 @@
+               kfree(journal);
+               goto fail;
+       }
++      
++      journal_init_stats(journal);
++
+       return journal;
+ fail:
+       return NULL;
+@@ -731,6 +1029,7 @@
+       journal->j_blk_offset = start;
+       journal->j_maxlen = len;
+       journal->j_blocksize = blocksize;
++      jbd_stats_proc_init(journal);
+       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+       J_ASSERT(bh != NULL);
+@@ -780,6 +1079,7 @@
+       journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+       journal->j_blocksize = inode->i_sb->s_blocksize;
++      jbd_stats_proc_init(journal);
+       /* journal descriptor can store up to n blocks -bzzz */
+       n = journal->j_blocksize / sizeof(journal_block_tag_t);
+@@ -1161,6 +1461,8 @@
+               brelse(journal->j_sb_buffer);
+       }
++      if (journal->j_proc_entry)
++              jbd_stats_proc_exit(journal);
+       if (journal->j_inode)
+               iput(journal->j_inode);
+       if (journal->j_revoke)
+@@ -1929,6 +2231,28 @@
+ #endif
++/*
++ * Creation and removal of the top-level statistics directory.  With
++ * CONFIG_PROC_FS the two helpers are real functions; without it they
++ * compile to empty statements and proc_jbd_stats stays NULL.
++ */
++#if defined(CONFIG_PROC_FS)
++
++#define JBD_STATS_PROC_NAME "fs/jbd"
++
++/* Create the JBD_STATS_PROC_NAME directory and remember it in proc_jbd_stats */
++static void __init create_jbd_stats_proc_entry(void)
++{
++      proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
++}
++
++/* Remove the directory again, provided it was created */
++static void __exit remove_jbd_stats_proc_entry(void)
++{
++      if (proc_jbd_stats)
++              remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
++}
++
++#else
++
++#define create_jbd_stats_proc_entry() do {} while (0)
++#define remove_jbd_stats_proc_entry() do {} while (0)
++
++#endif
++
+ kmem_cache_t *jbd_handle_cache;
+ static int __init journal_init_handle_cache(void)
+@@ -1983,6 +2307,7 @@
+       if (ret != 0)
+               journal_destroy_caches();
+       create_jbd_proc_entry();
++      create_jbd_stats_proc_entry();
+       return ret;
+ }
+@@ -1994,6 +2319,7 @@
+               printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
+ #endif
+       remove_jbd_proc_entry();
++      remove_jbd_stats_proc_entry();
+       journal_destroy_caches();
+ }
+Index: linux-2.6.13.4/fs/jbd/checkpoint.c
+===================================================================
+--- linux-2.6.13.4.orig/fs/jbd/checkpoint.c    2005-11-19 22:46:03.000000000 +0300
++++ linux-2.6.13.4/fs/jbd/checkpoint.c 2005-11-20 02:24:09.000000000 +0300
+@@ -166,6 +166,7 @@
+                       transaction_t *t = jh->b_transaction;
+                       tid_t tid = t->t_tid;
++                      transaction->t_chp_stats.cs_forced_to_close++;
+                       spin_unlock(&journal->j_list_lock);
+                       jbd_unlock_bh_state(bh);
+                       log_start_commit(journal, tid);
+@@ -226,7 +227,7 @@
+  */
+ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
+                       struct buffer_head **bhs, int *batch_count,
+-                      int *drop_count)
++                      int *drop_count, transaction_t *transaction)
+ {
+       struct buffer_head *bh = jh2bh(jh);
+       int ret = 0;
+@@ -247,6 +248,7 @@
+               set_buffer_jwrite(bh);
+               bhs[*batch_count] = bh;
+               jbd_unlock_bh_state(bh);
++              transaction->t_chp_stats.cs_written++;
+               (*batch_count)++;
+               if (*batch_count == NR_BATCH) {
+                       __flush_batch(journal, bhs, batch_count);
+@@ -315,6 +317,8 @@
+               tid_t this_tid;
+               transaction = journal->j_checkpoint_transactions;
++              if (transaction->t_chp_stats.cs_chp_time == 0)
++                      transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
+               this_tid = transaction->t_tid;
+               jh = transaction->t_checkpoint_list;
+               last_jh = jh->b_cpprev;
+@@ -331,7 +335,8 @@
+                               retry = 1;
+                               break;
+                       }
+-                      retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
++                      retry = __flush_buffer(journal, jh, bhs, &batch_count,
++                                              &drop_count, transaction);
+                       if (cond_resched_lock(&journal->j_list_lock)) {
+                               retry = 1;
+                               break;
+@@ -609,6 +614,8 @@
+ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+ {
++      struct transaction_stats_s stats;
++
+       assert_spin_locked(&journal->j_list_lock);
+       if (transaction->t_cpnext) {
+               transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+@@ -633,5 +640,25 @@
+       J_ASSERT(journal->j_running_transaction != transaction);
+       jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
++
++      /*
++       * File the transaction for history
++       */
++      if (transaction->t_chp_stats.cs_written != 0 ||
++                      transaction->t_chp_stats.cs_chp_time != 0) {
++              stats.ts_type = JBD_STATS_CHECKPOINT;
++              stats.ts_tid = transaction->t_tid;
++              stats.u.chp = transaction->t_chp_stats;
++              if (stats.ts_chp_time)
++                      stats.ts_chp_time = 
++                              jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
++              spin_lock(&journal->j_history_lock);
++              memcpy(journal->j_history + journal->j_history_cur, &stats,
++                              sizeof(stats));
++              if (++journal->j_history_cur == journal->j_history_max)
++                      journal->j_history_cur = 0;
++              spin_unlock(&journal->j_history_lock);
++      }
++
+       kfree(transaction);
+ }
+Index: linux-2.6.13.4/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.13.4.orig/fs/jbd/commit.c        2005-10-10 22:54:29.000000000 +0400
++++ linux-2.6.13.4/fs/jbd/commit.c     2005-11-20 00:54:10.000000000 +0300
+@@ -21,6 +21,7 @@
+ #include <linux/mm.h>
+ #include <linux/pagemap.h>
+ #include <linux/smp_lock.h>
++#include <linux/jiffies.h>
+ /*
+  * Default IO end handler for temporary BJ_IO buffer_heads.
+@@ -168,6 +169,7 @@
+  */
+ void journal_commit_transaction(journal_t *journal)
+ {
++      struct transaction_stats_s stats;
+       transaction_t *commit_transaction;
+       struct journal_head *jh, *new_jh, *descriptor;
+       struct buffer_head **wbuf = journal->j_wbuf;
+@@ -214,6 +216,11 @@
+       spin_lock(&journal->j_state_lock);
+       commit_transaction->t_state = T_LOCKED;
++      stats.ts_wait = commit_transaction->t_max_wait;
++      stats.ts_locked = CURRENT_MSECS;
++      stats.ts_running = jbd_time_diff(commit_transaction->t_start,
++                                              stats.ts_locked);
++      
+       spin_lock(&commit_transaction->t_handle_lock);
+       while (commit_transaction->t_updates) {
+               DEFINE_WAIT(wait);
+@@ -286,6 +293,9 @@
+        */
+       journal_switch_revoke_table(journal);
++      stats.ts_flushing = CURRENT_MSECS;
++      stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
++
+       commit_transaction->t_state = T_FLUSH;
+       journal->j_committing_transaction = commit_transaction;
+       journal->j_running_transaction = NULL;
+@@ -444,6 +454,11 @@
+        */
+       commit_transaction->t_state = T_COMMIT;
++      stats.ts_logging = CURRENT_MSECS;
++      stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
++      stats.ts_blocks = commit_transaction->t_outstanding_credits;
++      stats.ts_blocks_logged = 0;
++
+       descriptor = NULL;
+       bufs = 0;
+       while (commit_transaction->t_buffers) {
+@@ -592,6 +607,7 @@
+                               submit_bh(WRITE, bh);
+                       }
+                       cond_resched();
++                      stats.ts_blocks_logged += bufs;
+                       /* Force a new descriptor to be generated next
+                            time round the loop. */
+@@ -756,6 +772,7 @@
+               cp_transaction = jh->b_cp_transaction;
+               if (cp_transaction) {
+                       JBUFFER_TRACE(jh, "remove from old cp transaction");
++                      cp_transaction->t_chp_stats.cs_dropped++;
+                       __journal_remove_checkpoint(jh);
+               }
+@@ -803,6 +820,36 @@
+       J_ASSERT(commit_transaction->t_state == T_COMMIT);
++      commit_transaction->t_start = CURRENT_MSECS;
++      stats.ts_logging = jbd_time_diff(stats.ts_logging,
++                                              commit_transaction->t_start);
++
++      /*
++       * File the transaction for history
++       */
++      stats.ts_type = JBD_STATS_RUN;
++      stats.ts_tid = commit_transaction->t_tid;
++      stats.ts_handle_count = commit_transaction->t_handle_count;
++      spin_lock(&journal->j_history_lock);
++      memcpy(journal->j_history + journal->j_history_cur, &stats,
++                      sizeof(stats));
++      if (++journal->j_history_cur == journal->j_history_max)
++              journal->j_history_cur = 0;
++
++      /*
++       * Calculate overall stats
++       */
++      journal->j_stats.ts_tid++;
++      journal->j_stats.ts_wait += stats.ts_wait;
++      journal->j_stats.ts_running += stats.ts_running;
++      journal->j_stats.ts_locked += stats.ts_locked;
++      journal->j_stats.ts_flushing += stats.ts_flushing;
++      journal->j_stats.ts_logging += stats.ts_logging;
++      journal->j_stats.ts_handle_count += stats.ts_handle_count;
++      journal->j_stats.ts_blocks += stats.ts_blocks;
++      journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
++      spin_unlock(&journal->j_history_lock);
++
+       /*
+        * This is a bit sleazy.  We borrow j_list_lock to protect
+        * journal->j_committing_transaction in __journal_remove_checkpoint.
diff --git a/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch b/lustre/kernel_patches/patches/kallsyms-2.4-bgl.patch
deleted file mode 100644 (file)
index 25f7954..0000000
+++ /dev/null
@@ -1,685 +0,0 @@
-Index: linux-bgl/arch/arm/vmlinux-armo.lds.in
-===================================================================
---- linux-bgl.orig/arch/arm/vmlinux-armo.lds.in        2003-07-02 08:44:12.000000000 -0700
-+++ linux-bgl/arch/arm/vmlinux-armo.lds.in     2004-10-26 22:52:50.037677957 -0700
-@@ -62,6 +62,10 @@
-                       *(__ksymtab)
-               __stop___ksymtab = .;
-+              __start___kallsyms = .; /* All kernel symbols */
-+                      *(__kallsyms)
-+              __stop___kallsyms = .;
-+
-               *(.got)                 /* Global offset table          */
-               _etext = .;             /* End of text section          */
-Index: linux-bgl/arch/arm/vmlinux-armv.lds.in
-===================================================================
---- linux-bgl.orig/arch/arm/vmlinux-armv.lds.in        2003-07-02 08:44:12.000000000 -0700
-+++ linux-bgl/arch/arm/vmlinux-armv.lds.in     2004-10-26 22:52:50.038677801 -0700
-@@ -67,6 +67,12 @@
-               __stop___ksymtab = .;
-       }
-+      __kallsyms : {                  /* Kernel debugging table       */
-+              __start___kallsyms = .; /* All kernel symbols */
-+                      *(__kallsyms)
-+              __stop___kallsyms = .;
-+      }
-+
-       . = ALIGN(8192);
-       .data : {
-Index: linux-bgl/arch/ppc/config.in
-===================================================================
---- linux-bgl.orig/arch/ppc/config.in  2004-10-04 09:55:49.000000000 -0700
-+++ linux-bgl/arch/ppc/config.in       2004-10-26 23:11:56.416643929 -0700
-@@ -732,6 +732,7 @@
-     string 'Additional compile arguments' CONFIG_COMPILE_OPTIONS "-g -ggdb"
-   fi
- fi
-+bool 'Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS
- if [ "$CONFIG_ALL_PPC" = "y" ]; then
-   bool 'Support for early boot text console (BootX or OpenFirmware only)' CONFIG_BOOTX_TEXT
-Index: linux-bgl/arch/ppc/vmlinux.lds
-===================================================================
---- linux-bgl.orig/arch/ppc/vmlinux.lds        2003-07-02 08:43:30.000000000 -0700
-+++ linux-bgl/arch/ppc/vmlinux.lds     2004-10-26 22:52:50.043677020 -0700
-@@ -73,6 +73,10 @@
-   __ksymtab : { *(__ksymtab) }
-   __stop___ksymtab = .;
-+  __start___kallsyms = .;     /* All kernel symbols */
-+  __kallsyms : { *(__kallsyms) }
-+  __stop___kallsyms = .;
-+
-   __start___ftr_fixup = .;
-   __ftr_fixup : { *(__ftr_fixup) }
-   __stop___ftr_fixup = .;
-Index: linux-bgl/arch/i386/config.in
-===================================================================
---- linux-bgl.orig/arch/i386/config.in 2003-07-02 08:43:46.000000000 -0700
-+++ linux-bgl/arch/i386/config.in      2004-10-26 22:52:50.040677488 -0700
-@@ -363,6 +363,7 @@
-    if [ "$CONFIG_ISDN" != "n" ]; then
-       source drivers/isdn/Config.in
-    fi
-+   bool '  Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS
- fi
- endmenu
-Index: linux-bgl/arch/i386/vmlinux.lds
-===================================================================
---- linux-bgl.orig/arch/i386/vmlinux.lds       2003-07-02 08:44:32.000000000 -0700
-+++ linux-bgl/arch/i386/vmlinux.lds    2004-10-26 22:52:50.040677488 -0700
-@@ -27,6 +27,9 @@
-   __start___ksymtab = .;      /* Kernel symbol table */
-   __ksymtab : { *(__ksymtab) }
-   __stop___ksymtab = .;
-+   __start___kallsyms = .;     /* All kernel symbols */
-+   __kallsyms : { *(__kallsyms) }
-+   __stop___kallsyms = .;
-   .data : {                   /* Data */
-       *(.data)
-Index: linux-bgl/arch/ia64/config.in
-===================================================================
---- linux-bgl.orig/arch/ia64/config.in 2003-07-02 08:44:12.000000000 -0700
-+++ linux-bgl/arch/ia64/config.in      2004-10-26 22:52:50.055675147 -0700
-@@ -278,4 +278,6 @@
-    bool '  Turn on irq debug checks (slow!)' CONFIG_IA64_DEBUG_IRQ
- fi
-+bool '  Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS
-+
- endmenu
-Index: linux-bgl/arch/alpha/vmlinux.lds.in
-===================================================================
---- linux-bgl.orig/arch/alpha/vmlinux.lds.in   2003-07-02 08:43:45.000000000 -0700
-+++ linux-bgl/arch/alpha/vmlinux.lds.in        2004-10-26 22:52:50.036678113 -0700
-@@ -28,6 +28,10 @@
-   __stop___ksymtab = .;
-   .kstrtab : { *(.kstrtab) }
-+  __start___kallsyms = .;       /* All kernel symbols */
-+  __kallsyms : { *(__kallsyms) }
-+  __stop___kallsyms = .;
-+
-   /* Startup code */
-   . = ALIGN(8192);
-   __init_begin = .;
-Index: linux-bgl/Makefile
-===================================================================
---- linux-bgl.orig/Makefile    2004-10-04 09:55:49.000000000 -0700
-+++ linux-bgl/Makefile 2004-10-26 22:54:44.018588371 -0700
-@@ -38,10 +38,13 @@
- MAKEFILES     = $(TOPDIR)/.config
- GENKSYMS      = /sbin/genksyms
- DEPMOD                = /sbin/depmod
-+KALLSYMS      = /sbin/kallsyms
- MODFLAGS      = -DMODULE
- CFLAGS_KERNEL =
- PERL          = perl
-+TMPPREFIX     =
-+
- export        VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
-       CONFIG_SHELL TOPDIR HPATH HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC \
-       CPP AR NM STRIP OBJCOPY OBJDUMP MAKE MAKEFILES GENKSYMS MODFLAGS PERL
-@@ -198,7 +201,7 @@
- CLEAN_FILES = \
-       kernel/ksyms.lst include/linux/compile.h \
-       vmlinux System.map \
--      .tmp* \
-+      $(TMPPREFIX).tmp* \
-       drivers/char/consolemap_deftbl.c drivers/video/promcon_tbl.c \
-       drivers/char/conmakehash \
-       drivers/char/drm/*-mod.c \
-@@ -278,16 +281,39 @@
- boot: vmlinux
-       @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" -C arch/$(ARCH)/boot
-+LD_VMLINUX    := $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \
-+                      --start-group \
-+                      $(CORE_FILES) \
-+                      $(DRIVERS) \
-+                      $(NETWORKS) \
-+                      $(LIBS) \
-+                      --end-group
-+ifeq ($(CONFIG_KALLSYMS),y)
-+LD_VMLINUX_KALLSYMS   := $(TMPPREFIX).tmp_kallsyms3.o
-+else
-+LD_VMLINUX_KALLSYMS   :=
-+endif
-+
- vmlinux: include/linux/version.h $(CONFIGURATION) init/main.o init/version.o init/do_mounts.o linuxsubdirs
--      $(LD) $(LINKFLAGS) $(HEAD) init/main.o init/version.o init/do_mounts.o \
--              --start-group \
--              $(CORE_FILES) \
--              $(DRIVERS) \
--              $(NETWORKS) \
--              $(LIBS) \
--              --end-group \
--              -o vmlinux
-+      @$(MAKE) CFLAGS="$(CFLAGS) $(CFLAGS_KERNEL)" kallsyms
-+
-+.PHONY:       kallsyms
-+
-+kallsyms:
-+ifeq ($(CONFIG_KALLSYMS),y)
-+      @echo kallsyms pass 1
-+      $(LD_VMLINUX) -o $(TMPPREFIX).tmp_vmlinux1
-+      @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux1 > $(TMPPREFIX).tmp_kallsyms1.o
-+      @echo kallsyms pass 2
-+      @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms1.o -o $(TMPPREFIX).tmp_vmlinux2
-+      @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux2 > $(TMPPREFIX).tmp_kallsyms2.o
-+      @echo kallsyms pass 3
-+      @$(LD_VMLINUX) $(TMPPREFIX).tmp_kallsyms2.o -o $(TMPPREFIX).tmp_vmlinux3
-+      @$(KALLSYMS) $(TMPPREFIX).tmp_vmlinux3 > $(TMPPREFIX).tmp_kallsyms3.o
-+endif
-+      $(LD_VMLINUX) $(LD_VMLINUX_KALLSYMS) -o vmlinux
-       $(NM) vmlinux | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' | sort > System.map
-+      @rm -f $(TMPPREFIX).tmp_vmlinux* $(TMPPREFIX).tmp_kallsyms*
- symlinks:
-       rm -f include/asm
-Index: linux-bgl/kernel/Makefile
-===================================================================
---- linux-bgl.orig/kernel/Makefile     2003-07-02 08:44:29.000000000 -0700
-+++ linux-bgl/kernel/Makefile  2004-10-26 22:59:34.101037916 -0700
-@@ -19,6 +19,7 @@
- obj-$(CONFIG_UID16) += uid16.o
- obj-$(CONFIG_MODULES) += ksyms.o
- obj-$(CONFIG_PM) += pm.o
-+obj-$(CONFIG_KALLSYMS) += kallsyms.o
- ifneq ($(CONFIG_IA64),y)
- # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
-Index: linux-bgl/kernel/ksyms.c
-===================================================================
---- linux-bgl.orig/kernel/ksyms.c      2004-10-26 21:49:59.922431839 -0700
-+++ linux-bgl/kernel/ksyms.c   2004-10-26 22:52:50.050675927 -0700
-@@ -56,6 +56,9 @@
- #ifdef CONFIG_KMOD
- #include <linux/kmod.h>
- #endif
-+#ifdef CONFIG_KALLSYMS
-+#include <linux/kallsyms.h>
-+#endif
- extern void set_device_ro(kdev_t dev,int flag);
-@@ -81,6 +84,15 @@
- EXPORT_SYMBOL(inter_module_put);
- EXPORT_SYMBOL(try_inc_mod_count);
-+#ifdef CONFIG_KALLSYMS
-+extern const char __start___kallsyms[];
-+extern const char __stop___kallsyms[];
-+EXPORT_SYMBOL(__start___kallsyms);
-+EXPORT_SYMBOL(__stop___kallsyms);
-+
-+
-+#endif
-+
- /* process memory management */
- EXPORT_SYMBOL(do_mmap_pgoff);
- EXPORT_SYMBOL(do_munmap);
-Index: linux-bgl/kernel/kallsyms.c
-===================================================================
---- linux-bgl.orig/kernel/kallsyms.c   2004-10-26 17:10:51.404753448 -0700
-+++ linux-bgl/kernel/kallsyms.c        2004-10-26 22:52:50.048676240 -0700
-@@ -0,0 +1,306 @@
-+/* An example of using kallsyms data in a kernel debugger.
-+
-+   Copyright 2000 Keith Owens <kaos@ocs.com.au> April 2000
-+
-+   This file is part of the Linux modutils.
-+
-+   This program is free software; you can redistribute it and/or modify it
-+   under the terms of the GNU General Public License as published by the
-+   Free Software Foundation; either version 2 of the License, or (at your
-+   option) any later version.
-+
-+   This program is distributed in the hope that it will be useful, but
-+   WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+   General Public License for more details.
-+
-+   You should have received a copy of the GNU General Public License
-+   along with this program; if not, write to the Free Software Foundation,
-+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+  */
-+
-+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.2 2005/04/01 21:30:19 green Exp $"
-+
-+/*
-+   This code uses the list of all kernel and module symbols to :-
-+
-+   * Find any non-stack symbol in a kernel or module.  Symbols do
-+     not have to be exported for debugging.
-+
-+   * Convert an address to the module (or kernel) that owns it, the
-+     section it is in and the nearest symbol.  This finds all non-stack
-+     symbols, not just exported ones.
-+
-+   You need modutils >= 2.3.11 and a kernel with the kallsyms patch
-+   which was compiled with CONFIG_KALLSYMS.
-+ */
-+
-+#include <linux/elf.h>
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/kallsyms.h>
-+
-+/* These external symbols are only set on kernels compiled with
-+ * CONFIG_KALLSYMS.
-+ */
-+
-+extern const char __start___kallsyms[];
-+extern const char __stop___kallsyms[];
-+
-+static struct module **kallsyms_module_list;
-+
-+static void kallsyms_get_module_list(void)
-+{
-+      const struct kallsyms_header    *ka_hdr;
-+      const struct kallsyms_section   *ka_sec;
-+      const struct kallsyms_symbol    *ka_sym;
-+      const char                      *ka_str;
-+      int i;
-+      const char *p;
-+
-+      if (__start___kallsyms >= __stop___kallsyms)
-+              return;
-+      ka_hdr = (struct kallsyms_header *)__start___kallsyms;
-+      ka_sec = (struct kallsyms_section *)
-+              ((char *)(ka_hdr) + ka_hdr->section_off);
-+      ka_sym = (struct kallsyms_symbol *)
-+              ((char *)(ka_hdr) + ka_hdr->symbol_off);
-+      ka_str = 
-+              ((char *)(ka_hdr) + ka_hdr->string_off);
-+
-+      for (i = 0; i < ka_hdr->symbols; kallsyms_next_sym(ka_hdr, ka_sym), ++i) {
-+              p = ka_str + ka_sym->name_off;
-+              if (strcmp(p, "module_list") == 0) {
-+                      if (ka_sym->symbol_addr)
-+                              kallsyms_module_list = (struct module **)(ka_sym->symbol_addr);
-+                      break;
-+              }
-+      }
-+}
-+
-+static inline void kallsyms_do_first_time(void)
-+{
-+      static int first_time = 1;
-+      if (first_time)
-+              kallsyms_get_module_list();
-+      first_time = 0;
-+}
-+
-+/* A symbol can appear in more than one module.  A token is used to
-+ * restart the scan at the next module, set the token to 0 for the
-+ * first scan of each symbol.
-+ */
-+
-+int kallsyms_symbol_to_address(
-+      const char       *name,         /* Name to lookup */
-+      unsigned long    *token,        /* Which module to start at */
-+      const char      **mod_name,     /* Set to module name */
-+      unsigned long    *mod_start,    /* Set to start address of module */
-+      unsigned long    *mod_end,      /* Set to end address of module */
-+      const char      **sec_name,     /* Set to section name */
-+      unsigned long    *sec_start,    /* Set to start address of section */
-+      unsigned long    *sec_end,      /* Set to end address of section */
-+      const char      **sym_name,     /* Set to full symbol name */
-+      unsigned long    *sym_start,    /* Set to start address of symbol */
-+      unsigned long    *sym_end       /* Set to end address of symbol */
-+      )
-+{
-+      const struct kallsyms_header    *ka_hdr = NULL; /* stupid gcc */
-+      const struct kallsyms_section   *ka_sec;
-+      const struct kallsyms_symbol    *ka_sym = NULL;
-+      const char                      *ka_str = NULL;
-+      const struct module *m;
-+      int i = 0, l;
-+      const char *p, *pt_R;
-+      char *p2;
-+
-+      kallsyms_do_first_time();
-+      if (!kallsyms_module_list)
-+              return(0);
-+
-+      /* Restart? */
-+      m = *kallsyms_module_list;
-+      if (token && *token) {
-+              for (; m; m = m->next)
-+                      if ((unsigned long)m == *token)
-+                              break;
-+              if (m)
-+                      m = m->next;
-+      }
-+
-+      for (; m; m = m->next) {
-+              if (!mod_member_present(m, kallsyms_start) || 
-+                  !mod_member_present(m, kallsyms_end) ||
-+                  m->kallsyms_start >= m->kallsyms_end)
-+                      continue;
-+              ka_hdr = (struct kallsyms_header *)m->kallsyms_start;
-+              ka_sym = (struct kallsyms_symbol *)
-+                      ((char *)(ka_hdr) + ka_hdr->symbol_off);
-+              ka_str = 
-+                      ((char *)(ka_hdr) + ka_hdr->string_off);
-+              for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) {
-+                      p = ka_str + ka_sym->name_off;
-+                      if (strcmp(p, name) == 0)
-+                              break;
-+                      /* Unversioned requests match versioned names */
-+                      if (!(pt_R = strstr(p, "_R")))
-+                              continue;
-+                      l = strlen(pt_R);
-+                      if (l < 10)
-+                              continue;       /* Not _R.*xxxxxxxx */
-+                      (void)simple_strtoul(pt_R+l-8, &p2, 16);
-+                      if (*p2)
-+                              continue;       /* Not _R.*xxxxxxxx */
-+                      if (strncmp(p, name, pt_R-p) == 0)
-+                              break;  /* Match with version */
-+              }
-+              if (i < ka_hdr->symbols)
-+                      break;
-+      }
-+
-+      if (token)
-+              *token = (unsigned long)m;
-+      if (!m)
-+              return(0);      /* not found */
-+
-+      ka_sec = (const struct kallsyms_section *)
-+              ((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off);
-+      *mod_name = *(m->name) ? m->name : "kernel";
-+      *mod_start = ka_hdr->start;
-+      *mod_end = ka_hdr->end;
-+      *sec_name = ka_sec->name_off + ka_str;
-+      *sec_start = ka_sec->start;
-+      *sec_end = ka_sec->start + ka_sec->size;
-+      *sym_name = ka_sym->name_off + ka_str;
-+      *sym_start = ka_sym->symbol_addr;
-+      if (i < ka_hdr->symbols-1) {
-+              const struct kallsyms_symbol *ka_symn = ka_sym;
-+              kallsyms_next_sym(ka_hdr, ka_symn);
-+              *sym_end = ka_symn->symbol_addr;
-+      }
-+      else
-+              *sym_end = *sec_end;
-+      return(1);
-+}
-+
-+int kallsyms_address_to_symbol(
-+      unsigned long     address,      /* Address to lookup */
-+      const char      **mod_name,     /* Set to module name */
-+      unsigned long    *mod_start,    /* Set to start address of module */
-+      unsigned long    *mod_end,      /* Set to end address of module */
-+      const char      **sec_name,     /* Set to section name */
-+      unsigned long    *sec_start,    /* Set to start address of section */
-+      unsigned long    *sec_end,      /* Set to end address of section */
-+      const char      **sym_name,     /* Set to full symbol name */
-+      unsigned long    *sym_start,    /* Set to start address of symbol */
-+      unsigned long    *sym_end       /* Set to end address of symbol */
-+      )
-+{
-+      const struct kallsyms_header    *ka_hdr = NULL; /* stupid gcc */
-+      const struct kallsyms_section   *ka_sec = NULL;
-+      const struct kallsyms_symbol    *ka_sym;
-+      const char                      *ka_str;
-+      const struct module *m;
-+      int i;
-+      unsigned long end;
-+
-+      kallsyms_do_first_time();
-+      if (!kallsyms_module_list)
-+              return(0);
-+
-+      for (m = *kallsyms_module_list; m; m = m->next) {
-+              if (!mod_member_present(m, kallsyms_start) || 
-+                  !mod_member_present(m, kallsyms_end) ||
-+                  m->kallsyms_start >= m->kallsyms_end)
-+                      continue;
-+              ka_hdr = (struct kallsyms_header *)m->kallsyms_start;
-+              ka_sec = (const struct kallsyms_section *)
-+                      ((char *)ka_hdr + ka_hdr->section_off);
-+              /* Is the address in any section in this module? */
-+              for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) {
-+                      if (ka_sec->start <= address &&
-+                          (ka_sec->start + ka_sec->size) > address)
-+                              break;
-+              }
-+              if (i < ka_hdr->sections)
-+                      break;  /* Found a matching section */
-+      }
-+
-+      if (!m)
-+              return(0);      /* not found */
-+
-+      ka_sym = (struct kallsyms_symbol *)
-+              ((char *)(ka_hdr) + ka_hdr->symbol_off);
-+      ka_str = 
-+              ((char *)(ka_hdr) + ka_hdr->string_off);
-+      *mod_name = *(m->name) ? m->name : "kernel";
-+      *mod_start = ka_hdr->start;
-+      *mod_end = ka_hdr->end;
-+      *sec_name = ka_sec->name_off + ka_str;
-+      *sec_start = ka_sec->start;
-+      *sec_end = ka_sec->start + ka_sec->size;
-+      *sym_name = *sec_name;          /* In case we find no matching symbol */
-+      *sym_start = *sec_start;
-+      *sym_end = *sec_end;
-+
-+      for (i = 0; i < ka_hdr->symbols; ++i, kallsyms_next_sym(ka_hdr, ka_sym)) {
-+              if (ka_sym->symbol_addr > address)
-+                      continue;
-+              if (i < ka_hdr->symbols-1) {
-+                      const struct kallsyms_symbol *ka_symn = ka_sym;
-+                      kallsyms_next_sym(ka_hdr, ka_symn);
-+                      end = ka_symn->symbol_addr;
-+              }
-+              else
-+                      end = *sec_end;
-+              if (end <= address)
-+                      continue;
-+              if ((char *)ka_hdr + ka_hdr->section_off + ka_sym->section_off
-+                  != (char *)ka_sec)
-+                      continue;       /* wrong section */
-+              *sym_name = ka_str + ka_sym->name_off;
-+              *sym_start = ka_sym->symbol_addr;
-+              *sym_end = end;
-+              break;
-+      }
-+      return(1);
-+}
-+
-+/* List all sections in all modules.  The callback routine is invoked with
-+ * token, module name, section name, section start, section end, section flags.
-+ */
-+int kallsyms_sections(void *token,
-+                    int (*callback)(void *, const char *, const char *, ElfW(Addr), ElfW(Addr), ElfW(Word)))
-+{
-+      const struct kallsyms_header    *ka_hdr = NULL; /* stupid gcc */
-+      const struct kallsyms_section   *ka_sec = NULL;
-+      const char                      *ka_str;
-+      const struct module *m;
-+      int i;
-+
-+      kallsyms_do_first_time();
-+      if (!kallsyms_module_list)
-+              return(0);
-+
-+      for (m = *kallsyms_module_list; m; m = m->next) {
-+              if (!mod_member_present(m, kallsyms_start) || 
-+                  !mod_member_present(m, kallsyms_end) ||
-+                  m->kallsyms_start >= m->kallsyms_end)
-+                      continue;
-+              ka_hdr = (struct kallsyms_header *)m->kallsyms_start;
-+              ka_sec = (const struct kallsyms_section *) ((char *)ka_hdr + ka_hdr->section_off);
-+              ka_str = ((char *)(ka_hdr) + ka_hdr->string_off);
-+              for (i = 0; i < ka_hdr->sections; ++i, kallsyms_next_sec(ka_hdr, ka_sec)) {
-+                      if (callback(
-+                              token,
-+                              *(m->name) ? m->name : "kernel",
-+                              ka_sec->name_off + ka_str,
-+                              ka_sec->start,
-+                              ka_sec->start + ka_sec->size,
-+                              ka_sec->flags))
-+                              return(0);
-+              }
-+      }
-+      return(1);
-+}
-Index: linux-bgl/include/linux/kallsyms.h
-===================================================================
---- linux-bgl.orig/include/linux/kallsyms.h    2004-10-26 17:10:51.404753448 -0700
-+++ linux-bgl/include/linux/kallsyms.h 2004-10-26 22:52:50.045676708 -0700
-@@ -0,0 +1,141 @@
-+/* kallsyms headers
-+   Copyright 2000 Keith Owens <kaos@ocs.com.au>
-+
-+   This file is part of the Linux modutils.  It is exported to kernel
-+   space so debuggers can access the kallsyms data.
-+
-+   The kallsyms data contains all the non-stack symbols from a kernel
-+   or a module.  The kernel symbols are held between __start___kallsyms
-+   and __stop___kallsyms.  The symbols for a module are accessed via
-+   the struct module chain which is based at module_list.
-+
-+   This program is free software; you can redistribute it and/or modify it
-+   under the terms of the GNU General Public License as published by the
-+   Free Software Foundation; either version 2 of the License, or (at your
-+   option) any later version.
-+
-+   This program is distributed in the hope that it will be useful, but
-+   WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+   General Public License for more details.
-+
-+   You should have received a copy of the GNU General Public License
-+   along with this program; if not, write to the Free Software Foundation,
-+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+ */
-+
-+#ident "$Id: kallsyms-2.4-bgl.patch,v 1.1.20.2 2005/04/01 21:30:19 green Exp $"
-+
-+#ifndef MODUTILS_KALLSYMS_H
-+#define MODUTILS_KALLSYMS_H 1
-+
-+/* Have to (re)define these ElfW entries here because external kallsyms
-+ * code does not have access to modutils/include/obj.h.  This code is
-+ * included from user spaces tools (modutils) and kernel, they need
-+ * different includes.
-+ */
-+
-+#ifndef ELFCLASS32
-+#ifdef __KERNEL__
-+#include <linux/elf.h>
-+#else /* __KERNEL__ */
-+#include <elf.h>
-+#endif        /* __KERNEL__ */
-+#endif        /* ELFCLASS32 */
-+
-+#ifndef ELFCLASSM
-+#define ELFCLASSM ELF_CLASS
-+#endif
-+
-+#ifndef ElfW
-+# if ELFCLASSM == ELFCLASS32
-+#  define ElfW(x)  Elf32_ ## x
-+#  define ELFW(x)  ELF32_ ## x
-+# else
-+#  define ElfW(x)  Elf64_ ## x
-+#  define ELFW(x)  ELF64_ ## x
-+# endif
-+#endif
-+
-+/* Format of data in the kallsyms section.
-+ * Most of the fields are small numbers but the total size and all
-+ * offsets can be large so use the 32/64 bit types for these fields.
-+ *
-+ * Do not use sizeof() on these structures, modutils may be using extra
-+ * fields.  Instead use the size fields in the header to access the
-+ * other bits of data.
-+ */  
-+
-+struct kallsyms_header {
-+      int             size;           /* Size of this header */
-+      ElfW(Word)      total_size;     /* Total size of kallsyms data */
-+      int             sections;       /* Number of section entries */
-+      ElfW(Off)       section_off;    /* Offset to first section entry */
-+      int             section_size;   /* Size of one section entry */
-+      int             symbols;        /* Number of symbol entries */
-+      ElfW(Off)       symbol_off;     /* Offset to first symbol entry */
-+      int             symbol_size;    /* Size of one symbol entry */
-+      ElfW(Off)       string_off;     /* Offset to first string */
-+      ElfW(Addr)      start;          /* Start address of first section */
-+      ElfW(Addr)      end;            /* End address of last section */
-+};
-+
-+struct kallsyms_section {
-+      ElfW(Addr)      start;          /* Start address of section */
-+      ElfW(Word)      size;           /* Size of this section */
-+      ElfW(Off)       name_off;       /* Offset to section name */
-+      ElfW(Word)      flags;          /* Flags from section */
-+};
-+
-+struct kallsyms_symbol {
-+      ElfW(Off)       section_off;    /* Offset to section that owns this symbol */
-+      ElfW(Addr)      symbol_addr;    /* Address of symbol */
-+      ElfW(Off)       name_off;       /* Offset to symbol name */
-+};
-+
-+#define KALLSYMS_SEC_NAME "__kallsyms"
-+#define KALLSYMS_IDX 2                        /* obj_kallsyms creates kallsyms as section 2 */
-+
-+#define kallsyms_next_sec(h,s) \
-+      ((s) = (struct kallsyms_section *)((char *)(s) + (h)->section_size))
-+#define kallsyms_next_sym(h,s) \
-+      ((s) = (struct kallsyms_symbol *)((char *)(s) + (h)->symbol_size))
-+
-+int kallsyms_symbol_to_address(
-+      const char       *name,                 /* Name to lookup */
-+      unsigned long    *token,                /* Which module to start with */
-+      const char      **mod_name,             /* Set to module name or "kernel" */
-+      unsigned long    *mod_start,            /* Set to start address of module */
-+      unsigned long    *mod_end,              /* Set to end address of module */
-+      const char      **sec_name,             /* Set to section name */
-+      unsigned long    *sec_start,            /* Set to start address of section */
-+      unsigned long    *sec_end,              /* Set to end address of section */
-+      const char      **sym_name,             /* Set to full symbol name */
-+      unsigned long    *sym_start,            /* Set to start address of symbol */
-+      unsigned long    *sym_end               /* Set to end address of symbol */
-+      );
-+
-+int kallsyms_address_to_symbol(
-+      unsigned long     address,              /* Address to lookup */
-+      const char      **mod_name,             /* Set to module name */
-+      unsigned long    *mod_start,            /* Set to start address of module */
-+      unsigned long    *mod_end,              /* Set to end address of module */
-+      const char      **sec_name,             /* Set to section name */
-+      unsigned long    *sec_start,            /* Set to start address of section */
-+      unsigned long    *sec_end,              /* Set to end address of section */
-+      const char      **sym_name,             /* Set to full symbol name */
-+      unsigned long    *sym_start,            /* Set to start address of symbol */
-+      unsigned long    *sym_end               /* Set to end address of symbol */
-+      );
-+
-+int kallsyms_sections(void *token,
-+                    int (*callback)(void *,   /* token */
-+                      const char *,           /* module name */
-+                      const char *,           /* section name */
-+                      ElfW(Addr),             /* Section start */
-+                      ElfW(Addr),             /* Section end */
-+                      ElfW(Word)              /* Section flags */
-+                    )
-+              );
-+
-+#endif /* kallsyms.h */
diff --git a/lustre/kernel_patches/patches/kksymoops-2.4-bgl.patch b/lustre/kernel_patches/patches/kksymoops-2.4-bgl.patch
deleted file mode 100644 (file)
index 9d33973..0000000
+++ /dev/null
@@ -1,678 +0,0 @@
-Index: linux-bgl/arch/i386/kernel/traps.c
-===================================================================
---- linux-bgl.orig/arch/i386/kernel/traps.c    2003-07-02 08:43:23.000000000 -0700
-+++ linux-bgl/arch/i386/kernel/traps.c 2004-10-26 23:25:17.950442396 -0700
-@@ -24,6 +24,7 @@
- #include <linux/spinlock.h>
- #include <linux/interrupt.h>
- #include <linux/highmem.h>
-+#include <linux/version.h>
- #ifdef CONFIG_MCA
- #include <linux/mca.h>
-@@ -135,6 +136,8 @@
- {
-       int i;
-       unsigned long addr;
-+      /* static to not take up stackspace; if we race here too bad */
-+      static char buffer[512];
-       if (!stack)
-               stack = (unsigned long*)&stack;
-@@ -144,9 +147,8 @@
-       while (((long) stack & (THREAD_SIZE-1)) != 0) {
-               addr = *stack++;
-               if (kernel_text_address(addr)) {
--                      if (i && ((i % 6) == 0))
--                              printk("\n ");
--                      printk(" [<%08lx>]", addr);
-+                      lookup_symbol(addr, buffer, 512);
-+                      printk("[<%08lx>] %s (0x%p)\n", addr,buffer,stack-1);
-                       i++;
-               }
-       }
-@@ -186,12 +188,19 @@
-       show_trace(esp);
- }
-+#ifdef CONFIG_MK7
-+#define ARCHIT "/athlon"
-+#else
-+#define ARCHIT "/i686"
-+#endif
-+
- void show_registers(struct pt_regs *regs)
- {
-       int i;
-       int in_kernel = 1;
-       unsigned long esp;
-       unsigned short ss;
-+      static char buffer[512];
-       esp = (unsigned long) (&regs->esp);
-       ss = __KERNEL_DS;
-@@ -200,8 +209,12 @@
-               esp = regs->esp;
-               ss = regs->xss & 0xffff;
-       }
-+
-+      print_modules();
-+      lookup_symbol(regs->eip, buffer, 512);
-       printk("CPU:    %d\nEIP:    %04x:[<%08lx>]    %s\nEFLAGS: %08lx\n",
-               smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags);
-+      printk("\nEIP is at %s (" UTS_RELEASE ARCHIT ")\n",buffer);
-       printk("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
-               regs->eax, regs->ebx, regs->ecx, regs->edx);
-       printk("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
-@@ -261,7 +274,7 @@
-       if (__get_user(file, (char **)(eip + 4)) ||
-               (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
-               file = "<bad filename>";
--
-+      printk("------------[ cut here ]------------\n");
-       printk("kernel BUG at %s:%d!\n", file, line);
- no_bug:
-Index: linux-bgl/arch/i386/kernel/process.c
-===================================================================
---- linux-bgl.orig/arch/i386/kernel/process.c  2003-07-02 08:44:07.000000000 -0700
-+++ linux-bgl/arch/i386/kernel/process.c       2004-10-26 23:28:53.017015082 -0700
-@@ -33,6 +33,7 @@
- #include <linux/reboot.h>
- #include <linux/init.h>
- #include <linux/mc146818rtc.h>
-+#include <linux/version.h>
- #include <asm/uaccess.h>
- #include <asm/pgtable.h>
-@@ -437,10 +438,14 @@
- void show_regs(struct pt_regs * regs)
- {
-       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
-+      static char buffer[512];
-+
-+      lookup_symbol(regs->eip, buffer, 512);
-       printk("\n");
-       printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-       printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs,regs->eip, smp_processor_id());
-+      printk("\nEIP is at %s (" UTS_RELEASE ")\n", buffer);
-       if (regs->xcs & 3)
-               printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-       printk(" EFLAGS: %08lx    %s\n",regs->eflags, print_tainted());
-Index: linux-bgl/arch/ia64/kernel/process.c
-===================================================================
---- linux-bgl.orig/arch/ia64/kernel/process.c  2003-07-02 08:43:26.000000000 -0700
-+++ linux-bgl/arch/ia64/kernel/process.c       2004-10-26 23:29:56.340005959 -0700
-@@ -18,6 +18,7 @@
- #include <linux/smp_lock.h>
- #include <linux/stddef.h>
- #include <linux/unistd.h>
-+#include <linux/version.h>
- #include <asm/delay.h>
- #include <asm/efi.h>
-@@ -33,9 +34,10 @@
- #include <asm/sn/idle.h>
- #endif
--static void
--do_show_stack (struct unw_frame_info *info, void *arg)
-+void
-+ia64_do_show_stack (struct unw_frame_info *info, void *arg)
- {
-+      static char buffer[512];
-       unsigned long ip, sp, bsp;
-       printk("\nCall Trace: ");
-@@ -46,7 +48,8 @@
-               unw_get_sp(info, &sp);
-               unw_get_bsp(info, &bsp);
--              printk("[<%016lx>] sp=0x%016lx bsp=0x%016lx\n", ip, sp, bsp);
-+              lookup_symbol(ip, buffer, 512);
-+              printk("[<%016lx>] sp=0x%016lx bsp=0x%016lx %s\n", ip, sp, bsp, buffer);
-       } while (unw_unwind(info) >= 0);
- }
-@@ -56,19 +59,19 @@
-       struct unw_frame_info info;
-       unw_init_from_blocked_task(&info, task);
--      do_show_stack(&info, 0);
-+      ia64_do_show_stack(&info, 0);
- }
- void
- show_stack (struct task_struct *task)
- {
-       if (!task)
--              unw_init_running(do_show_stack, 0);
-+              unw_init_running(ia64_do_show_stack, 0);
-       else {
-               struct unw_frame_info info;
-               unw_init_from_blocked_task(&info, task);
--              do_show_stack(&info, 0);
-+              ia64_do_show_stack(&info, 0);
-       }
- }
-@@ -76,8 +79,11 @@
- show_regs (struct pt_regs *regs)
- {
-       unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-+      static char buffer[512];
-       printk("\nPid: %d, comm: %20s\n", current->pid, current->comm);
-+      lookup_symbol(ip, buffer, 512);
-+      printk("EIP is at %s (" UTS_RELEASE ")\n", buffer);
-       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
-              regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
-       printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-Index: linux-bgl/arch/s390/config.in
-===================================================================
---- linux-bgl.orig/arch/s390/config.in 2003-07-02 08:43:27.000000000 -0700
-+++ linux-bgl/arch/s390/config.in      2004-10-26 23:25:17.961440685 -0700
-@@ -73,5 +73,6 @@
- #  bool 'Remote GDB kernel debugging' CONFIG_REMOTE_DEBUG
- #fi
- bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
-+bool 'Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS
- endmenu
-Index: linux-bgl/arch/s390/kernel/traps.c
-===================================================================
---- linux-bgl.orig/arch/s390/kernel/traps.c    2003-07-02 08:44:02.000000000 -0700
-+++ linux-bgl/arch/s390/kernel/traps.c 2004-10-26 23:25:17.964440218 -0700
-@@ -27,6 +27,7 @@
- #include <linux/init.h>
- #include <linux/delay.h>
- #include <linux/module.h>
-+#include <linux/version.h>
- #include <asm/system.h>
- #include <asm/uaccess.h>
-@@ -108,27 +109,26 @@
- void show_trace(unsigned long * stack)
- {
-+      static char buffer[512];
-       unsigned long backchain, low_addr, high_addr, ret_addr;
-       int i;
-       if (!stack)
-               stack = (unsigned long*)&stack;
--      printk("Call Trace: ");
-       low_addr = ((unsigned long) stack) & PSW_ADDR_MASK;
-       high_addr = (low_addr & (-THREAD_SIZE)) + THREAD_SIZE;
-       /* Skip the first frame (biased stack) */
-       backchain = *((unsigned long *) low_addr) & PSW_ADDR_MASK;
--      /* Print up to 8 lines */
--      for (i = 0; i < 8; i++) {
-+      /* Print up to 20 lines */
-+      for (i = 0; i < 20; i++) {
-               if (backchain < low_addr || backchain >= high_addr)
-                       break;
-               ret_addr = *((unsigned long *) (backchain+56)) & PSW_ADDR_MASK;
-               if (!kernel_text_address(ret_addr))
-                       break;
--              if (i && ((i % 6) == 0))
--                      printk("\n   ");
--              printk("[<%08lx>] ", ret_addr);
-+              lookup_symbol(ret_addr, buffer, 512);
-+              printk("[<%08lx>] %s (0x%lx)\n", ret_addr,buffer,backchain+56);
-               low_addr = backchain;
-               backchain = *((unsigned long *) backchain) & PSW_ADDR_MASK;
-       }
-@@ -171,6 +171,7 @@
- void show_registers(struct pt_regs *regs)
- {
-+      static char buffer[512];
-       mm_segment_t old_fs;
-       char *mode;
-       int i;
-@@ -179,6 +180,10 @@
-       printk("%s PSW : %08lx %08lx\n",
-              mode, (unsigned long) regs->psw.mask,
-              (unsigned long) regs->psw.addr);
-+      if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
-+              lookup_symbol(regs->psw.addr & 0x7FFFFFFF, buffer, 512);
-+              printk("           %s (" UTS_RELEASE ")\n", buffer);
-+      }
-       printk("%s GPRS: %08x %08x %08x %08x\n", mode,
-              regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
-       printk("           %08x %08x %08x %08x\n",
-Index: linux-bgl/arch/s390x/config.in
-===================================================================
---- linux-bgl.orig/arch/s390x/config.in        2003-07-02 08:43:07.000000000 -0700
-+++ linux-bgl/arch/s390x/config.in     2004-10-26 23:25:17.964440218 -0700
-@@ -75,5 +75,6 @@
- #  bool 'Remote GDB kernel debugging' CONFIG_REMOTE_DEBUG
- #fi
- bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
-+bool 'Load all symbols for debugging/kksymoops' CONFIG_KALLSYMS
- endmenu
-Index: linux-bgl/arch/s390x/kernel/traps.c
-===================================================================
---- linux-bgl.orig/arch/s390x/kernel/traps.c   2003-07-02 08:43:25.000000000 -0700
-+++ linux-bgl/arch/s390x/kernel/traps.c        2004-10-26 23:25:17.966439907 -0700
-@@ -27,6 +27,7 @@
- #include <linux/init.h>
- #include <linux/delay.h>
- #include <linux/module.h>
-+#include <linux/version.h>
- #include <asm/system.h>
- #include <asm/uaccess.h>
-@@ -112,25 +113,25 @@
- {
-       unsigned long backchain, low_addr, high_addr, ret_addr;
-       int i;
-+      /* static to not take up stackspace; if we race here too bad */
-+      static char buffer[512];
-       if (!stack)
-               stack = (unsigned long*)&stack;
--      printk("Call Trace: ");
-       low_addr = ((unsigned long) stack) & PSW_ADDR_MASK;
-       high_addr = (low_addr & (-THREAD_SIZE)) + THREAD_SIZE;
-       /* Skip the first frame (biased stack) */
-       backchain = *((unsigned long *) low_addr) & PSW_ADDR_MASK;
--      /* Print up to 8 lines */
--      for (i = 0; i < 8; i++) {
-+      /* Print up to 20 lines */
-+      for (i = 0; i < 20; i++) {
-               if (backchain < low_addr || backchain >= high_addr)
-                       break;
-               ret_addr = *((unsigned long *) (backchain+112)) & PSW_ADDR_MASK;
-               if (!kernel_text_address(ret_addr))
-                       break;
--              if (i && ((i % 3) == 0))
--                      printk("\n   ");
--              printk("[<%016lx>] ", ret_addr);
-+              lookup_symbol(ret_addr, buffer, 512);
-+              printk("[<%016lx>] %s (0x%lx)\n", ret_addr, buffer, backchain+112);
-               low_addr = backchain;
-               backchain = *((unsigned long *) backchain) & PSW_ADDR_MASK;
-       }
-@@ -173,6 +174,7 @@
- void show_registers(struct pt_regs *regs)
- {
-+      static char buffer[512];
-       mm_segment_t old_fs;
-       char *mode;
-       int i;
-@@ -181,6 +183,10 @@
-       printk("%s PSW : %016lx %016lx\n",
-              mode, (unsigned long) regs->psw.mask,
-              (unsigned long) regs->psw.addr);
-+      if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
-+              lookup_symbol(regs->psw.addr, buffer, 512);
-+              printk("           %s (" UTS_RELEASE ")\n", buffer);
-+      }
-       printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
-              regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
-       printk("           %016lx %016lx %016lx %016lx\n",
-Index: linux-bgl/arch/ppc64/mm/fault.c
-===================================================================
---- linux-bgl.orig/arch/ppc64/mm/fault.c       2003-07-02 08:43:12.000000000 -0700
-+++ linux-bgl/arch/ppc64/mm/fault.c    2004-10-26 23:30:24.467942247 -0700
-@@ -224,7 +224,6 @@
-       if (debugger_kernel_faults)
-               debugger(regs);
- #endif
--      print_backtrace( (unsigned long *)regs->gpr[1] );
-       panic("kernel access of bad area pc %lx lr %lx address %lX tsk %s/%d",
-             regs->nip,regs->link,address,current->comm,current->pid);
- }
-Index: linux-bgl/arch/ppc64/kernel/traps.c
-===================================================================
---- linux-bgl.orig/arch/ppc64/kernel/traps.c   2003-07-02 08:44:03.000000000 -0700
-+++ linux-bgl/arch/ppc64/kernel/traps.c        2004-10-26 23:33:45.297572484 -0700
-@@ -89,7 +89,6 @@
- #if defined(CONFIG_KDB)
-               kdb(KDB_REASON_OOPS, 0, (kdb_eframe_t) regs);
- #endif
--              print_backtrace((unsigned long *)regs->gpr[1]);
-               panic("Exception in kernel pc %lx signal %d",regs->nip,signr);
- #if defined(CONFIG_PPCDBG) && (defined(CONFIG_XMON) || defined(CONFIG_KGDB))
-       /* Allow us to catch SIGILLs for 64-bit app/glibc debugging. -Peter */
-@@ -187,7 +186,6 @@
-               if (kdb(KDB_REASON_FAULT, 0, regs))
-                       return ;
- #endif
--              print_backtrace((unsigned long *)regs->gpr[1]);
-               panic("machine check");
-       }
-       _exception(SIGSEGV, regs);      
-@@ -209,7 +207,6 @@
-       }
- #endif
-       show_regs(regs);
--      print_backtrace((unsigned long *)regs->gpr[1]);
-       panic("System Management Interrupt");
- }
-Index: linux-bgl/arch/ppc64/kernel/process.c
-===================================================================
---- linux-bgl.orig/arch/ppc64/kernel/process.c 2003-07-02 08:44:31.000000000 -0700
-+++ linux-bgl/arch/ppc64/kernel/process.c      2004-10-26 23:33:01.060713583 -0700
-@@ -30,6 +30,8 @@
- #include <linux/user.h>
- #include <linux/elf.h>
- #include <linux/init.h>
-+#include <linux/version.h>
-+#include <linux/module.h>
- #include <asm/pgtable.h>
- #include <asm/uaccess.h>
-@@ -130,12 +132,61 @@
-       __restore_flags(s);
- }
-+/*
-+ * If the address is either in the .text section of the
-+ * kernel, or in the vmalloc'ed module regions, it *may* 
-+ * be the address of a calling routine
-+ */
-+
-+#ifdef CONFIG_MODULES
-+
-+extern struct module *module_list;
-+extern struct module kernel_module;
-+extern char _stext[], _etext[];
-+
-+static inline int kernel_text_address(unsigned long addr)
-+{
-+      int retval = 0;
-+      struct module *mod;
-+
-+      if (addr >= (unsigned long) &_stext &&
-+          addr <= (unsigned long) &_etext)
-+              return 1;
-+
-+      for (mod = module_list; mod != &kernel_module; mod = mod->next) {
-+              /* mod_bound tests for addr being inside the vmalloc'ed
-+               * module area. Of course it'd be better to test only
-+               * for the .text subset... */
-+              if (mod_bound(addr, 0, mod)) {
-+                      retval = 1;
-+                      break;
-+              }
-+      }
-+
-+      return retval;
-+}
-+
-+#else
-+
-+static inline int kernel_text_address(unsigned long addr)
-+{
-+      return (addr >= (unsigned long) &_stext &&
-+              addr <= (unsigned long) &_etext);
-+}
-+
-+#endif
-+
-+
- void show_regs(struct pt_regs * regs)
- {
-       int i;
-+      static char buffer[512];
--      printk("NIP: %016lX XER: %016lX LR: %016lX REGS: %p TRAP: %04lx    %s\n",
-+      print_modules();
-+      printk("NIP: %016lx XER: %016lx LR: %016lx REGS: %p TRAP: %04lx    %s\n",
-              regs->nip, regs->xer, regs->link, regs,regs->trap, print_tainted());
-+      lookup_symbol(regs->nip, buffer, 512);
-+      printk("NIP is at %s (" UTS_RELEASE ")\n", buffer);
-       printk("MSR: %016lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
-              regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
-              regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
-@@ -147,27 +198,22 @@
-       printk("\nlast math %p ", last_task_used_math);
-       
- #ifdef CONFIG_SMP
--      /* printk(" CPU: %d last CPU: %d", current->processor,current->last_processor); */
-+      printk("CPU: %d", smp_processor_id());
- #endif /* CONFIG_SMP */
-       
--      printk("\n");
-       for (i = 0;  i < 32;  i++)
-       {
-               long r;
-               if ((i % 4) == 0)
--              {
--                      printk("GPR%02d: ", i);
--              }
-+                      printk("\nGPR%02d: ", i);
-               if ( __get_user(r, &(regs->gpr[i])) )
-                   return;
--              printk("%016lX ", r);
--              if ((i % 4) == 3)
--              {
--                      printk("\n");
--              }
-+              printk("%016lx ", r);
-       }
-+      printk("\n");
-+      print_backtrace((unsigned long *)regs->gpr[1]);
- }
- void exit_thread(void)
-@@ -415,67 +461,24 @@
-       }
- }
--extern char _stext[], _etext[];
--
--char * ppc_find_proc_name( unsigned * p, char * buf, unsigned buflen )
--{
--      unsigned long tb_flags;
--      unsigned short name_len;
--      unsigned long tb_start, code_start, code_ptr, code_offset;
--      unsigned code_len;
--      strcpy( buf, "Unknown" );
--      code_ptr = (unsigned long)p;
--      code_offset = 0;
--      if ( ( (unsigned long)p >= (unsigned long)_stext ) && ( (unsigned long)p <= (unsigned long)_etext ) ) {
--              while ( (unsigned long)p <= (unsigned long)_etext ) {
--                      if ( *p == 0 ) {
--                              tb_start = (unsigned long)p;
--                              ++p;    /* Point to traceback flags */
--                              tb_flags = *((unsigned long *)p);
--                              p += 2; /* Skip over traceback flags */
--                              if ( tb_flags & TB_NAME_PRESENT ) {
--                                      if ( tb_flags & TB_PARMINFO )
--                                              ++p;    /* skip over parminfo data */
--                                      if ( tb_flags & TB_HAS_TBOFF ) {
--                                              code_len = *p;  /* get code length */
--                                              code_start = tb_start - code_len;
--                                              code_offset = code_ptr - code_start + 1;
--                                              if ( code_offset > 0x100000 )
--                                                      break;
--                                              ++p;            /* skip over code size */
--                                      }
--                                      name_len = *((unsigned short *)p);
--                                      if ( name_len > (buflen-20) )
--                                              name_len = buflen-20;
--                                      memcpy( buf, ((char *)p)+2, name_len );
--                                      buf[name_len] = 0;
--                                      if ( code_offset )
--                                              sprintf( buf+name_len, "+0x%lx", code_offset-1 ); 
--                              }
--                              break;
--                      }
--                      ++p;
--              }
--      }
--      return buf;
--}
--
- void
- print_backtrace(unsigned long *sp)
- {
-       int cnt = 0;
-       unsigned long i;
--      char name_buf[256];
-+      char buffer[512];
--      printk("Call backtrace: \n");
-+      printk("Call Trace: \n");
-       while (sp) {
-               if (__get_user( i, &sp[2] ))
-                       break;
--              printk("%016lX ", i);
--              printk("%s\n", ppc_find_proc_name( (unsigned *)i, name_buf, 256 ));
-+              if (kernel_text_address(i)) {
-+                      if (__get_user(sp, (unsigned long **)sp))
-+                              break;
-+                      lookup_symbol(i, buffer, 512);
-+                      printk("[<%016lx>] %s\n", i, buffer);
-+              }
-               if (cnt > 32) break;
--              if (__get_user(sp, (unsigned long **)sp))
--                      break;
-       }
-       printk("\n");
- }
-@@ -515,6 +518,7 @@
-       unsigned long ip, sp;
-       unsigned long stack_page = (unsigned long)p;
-       int count = 0;
-+      static char buffer[512];
-       if (!p)
-               return;
-@@ -528,7 +532,8 @@
-                       break;
-               if (count > 0) {
-                       ip = *(unsigned long *)(sp + 16);
--                      printk("[%016lx] ", ip);
-+                      lookup_symbol(ip, buffer, 512);
-+                      printk("[<%016lx>] %s\n", ip, buffer);
-               }
-       } while (count++ < 16);
-       printk("\n");
-Index: linux-bgl/kernel/Makefile
-===================================================================
---- linux-bgl.orig/kernel/Makefile     2004-10-26 23:23:00.516655289 -0700
-+++ linux-bgl/kernel/Makefile  2004-10-26 23:35:04.930451186 -0700
-@@ -14,7 +14,7 @@
- obj-y     = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
-           module.o exit.o itimer.o info.o time.o softirq.o resource.o \
-           sysctl.o acct.o capability.o ptrace.o timer.o user.o \
--          signal.o sys.o kmod.o context.o
-+          signal.o sys.o kmod.o context.o kksymoops.o
- obj-$(CONFIG_UID16) += uid16.o
- obj-$(CONFIG_MODULES) += ksyms.o
-Index: linux-bgl/kernel/kksymoops.c
-===================================================================
---- linux-bgl.orig/kernel/kksymoops.c  2004-10-26 17:10:51.404753448 -0700
-+++ linux-bgl/kernel/kksymoops.c       2004-10-26 23:25:17.971439129 -0700
-@@ -0,0 +1,82 @@
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/errno.h>
-+#include <linux/kernel.h>
-+#include <linux/config.h>
-+#ifdef CONFIG_KALLSYMS
-+#include <linux/kallsyms.h>
-+#endif
-+
-+
-+
-+int lookup_symbol(unsigned long address, char *buffer, int buflen)
-+{
-+      struct module *this_mod;
-+      unsigned long bestsofar;
-+
-+      const char *mod_name = NULL, *sec_name = NULL, *sym_name = NULL;
-+      unsigned long mod_start,mod_end,sec_start,sec_end,sym_start,sym_end;
-+      
-+      if (!buffer)
-+              return -EFAULT;
-+      
-+      if (buflen<256)
-+              return -ENOMEM;
-+      
-+      memset(buffer,0,buflen);
-+
-+#ifdef CONFIG_KALLSYMS
-+      if (!kallsyms_address_to_symbol(address,&mod_name,&mod_start,&mod_end,&sec_name,
-+              &sec_start, &sec_end, &sym_name, &sym_start, &sym_end)) {
-+              /* kallsyms doesn't have a clue; lets try harder */
-+              bestsofar = 0;
-+              snprintf(buffer,buflen-1,"[unresolved]");
-+              
-+              this_mod = module_list;
-+
-+              while (this_mod != NULL) {
-+                      int i;
-+                      /* walk the symbol list of this module. Only symbols
-+                         who's address is smaller than the searched for address
-+                         are relevant; and only if it's better than the best so far */
-+                      for (i=0; i< this_mod->nsyms; i++)
-+                              if ((this_mod->syms[i].value<=address) &&
-+                                      (bestsofar<this_mod->syms[i].value)) {
-+                                      snprintf(buffer,buflen-1,"%s [%s] 0x%x",
-+                                              this_mod->syms[i].name,
-+                                              this_mod->name,
-+                                              (unsigned int)(address - this_mod->syms[i].value));
-+                                      bestsofar = this_mod->syms[i].value;
-+                              }
-+                      this_mod = this_mod->next;
-+              }
-+
-+      } else { /* kallsyms success */
-+              snprintf(buffer,buflen-1,"%s [%s] 0x%x",sym_name,mod_name,(unsigned int)(address-sym_start));
-+      }
-+#endif
-+      return strlen(buffer);
-+}
-+
-+static char modlist[4096];
-+/* this function isn't smp safe but that's not really a problem; it's called from
-+ * oops context only and any locking could actually prevent the oops from going out;
-+ * the line that is generated is informational only and should NEVER prevent the real oops
-+ * from going out. 
-+ */
-+void print_modules(void)
-+{
-+      struct module *this_mod;
-+      int pos = 0, i;
-+      memset(modlist,0,4096);
-+
-+#ifdef CONFIG_KALLSYMS
-+      this_mod = module_list;
-+      while (this_mod != NULL) {
-+              if (this_mod->name != NULL)
-+                      pos +=snprintf(modlist+pos,160-pos-1,"%s ",this_mod->name);
-+              this_mod = this_mod->next;
-+      }
-+      printk("%s\n",modlist);
-+#endif
-+}
-Index: linux-bgl/include/linux/kernel.h
-===================================================================
---- linux-bgl.orig/include/linux/kernel.h      2003-07-02 08:44:16.000000000 -0700
-+++ linux-bgl/include/linux/kernel.h   2004-10-26 23:25:17.968439596 -0700
-@@ -107,6 +107,9 @@
- extern int tainted;
- extern const char *print_tainted(void);
-+extern int lookup_symbol(unsigned long address, char *buffer, int buflen);
-+extern void print_modules(void);
-+
- #if DEBUG
- #define pr_debug(fmt,arg...) \
-       printk(KERN_DEBUG fmt,##arg)
diff --git a/lustre/kernel_patches/patches/linux-2.4.18-netdump.patch b/lustre/kernel_patches/patches/linux-2.4.18-netdump.patch
deleted file mode 100644 (file)
index f8db708..0000000
+++ /dev/null
@@ -1,1842 +0,0 @@
-Index: linux-2.4.24/arch/i386/kernel/i386_ksyms.c
-===================================================================
---- linux-2.4.24.orig/arch/i386/kernel/i386_ksyms.c    2003-11-28 13:26:19.000000000 -0500
-+++ linux-2.4.24/arch/i386/kernel/i386_ksyms.c 2004-05-07 16:58:39.000000000 -0400
-@@ -186,3 +186,8 @@
- EXPORT_SYMBOL(edd);
- EXPORT_SYMBOL(eddnr);
- #endif
-+
-+EXPORT_SYMBOL_GPL(show_mem);
-+EXPORT_SYMBOL_GPL(show_state);
-+EXPORT_SYMBOL_GPL(show_regs);
-+
-Index: linux-2.4.24/arch/i386/kernel/process.c
-===================================================================
---- linux-2.4.24.orig/arch/i386/kernel/process.c       2003-11-28 13:26:19.000000000 -0500
-+++ linux-2.4.24/arch/i386/kernel/process.c    2004-05-07 17:08:18.000000000 -0400
-@@ -400,7 +400,8 @@
-        * Stop all CPUs and turn off local APICs and the IO-APIC, so
-        * other OSs see a clean IRQ state.
-        */
--      smp_send_stop();
-+      if (!netdump_func)
-+              smp_send_stop();
- #elif CONFIG_X86_LOCAL_APIC
-       if (cpu_has_apic) {
-               __cli();
-Index: linux-2.4.24/arch/i386/kernel/traps.c
-===================================================================
---- linux-2.4.24.orig/arch/i386/kernel/traps.c 2004-05-07 16:57:00.000000000 -0400
-+++ linux-2.4.24/arch/i386/kernel/traps.c      2004-05-07 17:09:17.000000000 -0400
-@@ -280,6 +280,9 @@
-       printk("Kernel BUG\n");
- }
-+void (*netdump_func) (struct pt_regs *regs) = NULL;
-+int netdump_mode = 0;
-+
- spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
- void die(const char * str, struct pt_regs * regs, long err)
-@@ -290,6 +293,8 @@
-       handle_BUG(regs);
-       printk("%s: %04lx\n", str, err & 0xffff);
-       show_registers(regs);
-+      if (netdump_func)
-+              netdump_func(regs);
-       bust_spinlocks(0);
-       spin_unlock_irq(&die_lock);
-       do_exit(SIGSEGV);
-@@ -1041,3 +1046,9 @@
- EXPORT_SYMBOL_GPL(is_kernel_text_address);
- EXPORT_SYMBOL_GPL(lookup_symbol);
-+
-+EXPORT_SYMBOL_GPL(netdump_func);
-+EXPORT_SYMBOL_GPL(netdump_mode);
-+#if CONFIG_X86_LOCAL_APIC
-+EXPORT_SYMBOL_GPL(nmi_watchdog);
-+#endif
-Index: linux-2.4.24/arch/x86_64/kernel/x8664_ksyms.c
-===================================================================
---- linux-2.4.24.orig/arch/x86_64/kernel/x8664_ksyms.c 2003-11-28 13:26:19.000000000 -0500
-+++ linux-2.4.24/arch/x86_64/kernel/x8664_ksyms.c      2004-05-07 17:01:51.000000000 -0400
-@@ -41,6 +41,9 @@
- EXPORT_SYMBOL(drive_info);
- #endif
-+//extern void (*netdump_func) (struct pt_regs *regs) = NULL;
-+int netdump_mode = 0;
-+
- /* platform dependent support */
- EXPORT_SYMBOL(boot_cpu_data);
- EXPORT_SYMBOL(dump_fpu);
-@@ -229,3 +232,6 @@
- EXPORT_SYMBOL(touch_nmi_watchdog);
- EXPORT_SYMBOL(do_fork);
-+
-+EXPORT_SYMBOL_GPL(netdump_func);
-+EXPORT_SYMBOL_GPL(netdump_mode);
-Index: linux-2.4.24/drivers/net/3c59x.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/3c59x.c      2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/3c59x.c   2004-05-07 17:01:00.000000000 -0400
-@@ -874,6 +874,7 @@
- static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
- static void vortex_tx_timeout(struct net_device *dev);
- static void acpi_set_WOL(struct net_device *dev);
-+static void vorboom_poll(struct net_device *dev);
- static struct ethtool_ops vortex_ethtool_ops;
\f
- /* This driver uses 'options' to pass the media type, full-duplex flag, etc. */
-@@ -1343,6 +1344,9 @@
-       dev->set_multicast_list = set_rx_mode;
-       dev->tx_timeout = vortex_tx_timeout;
-       dev->watchdog_timeo = (watchdog * HZ) / 1000;
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = &vorboom_poll;
-+#endif
-       if (pdev && vp->enable_wol) {
-               vp->pm_state_valid = 1;
-               pci_save_state(vp->pdev, vp->power_state);
-@@ -2322,6 +2326,29 @@
-       spin_unlock(&vp->lock);
- }
-+#ifdef HAVE_POLL_CONTROLLER
-+
-+/*
-+ * Polling 'interrupt' - used by things like netconsole to send skbs
-+ * without having to re-enable interrupts. It's not called while
-+ * the interrupt routine is executing.
-+ */
-+
-+static void vorboom_poll (struct net_device *dev)
-+{
-+      struct vortex_private *vp = (struct vortex_private *)dev->priv;
-+
-+      if (!netdump_mode) disable_irq(dev->irq);
-+      if (vp->full_bus_master_tx)
-+              boomerang_interrupt(dev->irq, dev, 0);
-+      else
-+              vortex_interrupt(dev->irq, dev, 0);
-+      if (!netdump_mode) enable_irq(dev->irq);
-+}
-+
-+#endif
-+
-+
- static int vortex_rx(struct net_device *dev)
- {
-       struct vortex_private *vp = (struct vortex_private *)dev->priv;
-Index: linux-2.4.24/drivers/net/Config.in
-===================================================================
---- linux-2.4.24.orig/drivers/net/Config.in    2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/Config.in 2004-05-07 16:58:39.000000000 -0400
-@@ -295,6 +295,8 @@
-    dep_tristate '  SysKonnect FDDI PCI support' CONFIG_SKFP $CONFIG_PCI
- fi
-+tristate 'Network logging support' CONFIG_NETCONSOLE
-+
- if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
-    if [ "$CONFIG_INET" = "y" ]; then
-       bool 'HIPPI driver support (EXPERIMENTAL)' CONFIG_HIPPI
-Index: linux-2.4.24/drivers/net/eepro100.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/eepro100.c   2003-08-25 07:44:42.000000000 -0400
-+++ linux-2.4.24/drivers/net/eepro100.c        2004-05-07 16:58:39.000000000 -0400
-@@ -543,6 +543,7 @@
- static int speedo_rx(struct net_device *dev);
- static void speedo_tx_buffer_gc(struct net_device *dev);
- static void speedo_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
-+static void poll_speedo (struct net_device *dev);
- static int speedo_close(struct net_device *dev);
- static struct net_device_stats *speedo_get_stats(struct net_device *dev);
- static int speedo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-@@ -879,6 +880,9 @@
-       dev->get_stats = &speedo_get_stats;
-       dev->set_multicast_list = &set_rx_mode;
-       dev->do_ioctl = &speedo_ioctl;
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = &poll_speedo;
-+#endif
-       return 0;
- }
-@@ -1176,10 +1180,8 @@
- /* Media monitoring and control. */
--static void speedo_timer(unsigned long data)
-+static void speedo_timeout(struct net_device *dev, struct speedo_private *sp)
- {
--      struct net_device *dev = (struct net_device *)data;
--      struct speedo_private *sp = (struct speedo_private *)dev->priv;
-       long ioaddr = dev->base_addr;
-       int phy_num = sp->phy[0] & 0x1f;
-@@ -1217,6 +1219,15 @@
-                                  dev->name, sp->rx_mode, jiffies, sp->last_rx_time);
-               set_rx_mode(dev);
-       }
-+}
-+
-+static void speedo_timer(unsigned long data)
-+{
-+      struct net_device *dev = (struct net_device *)data;
-+      struct speedo_private *sp = (struct speedo_private *)dev->priv;
-+
-+      speedo_timeout(dev, sp);
-+
-       /* We must continue to monitor the media. */
-       sp->timer.expires = RUN_AT(2*HZ);                       /* 2.0 sec. */
-       add_timer(&sp->timer);
-@@ -1661,6 +1672,29 @@
-       return;
- }
-+#ifdef HAVE_POLL_CONTROLLER
-+
-+/*
-+ * Polling 'interrupt' - used by things like netconsole to send skbs
-+ * without having to re-enable interrupts. It's not called while
-+ * the interrupt routine is executing.
-+ */
-+
-+static void poll_speedo (struct net_device *dev)
-+{
-+      struct speedo_private *sp = (struct speedo_private *)dev->priv;
-+
-+        if (!netdump_mode) disable_irq(dev->irq);
-+        if (sp->timer.expires == jiffies) {
-+                sp->timer.expires = RUN_AT(2*HZ);
-+                speedo_timeout(dev, sp);
-+        }
-+        speedo_interrupt (dev->irq, dev, NULL);
-+        if (!netdump_mode) enable_irq(dev->irq);
-+}
-+
-+#endif
-+
- static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry)
- {
-       struct speedo_private *sp = (struct speedo_private *)dev->priv;
-Index: linux-2.4.24/drivers/net/Makefile
-===================================================================
---- linux-2.4.24.orig/drivers/net/Makefile     2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/Makefile  2004-05-07 16:58:39.000000000 -0400
-@@ -250,6 +250,8 @@
- obj-y         += ../acorn/net/acorn-net.o
- endif
-+obj-$(CONFIG_NETCONSOLE) += netconsole.o
-+
- #
- # HIPPI adapters
- #
-Index: linux-2.4.24/drivers/net/netconsole.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/netconsole.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.4.24/drivers/net/netconsole.c      2004-05-07 16:58:39.000000000 -0400
-@@ -0,0 +1,1246 @@
-+/*
-+ *  linux/drivers/net/netconsole.c
-+ *
-+ *  Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
-+ *  Copyright (C) 2002  Red Hat, Inc.
-+ *
-+ *  This file contains the implementation of an IRQ-safe, crash-safe
-+ *  kernel console implementation that outputs kernel messages to the
-+ *  network.
-+ *
-+ * Modification history:
-+ *
-+ * 2001-09-17    started by Ingo Molnar.
-+ * 2002-03-14    simultaneous syslog packet option by Michael K. Johnson
-+ */
-+
-+/****************************************************************
-+ *      This program is free software; you can redistribute it and/or modify
-+ *      it under the terms of the GNU General Public License as published by
-+ *      the Free Software Foundation; either version 2, or (at your option)
-+ *      any later version.
-+ *
-+ *      This program is distributed in the hope that it will be useful,
-+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *      GNU General Public License for more details.
-+ *
-+ *      You should have received a copy of the GNU General Public License
-+ *      along with this program; if not, write to the Free Software
-+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
-+ ****************************************************************/
-+
-+#include <net/tcp.h>
-+#include <net/udp.h>
-+#include <linux/mm.h>
-+#include <linux/tty.h>
-+#include <linux/init.h>
-+#include <linux/delay.h>
-+#include <linux/random.h>
-+#include <linux/reboot.h>
-+#include <linux/module.h>
-+#include <asm/unaligned.h>
-+#include <asm/pgtable.h>
-+#if CONFIG_X86_LOCAL_APIC
-+#include <asm/apic.h>
-+#endif
-+#include <linux/console.h>
-+#include <linux/smp_lock.h>
-+#include <linux/netdevice.h>
-+#include <linux/tty_driver.h>
-+#include <linux/etherdevice.h>
-+#include <linux/elf.h>
-+
-+static struct net_device *netconsole_dev;
-+static u16 source_port, netdump_target_port, netlog_target_port, syslog_target_port;
-+static u32 source_ip, netdump_target_ip, netlog_target_ip, syslog_target_ip;
-+static unsigned char netdump_daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ;
-+static unsigned char netlog_daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ;
-+static unsigned char syslog_daddr[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff} ;
-+
-+static unsigned int mhz = 500, idle_timeout;
-+static unsigned long long mhz_cycles, jiffy_cycles;
-+
-+#include "netconsole.h"
-+
-+#define MAX_UDP_CHUNK 1460
-+#define MAX_PRINT_CHUNK (MAX_UDP_CHUNK-HEADER_LEN)
-+
-+#define DEBUG 0
-+#if DEBUG
-+# define Dprintk(x...) printk(KERN_INFO x)
-+#else
-+# define Dprintk(x...)
-+#endif
-+/*
-+ * We maintain a small pool of fully-sized skbs,
-+ * to make sure the message gets out even in
-+ * extreme OOM situations.
-+ */
-+#define MAX_NETCONSOLE_SKBS 128
-+
-+static spinlock_t netconsole_lock = SPIN_LOCK_UNLOCKED;
-+static int nr_netconsole_skbs;
-+static struct sk_buff *netconsole_skbs;
-+
-+#define MAX_SKB_SIZE \
-+              (MAX_UDP_CHUNK + sizeof(struct udphdr) + \
-+                              sizeof(struct iphdr) + sizeof(struct ethhdr))
-+
-+static int new_arp = 0;
-+static unsigned char arp_sha[ETH_ALEN], arp_tha[ETH_ALEN];
-+static u32 arp_sip, arp_tip;
-+
-+static void send_netconsole_arp(struct net_device *dev);
-+
-+static void __refill_netconsole_skbs(void)
-+{
-+      struct sk_buff *skb;
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&netconsole_lock, flags);
-+      while (nr_netconsole_skbs < MAX_NETCONSOLE_SKBS) {
-+              skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
-+              if (!skb)
-+                      break;
-+              if (netconsole_skbs)
-+                      skb->next = netconsole_skbs;
-+              else
-+                      skb->next = NULL;
-+              netconsole_skbs = skb;
-+              nr_netconsole_skbs++;
-+      }
-+      spin_unlock_irqrestore(&netconsole_lock, flags);
-+}
-+
-+static struct sk_buff * get_netconsole_skb(void)
-+{
-+      struct sk_buff *skb;
-+
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&netconsole_lock, flags);
-+      skb = netconsole_skbs;
-+      if (skb) {
-+              netconsole_skbs = skb->next;
-+              skb->next = NULL;
-+              nr_netconsole_skbs--;
-+      }
-+      spin_unlock_irqrestore(&netconsole_lock, flags);
-+
-+      return skb;
-+}
-+
-+static unsigned long long t0;
-+
-+/*
-+ * Do cleanups:
-+ * - zap completed output skbs.
-+ * - send ARPs if requested
-+ * - reboot the box if inactive for more than N seconds.
-+ */
-+static void zap_completion_queue(void)
-+{
-+      unsigned long long t1;
-+      int cpu = smp_processor_id();
-+
-+      if (softnet_data[cpu].completion_queue) {
-+              struct sk_buff *clist;
-+
-+              local_irq_disable();
-+              clist = softnet_data[cpu].completion_queue;
-+              softnet_data[cpu].completion_queue = NULL;
-+              local_irq_enable();
-+
-+              while (clist != NULL) {
-+                      struct sk_buff *skb = clist;
-+                      clist = clist->next;
-+                      __kfree_skb(skb);
-+              }
-+      }
-+
-+      if (new_arp) {
-+              Dprintk("got ARP req - sending reply.\n");
-+              new_arp = 0;
-+              send_netconsole_arp(netconsole_dev);
-+      }
-+
-+      rdtscll(t1);
-+      if (idle_timeout) {
-+              if (t0) {
-+                      if (((t1 - t0) >> 20) > mhz_cycles * (unsigned long long)idle_timeout) {
-+                              t0 = t1;
-+                              printk("netdump idle timeout - rebooting in 3 seconds.\n");
-+                              mdelay(3000);
-+                              machine_restart(NULL);
-+                      }
-+              }
-+      }
-+      /* maintain jiffies in a polling fashion, based on rdtsc. */
-+      {
-+              static unsigned long long prev_tick;
-+
-+              if (t1 - prev_tick >= jiffy_cycles) {
-+                      prev_tick += jiffy_cycles;
-+                      jiffies++;
-+              }
-+      }
-+}
-+
-+static struct sk_buff * alloc_netconsole_skb(struct net_device *dev, int len, int reserve)
-+{
-+      int once = 1;
-+      int count = 0;
-+      struct sk_buff *skb = NULL;
-+
-+repeat:
-+      zap_completion_queue();
-+      if (nr_netconsole_skbs < MAX_NETCONSOLE_SKBS)
-+              __refill_netconsole_skbs();
-+
-+      skb = alloc_skb(len, GFP_ATOMIC);
-+      if (!skb) {
-+              skb = get_netconsole_skb();
-+              if (!skb) {
-+                      count++;
-+                      if (once && (count == 1000000)) {
-+                              printk("possibly FATAL: out of netconsole skbs!!! will keep retrying.\n");
-+                              once = 0;
-+                      }
-+                      Dprintk("alloc skb: polling controller ...\n");
-+                      dev->poll_controller(dev);
-+                      goto repeat;
-+              }
-+      }
-+
-+      atomic_set(&skb->users, 1);
-+      skb_reserve(skb, reserve);
-+      return skb;
-+}
-+
-+static void transmit_raw_skb(struct sk_buff *skb, struct net_device *dev)
-+{
-+
-+repeat_poll:
-+      spin_lock(&dev->xmit_lock);
-+      dev->xmit_lock_owner = smp_processor_id();
-+
-+      if (netif_queue_stopped(dev)) {
-+              dev->xmit_lock_owner = -1;
-+              spin_unlock(&dev->xmit_lock);
-+
-+              Dprintk("xmit skb: polling controller ...\n");
-+              dev->poll_controller(dev);
-+              zap_completion_queue();
-+              goto repeat_poll;
-+      }
-+
-+      dev->hard_start_xmit(skb, dev);
-+
-+      dev->xmit_lock_owner = -1;
-+      spin_unlock(&dev->xmit_lock);
-+}
-+
-+static void transmit_netconsole_skb(struct sk_buff *skb, struct net_device *dev,
-+      int ip_len, int udp_len,
-+      u16 source_port, u16 target_port, u32 source_ip, u32 target_ip,
-+      unsigned char * macdaddr)
-+{
-+      struct udphdr *udph;
-+      struct iphdr *iph;
-+      struct ethhdr *eth;
-+
-+      udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
-+      udph->source = source_port;
-+      udph->dest = target_port;
-+      udph->len = htons(udp_len);
-+      udph->check = 0;
-+
-+      iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
-+
-+      iph->version  = 4;
-+      iph->ihl      = 5;
-+      iph->tos      = 0;
-+      iph->tot_len  = htons(ip_len);
-+      iph->id       = 0;
-+      iph->frag_off = 0;
-+      iph->ttl      = 64;
-+      iph->protocol = IPPROTO_UDP;
-+      iph->check    = 0;
-+      iph->saddr    = source_ip;
-+      iph->daddr    = target_ip;
-+      iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
-+
-+      eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-+
-+      eth->h_proto = htons(ETH_P_IP);
-+      memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
-+      memcpy(eth->h_dest, macdaddr, dev->addr_len);
-+
-+      transmit_raw_skb(skb, dev);
-+}
-+
-+static void send_netconsole_arp(struct net_device *dev)
-+{
-+      int total_len, arp_len, arp_data_len;
-+      struct sk_buff *skb;
-+      unsigned char *arp;
-+      struct arphdr *arph;
-+      struct ethhdr *eth;
-+
-+      arp_data_len = 2*4 + 2*ETH_ALEN;
-+      arp_len = arp_data_len + sizeof(struct arphdr);
-+      total_len = arp_len + ETH_HLEN;
-+
-+      skb = alloc_netconsole_skb(dev, total_len, total_len - arp_data_len);
-+
-+      arp = skb->data;
-+
-+      memcpy(arp, dev->dev_addr, ETH_ALEN);
-+      arp += ETH_ALEN;
-+
-+      memcpy(arp, &source_ip, 4);
-+      arp += 4;
-+
-+      memcpy(arp, arp_sha, ETH_ALEN);
-+      arp += ETH_ALEN;
-+
-+      memcpy(arp, &arp_sip, 4);
-+      arp += 4;
-+
-+      skb->len += 2*4 + 2*ETH_ALEN;
-+
-+      arph = (struct arphdr *)skb_push(skb, sizeof(*arph));
-+
-+      arph->ar_hrd = htons(dev->type);
-+      arph->ar_pro = __constant_htons(ETH_P_IP);
-+      arph->ar_hln = ETH_ALEN;
-+      arph->ar_pln = 4;
-+      arph->ar_op = __constant_htons(ARPOP_REPLY);
-+
-+      eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-+
-+      eth->h_proto = htons(ETH_P_ARP);
-+      memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
-+      memcpy(eth->h_dest, arp_sha, dev->addr_len);
-+
-+      transmit_raw_skb(skb, dev);
-+}
-+
-+static void send_netdump_skb(struct net_device *dev, const char *msg, unsigned int msg_len, reply_t *reply)
-+{
-+      int total_len, ip_len, udp_len;
-+      struct sk_buff *skb;
-+
-+      udp_len = msg_len + HEADER_LEN + sizeof(struct udphdr);
-+      ip_len = udp_len + sizeof(struct iphdr);
-+      total_len = ip_len + ETH_HLEN;
-+
-+      skb = alloc_netconsole_skb(dev, total_len, total_len - msg_len - HEADER_LEN);
-+
-+      skb->data[0] = NETCONSOLE_VERSION;
-+      put_unaligned(htonl(reply->nr), (u32 *) (skb->data + 1));
-+      put_unaligned(htonl(reply->code), (u32 *) (skb->data + 5));
-+      put_unaligned(htonl(reply->info), (u32 *) (skb->data + 9));
-+
-+      memcpy(skb->data + HEADER_LEN, msg, msg_len);
-+      skb->len += msg_len + HEADER_LEN;
-+
-+      transmit_netconsole_skb(skb, dev, ip_len, udp_len,
-+              source_port, netdump_target_port, source_ip, netdump_target_ip, netdump_daddr);
-+}
-+
-+#define SYSLOG_HEADER_LEN 4
-+
-+static void send_netlog_skb(struct net_device *dev, const char *msg, unsigned int msg_len, reply_t *reply)
-+{
-+      int total_len, ip_len, udp_len;
-+      struct sk_buff *skb;
-+
-+      udp_len = msg_len + HEADER_LEN + sizeof(struct udphdr);
-+      ip_len = udp_len + sizeof(struct iphdr);
-+      total_len = ip_len + ETH_HLEN;
-+
-+      skb = alloc_netconsole_skb(dev, total_len, total_len - msg_len - HEADER_LEN);
-+
-+      skb->data[0] = NETCONSOLE_VERSION;
-+      put_unaligned(htonl(reply->nr), (u32 *) (skb->data + 1));
-+      put_unaligned(htonl(reply->code), (u32 *) (skb->data + 5));
-+      put_unaligned(htonl(reply->info), (u32 *) (skb->data + 9));
-+
-+      memcpy(skb->data + HEADER_LEN, msg, msg_len);
-+      skb->len += msg_len + HEADER_LEN;
-+
-+      transmit_netconsole_skb(skb, dev, ip_len, udp_len,
-+              source_port, netlog_target_port, source_ip, netlog_target_ip, netlog_daddr);
-+}
-+
-+#define SYSLOG_HEADER_LEN 4
-+
-+static void send_syslog_skb(struct net_device *dev, const char *msg, unsigned int msg_len, int pri)
-+{
-+      int total_len, ip_len, udp_len;
-+      struct sk_buff *skb;
-+
-+      udp_len = msg_len + SYSLOG_HEADER_LEN + sizeof(struct udphdr);
-+      ip_len = udp_len + sizeof(struct iphdr);
-+      total_len = ip_len + ETH_HLEN;
-+
-+      skb = alloc_netconsole_skb(dev, total_len, total_len - msg_len - SYSLOG_HEADER_LEN);
-+
-+      skb->data[0] = '<';
-+      skb->data[1] = pri + '0';
-+      skb->data[2]= '>';
-+      skb->data[3]= ' ';
-+
-+      memcpy(skb->data + SYSLOG_HEADER_LEN, msg, msg_len);
-+      skb->len += msg_len + SYSLOG_HEADER_LEN;
-+
-+      transmit_netconsole_skb(skb, dev, ip_len, udp_len, source_port,
-+              syslog_target_port, source_ip, syslog_target_ip, syslog_daddr);
-+}
-+
-+#define MAX_SYSLOG_CHARS 1000
-+
-+static spinlock_t syslog_lock = SPIN_LOCK_UNLOCKED;
-+static int syslog_chars;
-+static unsigned char syslog_line [MAX_SYSLOG_CHARS + 10];
-+
-+/*
-+ * We feed kernel messages char by char, and send the UDP packet
-+ * one linefeed. We buffer all characters received.
-+ */
-+static inline void feed_syslog_char(struct net_device *dev, const unsigned char c)
-+{
-+      if (syslog_chars == MAX_SYSLOG_CHARS)
-+              syslog_chars--;
-+      syslog_line[syslog_chars] = c;
-+      syslog_chars++;
-+      if (c == '\n') {
-+              send_syslog_skb(dev, syslog_line, syslog_chars, 5);
-+              syslog_chars = 0;
-+      }
-+}
-+
-+static spinlock_t sequence_lock = SPIN_LOCK_UNLOCKED;
-+static unsigned int log_offset;
-+
-+static void write_netconsole_msg(struct console *con, const char *msg0, unsigned int msg_len)
-+{
-+      int len, left, i;
-+      struct net_device *dev;
-+      const char *msg = msg0;
-+      reply_t reply;
-+
-+      dev = netconsole_dev;
-+      if (!dev || netdump_mode)
-+              return;
-+
-+      if (dev->poll_controller && netif_running(dev)) {
-+              unsigned long flags;
-+
-+              __save_flags(flags);
-+              __cli();
-+              left = msg_len;
-+              if (netlog_target_ip) {
-+                      while (left) {
-+                              if (left > MAX_PRINT_CHUNK)
-+                                      len = MAX_PRINT_CHUNK;
-+                              else
-+                                      len = left;
-+                              reply.code = REPLY_LOG;
-+                              reply.nr = 0;
-+                              spin_lock(&sequence_lock);
-+                              reply.info = log_offset;
-+                              log_offset += len;
-+                              spin_unlock(&sequence_lock);
-+                              send_netlog_skb(dev, msg, len, &reply);
-+                              msg += len;
-+                              left -= len;
-+                      }
-+              }
-+              if (syslog_target_ip) {
-+                      spin_lock(&syslog_lock);
-+                      for (i = 0; i < msg_len; i++)
-+                              feed_syslog_char(dev, msg0[i]);
-+                      spin_unlock(&syslog_lock);
-+              }
-+
-+              __restore_flags(flags);
-+      }
-+}
-+
-+static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
-+{
-+      return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
-+}
-+
-+static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
-+                           unsigned short ulen, u32 saddr, u32 daddr)
-+{
-+      if (uh->check == 0) {
-+              skb->ip_summed = CHECKSUM_UNNECESSARY;
-+      } else if (skb->ip_summed == CHECKSUM_HW) {
-+              skb->ip_summed = CHECKSUM_UNNECESSARY;
-+              if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
-+                      return 0;
-+              skb->ip_summed = CHECKSUM_NONE;
-+      }
-+      if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-+              skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP,
-+0);
-+      /* Probably, we should checksum udp header (it should be in cache
-+       * in any case) and data in tiny packets (< rx copybreak).
-+       */
-+      return 0;
-+}
-+
-+static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
-+{
-+      return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
-+}
-+
-+static __inline__ int udp_checksum_complete(struct sk_buff *skb)
-+{
-+      return skb->ip_summed != CHECKSUM_UNNECESSARY &&
-+              __udp_checksum_complete(skb);
-+}
-+
-+/*
-+ * NOTE: security depends on the trusted path between the netconsole
-+ *       server and netconsole client, since none of the packets are
-+ *       encrypted. The random magic number protects the protocol
-+ *       against spoofing.
-+ */
-+static u64 netconsole_magic;
-+static u32 magic1, magic2;
-+
-+static spinlock_t req_lock = SPIN_LOCK_UNLOCKED;
-+static int nr_req = 0;
-+static LIST_HEAD(request_list);
-+
-+static void add_new_req(req_t *req)
-+{
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&req_lock, flags);
-+      list_add_tail(&req->list, &request_list);
-+      nr_req++;
-+      Dprintk("pending requests: %d.\n", nr_req);
-+      spin_unlock_irqrestore(&req_lock, flags);
-+
-+      rdtscll(t0);
-+}
-+
-+static req_t *get_new_req(void)
-+{
-+      req_t *req = NULL;
-+      unsigned long flags;
-+
-+      spin_lock_irqsave(&req_lock, flags);
-+      if (nr_req) {
-+              req = list_entry(request_list.next, req_t, list);
-+              list_del(&req->list);
-+              nr_req--;
-+      }
-+      spin_unlock_irqrestore(&req_lock, flags);
-+
-+      return req;
-+}
-+
-+static req_t *alloc_req(void)
-+{
-+      req_t *req;
-+
-+      req = (req_t *) kmalloc(sizeof(*req), GFP_ATOMIC);
-+      return req;
-+}
-+
-+static int netconsole_rx_hook(struct sk_buff *skb)
-+{
-+      int proto;
-+      struct iphdr *iph;
-+      struct udphdr *uh;
-+      __u32 len, saddr, daddr, ulen;
-+      req_t *__req;
-+      req_t *req;
-+      struct net_device *dev;
-+
-+      if (!netdump_mode)
-+              return NET_RX_SUCCESS;
-+#if DEBUG
-+      {
-+              static int packet_count;
-+              Dprintk("        %d\r", ++packet_count);
-+      }
-+#endif
-+      dev = skb->dev;
-+      if (dev->type != ARPHRD_ETHER)
-+              goto out;
-+      proto = ntohs(skb->mac.ethernet->h_proto);
-+      Dprintk("rx got skb %p (len: %d, users: %d), dev %s, h_proto: %04x.\n", skb, skb->len, atomic_read(&skb->users), dev->name, proto);
-+      #define D(x) skb->mac.ethernet->h_dest[x]
-+      Dprintk("... h_dest:   %02X:%02X:%02X:%02X:%02X:%02X.\n", D(0), D(1), D(2), D(3), D(4), D(5));
-+      #undef D
-+      #define D(x) skb->mac.ethernet->h_source[x]
-+      Dprintk("... h_source: %02X:%02X:%02X:%02X:%02X:%02X.\n", D(0), D(1), D(2), D(3), D(4), D(5));
-+      #undef D
-+      if (skb->pkt_type == PACKET_OTHERHOST)
-+              goto out;
-+      if (skb_shared(skb))
-+              goto out;
-+      if (proto == ETH_P_ARP) {
-+              struct arphdr *arp;
-+              unsigned char *arp_ptr;
-+
-+              Dprintk("got arp skb.\n");
-+              arp = (struct arphdr *)skb->data;
-+              if (!pskb_may_pull(skb, sizeof(struct arphdr) + 2*4 + 2*ETH_ALEN))
-+                      goto out;
-+              if (htons(dev->type) != arp->ar_hrd)
-+                      goto out;
-+              if (arp->ar_pro != __constant_htons(ETH_P_IP))
-+                      goto out;
-+              if (arp->ar_hln != ETH_ALEN)
-+                      goto out;
-+              if (arp->ar_pln != 4)
-+                      goto out;
-+              if (arp->ar_op != __constant_htons(ARPOP_REQUEST))
-+                      goto out;
-+              /*
-+               * ARP header looks ok so far, extract fields:
-+               */
-+              arp_ptr = (unsigned char *)(arp + 1);
-+
-+              memcpy(arp_sha, arp_ptr, ETH_ALEN);
-+              arp_ptr += ETH_ALEN;
-+
-+              memcpy(&arp_sip, arp_ptr, 4);
-+              arp_ptr += 4;
-+
-+              memcpy(arp_tha, arp_ptr, ETH_ALEN);
-+              arp_ptr += ETH_ALEN;
-+
-+              memcpy(&arp_tip, arp_ptr, 4);
-+
-+              #define D(x) arp_sha[x]
-+              Dprintk("... arp_sha:   %02X:%02X:%02X:%02X:%02X:%02X.\n", D(0), D(1), D(2), D(3), D(4), D(5));
-+              #undef D
-+              #define D(x) ((unsigned char *)&arp_sip)[x]
-+              Dprintk("... arp_sip:   %d.%d.%d.%d.\n", D(0), D(1), D(2), D(3));
-+              #undef D
-+              #define D(x) arp_tha[x]
-+              Dprintk("... arp_tha:   %02X:%02X:%02X:%02X:%02X:%02X.\n", D(0), D(1), D(2), D(3), D(4), D(5));
-+              #undef D
-+              #define D(x) ((unsigned char *)&arp_tip)[x]
-+              Dprintk("... arp_tip:   %d.%d.%d.%d.\n", D(0), D(1), D(2), D(3));
-+              #undef D
-+              #define D(x) ((unsigned char *)&source_ip)[x]
-+              Dprintk("... (source_ip):   %d.%d.%d.%d.\n", D(0), D(1), D(2), D(3));
-+              #undef D
-+
-+              if (LOOPBACK(arp_tip) || MULTICAST(arp_tip))
-+                      goto out;
-+
-+              if (arp_tip != source_ip)
-+                      goto out;
-+              new_arp = 1;
-+              goto out;
-+      }
-+      if (proto != ETH_P_IP)
-+              goto out;
-+      /*
-+       * IP header correctness testing:
-+       */
-+      iph = (struct iphdr *)skb->data;
-+      if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-+              goto out;
-+      Dprintk("... IP ihl*4: %d, version: %d.\n", iph->ihl*4, iph->version);
-+      if (iph->ihl < 5 || iph->version != 4)
-+              goto out;
-+      if (!pskb_may_pull(skb, iph->ihl*4))
-+              goto out;
-+      if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
-+              goto out;
-+      len = ntohs(iph->tot_len);
-+      Dprintk("... IP len: %d.\n", len);
-+      if (skb->len < len || len < iph->ihl*4)
-+              goto out;
-+      saddr = iph->saddr;
-+      daddr = iph->daddr;
-+      Dprintk("... IP src: %08x, dst: %08x.\n", saddr, daddr);
-+      Dprintk("... IP protocol: %d.\n", iph->protocol);
-+      if (iph->protocol != IPPROTO_UDP)
-+              goto out;
-+      Dprintk("... netdump src: %08x, dst: %08x.\n", source_ip, netlog_target_ip);
-+      if (source_ip != daddr)
-+              goto out;
-+      if (netlog_target_ip != saddr)
-+              goto out;
-+      len -= iph->ihl*4;
-+      uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
-+      ulen = ntohs(uh->len);
-+      Dprintk("... UDP len: %d (left %d).\n", ulen, len);
-+
-+#define MIN_COMM_SIZE (sizeof(*uh) + NETDUMP_REQ_SIZE)
-+      if (ulen != len || ulen < MIN_COMM_SIZE) {
-+              Dprintk("... UDP, hm, len not ok.\n");
-+              goto out;
-+      }
-+      if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0) {
-+              Dprintk("... UDP, hm, checksum init not ok.\n");
-+              goto out;
-+      }
-+      if (udp_checksum_complete(skb)) {
-+              Dprintk("... UDP, hm, checksum complete not ok.\n");
-+              goto out;
-+      }
-+      Dprintk("... UDP packet OK!\n");
-+      Dprintk("... UDP src port: %d, dst port: %d.\n", uh->source, uh->dest);
-+      if (source_port != uh->source)
-+              goto out;
-+      if (netlog_target_port != uh->dest)
-+              goto out;
-+      __req = (req_t *)(uh + 1);
-+      Dprintk("... UDP netdump packet OK!\n");
-+
-+      req = alloc_req();
-+      if (!req) {
-+              printk("no more RAM to allocate request - dropping it.\n");
-+              goto out;
-+      }
-+
-+      req->magic = ntohl(__req->magic);
-+      req->command = ntohl(__req->command);
-+      req->from = ntohl(__req->from);
-+      req->to = ntohl(__req->to);
-+      req->nr = ntohl(__req->nr);
-+
-+      Dprintk("... netdump magic:   %08Lx.\n", req->magic);
-+      Dprintk("... netdump command: %08x.\n", req->command);
-+      Dprintk("... netdump from:    %08x.\n", req->from);
-+      Dprintk("... netdump to:      %08x.\n", req->to);
-+
-+      add_new_req(req);
-+out:
-+      return NET_RX_DROP;
-+}
-+
-+#define INVALID_PAGE "page is not valid!\n"
-+
-+static void send_netdump_mem (struct net_device *dev, req_t *req)
-+{
-+      int i;
-+      char *kaddr;
-+      char str[1024];
-+      struct page *page;
-+      unsigned long nr = req->from;
-+      int nr_chunks = PAGE_SIZE/1024;
-+      reply_t reply;
-+      
-+      reply.nr = req->nr;
-+      reply.info = 0;
-+      if (req->from >= max_mapnr) {
-+              sprintf(str, "page %08lx is bigger than max page # %08lx!\n", nr, max_mapnr);
-+              reply.code = REPLY_ERROR;
-+              send_netdump_skb(dev, str, strlen(str), &reply);
-+              return;
-+      }
-+      page = mem_map + nr;
-+      if (PageReserved(page))
-+              page = ZERO_PAGE(0);
-+
-+      kaddr = (char *)kmap_atomic(page, KM_NETDUMP);
-+
-+      for (i = 0; i < nr_chunks; i++) {
-+              unsigned int offset = i*1024;
-+              reply.code = REPLY_MEM;
-+              reply.info = offset;
-+              send_netdump_skb(dev, kaddr + offset, 1024, &reply);
-+      }
-+
-+      kunmap_atomic(kaddr, KM_NETDUMP);
-+}
-+
-+/*
-+ * This function waits for the client to acknowledge the receipt
-+ * of the netdump startup reply, with the possibility of packets
-+ * getting lost. We resend the startup packet if no ACK is received,
-+ * after a 1 second delay.
-+ *
-+ * (The client can test the success of the handshake via the HELLO
-+ * command, and send ACKs until we enter netdump mode.)
-+ */
-+static void netdump_startup_handshake(struct net_device *dev)
-+{
-+      char tmp[200];
-+      reply_t reply;
-+      req_t *req = NULL;
-+      int i;
-+
-+      netdump_mode = 1;
-+
-+repeat:
-+      sprintf(tmp, "NETDUMP start, waiting for start-ACK.\n");
-+      reply.code = REPLY_START_NETDUMP;
-+      reply.nr = 0;
-+      reply.info = 0;
-+      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+
-+      for (i = 0; i < 10000; i++) {
-+              // wait 1 sec.
-+              udelay(100);
-+              Dprintk("handshake: polling controller ...\n");
-+              dev->poll_controller(dev);
-+              zap_completion_queue();
-+              req = get_new_req();
-+              if (req)
-+                      break;
-+      }
-+      if (!req)
-+              goto repeat;
-+      if (req->command != COMM_START_NETDUMP_ACK) {
-+              kfree(req);
-+              goto repeat;
-+      }
-+      kfree(req);
-+
-+      printk("NETDUMP START!\n");
-+}
-+
-+#if 0
-+
-+static inline void print_status (req_t *req)
-+{
-+      static int count = 0;
-+
-+      switch (++count & 3) {
-+              case 0: printk("/\r"); break;
-+              case 1: printk("|\r"); break;
-+              case 2: printk("\\\r"); break;
-+              case 3: printk("-\r"); break;
-+      }
-+}
-+
-+#else
-+
-+static inline void print_status (req_t *req)
-+{
-+      static int count = 0;
-+      static int prev_jiffies = 0;
-+
-+      if (jiffies/HZ != prev_jiffies/HZ) {
-+              prev_jiffies = jiffies;
-+              count++;
-+              switch (count & 3) {
-+                      case 0: printk("%d(%ld)/\r", nr_req, jiffies); break;
-+                      case 1: printk("%d(%ld)|\r", nr_req, jiffies); break;
-+                      case 2: printk("%d(%ld)\\\r", nr_req, jiffies); break;
-+                      case 3: printk("%d(%ld)-\r", nr_req, jiffies); break;
-+              }
-+      }
-+}
-+
-+#endif
-+
-+#define CLI 1
-+
-+#if CONFIG_SMP
-+static void freeze_cpu (void * dummy)
-+{
-+      printk("CPU#%d is frozen.\n", smp_processor_id());
-+#if CLI
-+      for (;;) __cli();
-+#else
-+      for (;;) __sti();
-+#endif
-+}
-+#endif
-+
-+static void netconsole_netdump (struct pt_regs *regs)
-+{
-+      reply_t reply;
-+      char tmp[200];
-+      unsigned long flags;
-+      struct net_device *dev = netconsole_dev;
-+      unsigned long esp;
-+      unsigned short ss;
-+      struct pt_regs myregs;
-+      req_t *req;
-+
-+      __save_flags(flags);
-+      __cli();
-+#if CONFIG_X86_LOCAL_APIC
-+      nmi_watchdog = 0;
-+#endif
-+#if CONFIG_SMP
-+      smp_call_function(freeze_cpu, NULL, 1, 0);
-+#endif
-+      mdelay(1000);
-+      /*
-+       * Just in case we are crashing within the networking code
-+       * ... attempt to fix up.
-+       */
-+      spin_lock_init(&dev->xmit_lock);
-+
-+      esp = (unsigned long) ((char *)regs + sizeof (struct pt_regs));
-+      ss = __KERNEL_DS;
-+      if (regs->xcs & 3) {
-+              esp = regs->esp;
-+              ss = regs->xss & 0xffff;
-+      }
-+      myregs = *regs;
-+      myregs.esp = esp;
-+      myregs.xss = (myregs.xss & 0xffff0000) | ss;
-+
-+      rdtscll(t0);
-+
-+      printk("< netdump activated - performing handshake with the client. >\n");
-+      netdump_startup_handshake(dev);
-+
-+      printk("< handshake completed - listening for dump requests. >\n");
-+
-+      while (netdump_mode) {
-+              __cli();
-+              Dprintk("main netdump loop: polling controller ...\n");
-+              dev->poll_controller(dev);
-+              zap_completion_queue();
-+#if !CLI
-+              __sti();
-+#endif
-+              req = get_new_req();
-+              if (!req)
-+                      continue;
-+              Dprintk("got new req, command %d.\n", req->command);
-+              print_status(req);
-+              switch (req->command) {
-+              case COMM_NONE:
-+                      Dprintk("got NO command.\n");
-+                      break;
-+
-+              case COMM_SEND_MEM:
-+                      Dprintk("got MEM command.\n");
-+                      // send ->from ->to.
-+                      send_netdump_mem(dev, req);
-+                      break;
-+
-+              case COMM_EXIT:
-+                      Dprintk("got EXIT command.\n");
-+                      netdump_mode = 0;
-+                      break;
-+
-+              case COMM_REBOOT:
-+                      Dprintk("got REBOOT command.\n");
-+                      printk("netdump: rebooting in 3 seconds.\n");
-+                      mdelay(3000);
-+                      machine_restart(NULL);
-+                      break;
-+
-+              case COMM_HELLO:
-+                      sprintf(tmp, "Hello, this is netdump version 0.%02d\n", NETCONSOLE_VERSION);
-+                      reply.code = REPLY_HELLO;
-+                      reply.nr = req->nr;
-+                      reply.info = NETCONSOLE_VERSION;
-+                      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+                      break;
-+
-+              case COMM_GET_PAGE_SIZE:
-+                      sprintf(tmp, "PAGE_SIZE: %ld\n", PAGE_SIZE);
-+                      reply.code = REPLY_PAGE_SIZE;
-+                      reply.nr = req->nr;
-+                      reply.info = PAGE_SIZE;
-+                      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+                      break;
-+
-+              case COMM_GET_REGS:
-+              {
-+                      char *tmp2 = tmp;
-+                      elf_gregset_t elf_regs;
-+
-+                      reply.code = REPLY_REGS;
-+                      reply.nr = req->nr;
-+                      reply.info = max_mapnr;
-+                      tmp2 = tmp + sprintf(tmp, "Sending register info.\n");
-+                      ELF_CORE_COPY_REGS(elf_regs, regs);
-+                      memcpy(tmp2, &elf_regs, sizeof(elf_regs));
-+                      send_netdump_skb(dev, tmp, strlen(tmp) + sizeof(elf_regs), &reply);
-+                      break;
-+              }
-+
-+              case COMM_GET_NR_PAGES:
-+                      reply.code = REPLY_NR_PAGES;
-+                      reply.nr = req->nr;
-+                      reply.info = max_mapnr;
-+                      sprintf(tmp, "Number of pages: %ld\n", max_mapnr);
-+                      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+                      break;
-+
-+              case COMM_SHOW_STATE:
-+                      netdump_mode = 0;
-+                      if (regs)
-+                              show_regs(regs);
-+                      show_state();
-+                      show_mem();
-+                      netdump_mode = 1;
-+                      reply.code = REPLY_SHOW_STATE;
-+                      reply.nr = req->nr;
-+                      reply.info = 0;
-+                      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+                      break;
-+
-+              default:
-+                      reply.code = REPLY_ERROR;
-+                      reply.nr = req->nr;
-+                      reply.info = req->command;
-+                      Dprintk("got UNKNOWN command!\n");
-+                      sprintf(tmp, "Got unknown command code %d!\n", req->command);
-+                      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+                      break;
-+              }
-+              kfree(req);
-+              req = NULL;
-+      }
-+      sprintf(tmp, "NETDUMP end.\n");
-+      reply.code = REPLY_END_NETDUMP;
-+      reply.nr = 0;
-+      reply.info = 0;
-+      send_netdump_skb(dev, tmp, strlen(tmp), &reply);
-+      printk("NETDUMP END!\n");
-+      __restore_flags(flags);
-+}
-+
-+static char *dev;
-+static int netdump_target_eth_byte0 = 255;
-+static int netdump_target_eth_byte1 = 255;
-+static int netdump_target_eth_byte2 = 255;
-+static int netdump_target_eth_byte3 = 255;
-+static int netdump_target_eth_byte4 = 255;
-+static int netdump_target_eth_byte5 = 255;
-+
-+static int netlog_target_eth_byte0 = 255;
-+static int netlog_target_eth_byte1 = 255;
-+static int netlog_target_eth_byte2 = 255;
-+static int netlog_target_eth_byte3 = 255;
-+static int netlog_target_eth_byte4 = 255;
-+static int netlog_target_eth_byte5 = 255;
-+
-+static int syslog_target_eth_byte0 = 255;
-+static int syslog_target_eth_byte1 = 255;
-+static int syslog_target_eth_byte2 = 255;
-+static int syslog_target_eth_byte3 = 255;
-+static int syslog_target_eth_byte4 = 255;
-+static int syslog_target_eth_byte5 = 255;
-+
-+MODULE_PARM(netdump_target_ip, "i");
-+MODULE_PARM_DESC(netdump_target_ip,
-+      "remote netdump IP address as a native (not network) endian integer");
-+MODULE_PARM(netlog_target_ip, "i");
-+MODULE_PARM_DESC(netlog_target_ip,
-+      "remote netlog IP address as a native (not network) endian integer");
-+MODULE_PARM(syslog_target_ip, "i");
-+MODULE_PARM_DESC(syslog_target_ip,
-+      "remote syslog IP address as a native (not network) endian integer");
-+
-+MODULE_PARM(source_port, "h");
-+MODULE_PARM_DESC(source_port,
-+      "local port from which to send netdump packets");
-+
-+MODULE_PARM(netdump_target_port, "h");
-+MODULE_PARM_DESC(netdump_target_port,
-+      "remote port to which to send netdump packets");
-+MODULE_PARM(netlog_target_port, "h");
-+MODULE_PARM_DESC(netlog_target_port,
-+      "remote port to which to send netlog packets");
-+MODULE_PARM(syslog_target_port, "h");
-+MODULE_PARM_DESC(syslog_target_port,
-+      "remote port to which to send syslog packets");
-+
-+#define ETH_BYTE(name,nr) \
-+      MODULE_PARM(name##_target_eth_byte##nr, "i"); \
-+      MODULE_PARM_DESC(name##_target_eth_byte##nr, \
-+              "byte "#nr" of the netdump server MAC address")
-+
-+#define ETH_BYTES(name) \
-+      ETH_BYTE(name, 0); ETH_BYTE(name, 1); ETH_BYTE(name, 2); \
-+      ETH_BYTE(name, 3); ETH_BYTE(name, 4); ETH_BYTE(name, 5);
-+
-+ETH_BYTES(netdump);
-+ETH_BYTES(netlog);
-+ETH_BYTES(syslog);
-+
-+MODULE_PARM(magic1, "i");
-+MODULE_PARM_DESC(magic1,
-+      "lower 32 bits of magic cookie shared between client and server");
-+MODULE_PARM(magic2, "i");
-+MODULE_PARM_DESC(magic2,
-+      "upper 32 bits of magic cookie shared between client and server");
-+MODULE_PARM(dev, "s");
-+MODULE_PARM_DESC(dev,
-+      "name of the device from which to send netdump and syslog packets");
-+MODULE_PARM(mhz, "i");
-+MODULE_PARM_DESC(mhz,
-+      "one second wall clock time takes this many million CPU cycles");
-+MODULE_PARM(idle_timeout, "i");
-+MODULE_PARM_DESC(idle_timeout,
-+      "reboot system after this many idle seconds");
-+
-+static struct console netconsole =
-+       { flags: CON_ENABLED, write: write_netconsole_msg };
-+
-+static int init_netconsole(void)
-+{
-+      struct net_device *ndev = NULL;
-+      struct in_device *in_dev;
-+
-+      printk(KERN_INFO "netlog: using network device <%s>\n", dev);
-+      // this will be valid once the device goes up.
-+      if (dev)
-+              ndev = dev_get_by_name(dev);
-+      if (!ndev) {
-+              printk(KERN_ERR "netlog: network device %s does not exist, aborting.\n", dev);
-+              return -1;
-+      }
-+      if (!ndev->poll_controller) {
-+              printk(KERN_ERR "netlog: %s's network driver does not implement netlogging yet, aborting.\n", dev);
-+              return -1;
-+      }
-+      in_dev = in_dev_get(ndev);
-+      if (!in_dev) {
-+              printk(KERN_ERR "netlog: network device %s is not an IP protocol device, aborting.\n", dev);
-+              return -1;
-+      }
-+
-+      if (!magic1 || !magic2) {
-+              printk(KERN_ERR "netlog: magic cookie (magic1,magic2) not specified.\n");
-+              return -1;
-+      }
-+      netconsole_magic = magic1 + (((u64)magic2)<<32);
-+
-+      source_ip = ntohl(in_dev->ifa_list->ifa_local);
-+      if (!source_ip) {
-+              printk(KERN_ERR "netlog: network device %s has no local address, aborting.\n", dev);
-+              return -1;
-+      }
-+#define IP(x) ((unsigned char *)&source_ip)[x]
-+      printk(KERN_INFO "netlog: using source IP %u.%u.%u.%u\n",
-+              IP(3), IP(2), IP(1), IP(0));
-+#undef IP
-+      source_ip = htonl(source_ip);
-+      if (!source_port) {
-+              printk(KERN_ERR "netlog: source_port parameter not specified, aborting.\n");
-+              return -1;
-+      }
-+      printk(KERN_INFO "netlog: using source UDP port: %u\n", source_port);
-+      source_port = htons(source_port);
-+
-+      if (!netdump_target_ip && !netlog_target_ip && !syslog_target_ip) {
-+              printk(KERN_ERR "netlog: target_ip parameter not specified, aborting.\n");
-+              return -1;
-+      }
-+      if (netdump_target_ip) {
-+#define IP(x) ((unsigned char *)&netdump_target_ip)[x]
-+              printk(KERN_INFO "netlog: using netdump target IP %u.%u.%u.%u\n",
-+                      IP(3), IP(2), IP(1), IP(0));
-+#undef IP
-+              netdump_target_ip = htonl(netdump_target_ip);
-+      }
-+      if (netlog_target_ip) {
-+#define IP(x) ((unsigned char *)&netlog_target_ip)[x]
-+              printk(KERN_INFO "netlog: using netlog target IP %u.%u.%u.%u\n",
-+                      IP(3), IP(2), IP(1), IP(0));
-+#undef IP
-+              netlog_target_ip = htonl(netlog_target_ip);
-+      }
-+      if (syslog_target_ip) {
-+              if (!syslog_target_port)
-+                      syslog_target_port = 514;
-+#define IP(x) ((unsigned char *)&syslog_target_ip)[x]
-+              printk("netlog: using syslog target IP %u.%u.%u.%u, port: %d\n", IP(3), IP(2), IP(1), IP(0), syslog_target_port);
-+#undef IP
-+              syslog_target_ip = htonl(syslog_target_ip);
-+              syslog_target_port = htons(syslog_target_port);
-+      }
-+      if (!netdump_target_port && !netlog_target_port && !syslog_target_port) {
-+              printk(KERN_ERR "netlog: target_port parameter not specified, aborting.\n");
-+              return -1;
-+      }
-+      if (netdump_target_port) {
-+              printk(KERN_INFO "netlog: using target UDP port: %u\n", netdump_target_port);
-+              netdump_target_port = htons(netdump_target_port);
-+      }
-+      if (netlog_target_port) {
-+              printk(KERN_INFO "netlog: using target UDP port: %u\n", netlog_target_port);
-+              netlog_target_port = htons(netlog_target_port);
-+      }
-+
-+      netdump_daddr[0] = netdump_target_eth_byte0;
-+      netdump_daddr[1] = netdump_target_eth_byte1;
-+      netdump_daddr[2] = netdump_target_eth_byte2;
-+      netdump_daddr[3] = netdump_target_eth_byte3;
-+      netdump_daddr[4] = netdump_target_eth_byte4;
-+      netdump_daddr[5] = netdump_target_eth_byte5;
-+
-+      if ((netdump_daddr[0] & netdump_daddr[1] & netdump_daddr[2] & netdump_daddr[3] & netdump_daddr[4] & netdump_daddr[5]) == 255)
-+              printk(KERN_INFO "netlog: using broadcast ethernet frames to send netdump packets.\n");
-+      else
-+              printk(KERN_INFO "netlog: using netdump target ethernet address %02x:%02x:%02x:%02x:%02x:%02x.\n",
-+                              netdump_daddr[0], netdump_daddr[1], netdump_daddr[2], netdump_daddr[3], netdump_daddr[4], netdump_daddr[5]);
-+
-+      netlog_daddr[0] = netlog_target_eth_byte0;
-+      netlog_daddr[1] = netlog_target_eth_byte1;
-+      netlog_daddr[2] = netlog_target_eth_byte2;
-+      netlog_daddr[3] = netlog_target_eth_byte3;
-+      netlog_daddr[4] = netlog_target_eth_byte4;
-+      netlog_daddr[5] = netlog_target_eth_byte5;
-+
-+      if ((netlog_daddr[0] & netlog_daddr[1] & netlog_daddr[2] & netlog_daddr[3] & netlog_daddr[4] & netlog_daddr[5]) == 255)
-+              printk(KERN_INFO "netlog: using broadcast ethernet frames to send netdump packets.\n");
-+      else
-+              printk(KERN_INFO "netlog: using netdump target ethernet address %02x:%02x:%02x:%02x:%02x:%02x.\n",
-+                              netlog_daddr[0], netlog_daddr[1], netlog_daddr[2], netlog_daddr[3], netlog_daddr[4], netlog_daddr[5]);
-+      syslog_daddr[0] = syslog_target_eth_byte0;
-+      syslog_daddr[1] = syslog_target_eth_byte1;
-+      syslog_daddr[2] = syslog_target_eth_byte2;
-+      syslog_daddr[3] = syslog_target_eth_byte3;
-+      syslog_daddr[4] = syslog_target_eth_byte4;
-+      syslog_daddr[5] = syslog_target_eth_byte5;
-+
-+      if ((syslog_daddr[0] & syslog_daddr[1] & syslog_daddr[2] & syslog_daddr[3] & syslog_daddr[4] & syslog_daddr[5]) == 255)
-+              printk(KERN_INFO "netlog: using broadcast ethernet frames to send syslog packets.\n");
-+      else
-+              printk(KERN_INFO "netlog: using syslog target ethernet address %02x:%02x:%02x:%02x:%02x:%02x.\n",
-+                              syslog_daddr[0], syslog_daddr[1], syslog_daddr[2], syslog_daddr[3], syslog_daddr[4], syslog_daddr[5]);
-+
-+      mhz_cycles = (unsigned long long)mhz * 1000000ULL;
-+      jiffy_cycles = (unsigned long long)mhz * (1000000/HZ);
-+
-+      INIT_LIST_HEAD(&request_list);
-+
-+      ndev->rx_hook = netconsole_rx_hook;
-+      netdump_func = netconsole_netdump;
-+      netconsole_dev = ndev;
-+#define STARTUP_MSG "[...network console startup...]\n"
-+      write_netconsole_msg(NULL, STARTUP_MSG, strlen(STARTUP_MSG));
-+
-+      register_console(&netconsole);
-+      printk(KERN_INFO "netlog: network logging started up successfully!\n");
-+      return 0;
-+}
-+
-+static void cleanup_netconsole(void)
-+{
-+      printk(KERN_INFO "netlog: network logging shut down.\n");
-+      unregister_console(&netconsole);
-+
-+#define SHUTDOWN_MSG "[...network console shutdown...]\n"
-+      write_netconsole_msg(NULL, SHUTDOWN_MSG, strlen(SHUTDOWN_MSG));
-+      netconsole_dev->rx_hook = NULL;
-+      netconsole_dev = NULL;
-+}
-+
-+module_init(init_netconsole);
-+module_exit(cleanup_netconsole);
-+
-+MODULE_LICENSE("GPL");
-+
-Index: linux-2.4.24/drivers/net/netconsole.h
-===================================================================
---- linux-2.4.24.orig/drivers/net/netconsole.h 1969-12-31 19:00:00.000000000 -0500
-+++ linux-2.4.24/drivers/net/netconsole.h      2004-05-07 16:58:39.000000000 -0400
-@@ -0,0 +1,81 @@
-+/*
-+ *  linux/drivers/net/netconsole.h
-+ *
-+ *  Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
-+ *
-+ *  This file contains the implementation of an IRQ-safe, crash-safe
-+ *  kernel console implementation that outputs kernel messages to the
-+ *  network.
-+ *
-+ * Modification history:
-+ *
-+ * 2001-09-17    started by Ingo Molnar.
-+ */
-+
-+/****************************************************************
-+ *      This program is free software; you can redistribute it and/or modify
-+ *      it under the terms of the GNU General Public License as published by
-+ *      the Free Software Foundation; either version 2, or (at your option)
-+ *      any later version.
-+ *
-+ *      This program is distributed in the hope that it will be useful,
-+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *      GNU General Public License for more details.
-+ *
-+ *      You should have received a copy of the GNU General Public License
-+ *      along with this program; if not, write to the Free Software
-+ *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-+ *
-+ ****************************************************************/
-+
-+#define NETCONSOLE_VERSION 0x04
-+
-+enum netdump_commands {
-+      COMM_NONE = 0,
-+      COMM_SEND_MEM = 1,
-+      COMM_EXIT = 2,
-+      COMM_REBOOT = 3,
-+      COMM_HELLO = 4,
-+      COMM_GET_NR_PAGES = 5,
-+      COMM_GET_PAGE_SIZE = 6,
-+      COMM_START_NETDUMP_ACK = 7,
-+      COMM_GET_REGS = 8,
-+      COMM_SHOW_STATE = 9,
-+};
-+
-+#define NETDUMP_REQ_SIZE (8+4*4)
-+
-+typedef struct netdump_req_s {
-+      u64 magic;
-+      u32 nr;
-+      u32 command;
-+      u32 from;
-+      u32 to;
-+      struct list_head list; 
-+} req_t;
-+
-+enum netdump_replies {
-+      REPLY_NONE = 0,
-+      REPLY_ERROR = 1,
-+      REPLY_LOG = 2,
-+      REPLY_MEM = 3,
-+      REPLY_RESERVED = 4,
-+      REPLY_HELLO = 5,
-+      REPLY_NR_PAGES = 6,
-+      REPLY_PAGE_SIZE = 7,
-+      REPLY_START_NETDUMP = 8,
-+      REPLY_END_NETDUMP = 9,
-+      REPLY_REGS = 10,
-+      REPLY_MAGIC = 11,
-+      REPLY_SHOW_STATE = 12,
-+};
-+
-+typedef struct netdump_reply_s {
-+      u32 nr;
-+      u32 code;
-+      u32 info;
-+} reply_t;
-+
-+#define HEADER_LEN (1 + sizeof(reply_t))
-+
-Index: linux-2.4.24/drivers/net/tlan.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/tlan.c       2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/tlan.c    2004-05-07 16:58:39.000000000 -0400
-@@ -345,6 +345,8 @@
- static void   TLan_EeReceiveByte( u16, u8 *, int );
- static int    TLan_EeReadByte( struct net_device *, u8, u8 * );
-+static void   TLan_Poll(struct net_device *);
-+
- static void 
- TLan_StoreSKB( struct tlan_list_tag *tag, struct sk_buff *skb)
-@@ -891,6 +893,9 @@
-       dev->get_stats = &TLan_GetStats;
-       dev->set_multicast_list = &TLan_SetMulticastList;
-       dev->do_ioctl = &TLan_ioctl;
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = &TLan_Poll;
-+#endif
-       dev->tx_timeout = &TLan_tx_timeout;
-       dev->watchdog_timeo = TX_TIMEOUT;
-@@ -1176,7 +1181,14 @@
- } /* TLan_HandleInterrupts */
--
-+#ifdef HAVE_POLL_CONTROLLER
-+static void TLan_Poll(struct net_device *dev)
-+{
-+      if (!netdump_mode) disable_irq(dev->irq);
-+      TLan_HandleInterrupt(dev->irq, dev, NULL);
-+      if (!netdump_mode) enable_irq(dev->irq);
-+}
-+#endif
-       /***************************************************************
-Index: linux-2.4.24/drivers/net/tulip/tulip_core.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/tulip/tulip_core.c   2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/tulip/tulip_core.c        2004-05-07 16:58:39.000000000 -0400
-@@ -266,6 +266,7 @@
- static struct net_device_stats *tulip_get_stats(struct net_device *dev);
- static int private_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
- static void set_rx_mode(struct net_device *dev);
-+static void poll_tulip(struct net_device *dev);
-@@ -1728,6 +1729,9 @@
-       dev->get_stats = tulip_get_stats;
-       dev->do_ioctl = private_ioctl;
-       dev->set_multicast_list = set_rx_mode;
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = &poll_tulip;
-+#endif
-       if (register_netdev(dev))
-               goto err_out_free_ring;
-@@ -1902,6 +1906,24 @@
- }
-+#ifdef HAVE_POLL_CONTROLLER
-+
-+/*
-+ * Polling 'interrupt' - used by things like netconsole to send skbs
-+ * without having to re-enable interrupts. It's not called while
-+ * the interrupt routine is executing.
-+ */
-+
-+static void poll_tulip (struct net_device *dev)
-+{
-+       if (!netdump_mode) disable_irq(dev->irq);
-+       tulip_interrupt (dev->irq, dev, NULL);
-+       if (!netdump_mode) enable_irq(dev->irq);
-+}
-+
-+#endif
-+
-+
- static struct pci_driver tulip_driver = {
-       name:           DRV_NAME,
-       id_table:       tulip_pci_tbl,
-Index: linux-2.4.24/drivers/net/e100/e100_main.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/e100/e100_main.c     2004-05-07 16:58:39.000000000 -0400
-+++ linux-2.4.24/drivers/net/e100/e100_main.c  2004-05-07 17:00:21.000000000 -0400
-@@ -664,6 +664,10 @@
-                 goto err_unregister_netdev;
-       }
-       
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = e100_netpoll;
-+#endif
-+
-       e100nics++;
-       e100_get_speed_duplex_caps(bdp);
-Index: linux-2.4.24/drivers/net/e1000/e1000_main.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/e1000/e1000_main.c   2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/e1000/e1000_main.c        2004-05-07 16:58:39.000000000 -0400
-@@ -182,6 +182,9 @@
- static int e1000_resume(struct pci_dev *pdev);
- #endif
-+/* for netdump / net console */
-+static void e1000_netpoll (struct net_device *dev);
-+
- struct notifier_block e1000_notifier_reboot = {
-       .notifier_call  = e1000_notify_reboot,
-       .next           = NULL,
-@@ -434,6 +437,10 @@
-       netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid;
-       netdev->vlan_rx_kill_vid = e1000_vlan_rx_kill_vid;
-+#ifdef HAVE_POLL_CONTROLLER
-+      netdev->poll_controller = e1000_netpoll;
-+#endif
-+
-       netdev->irq = pdev->irq;
-       netdev->mem_start = mmio_start;
-       netdev->mem_end = mmio_start + mmio_len;
-@@ -2899,4 +2906,20 @@
- }
- #endif
-+#ifdef HAVE_POLL_CONTROLLER
-+/*
-+ * Polling 'interrupt' - used by things like netconsole to send skbs
-+ * without having to re-enable interrupts. It's not called while
-+ * the interrupt routine is executing.
-+ */
-+
-+static void e1000_netpoll (struct net_device *dev)
-+{
-+      if (!netdump_mode) disable_irq(dev->irq);
-+      e1000_intr (dev->irq, dev, NULL);
-+      if (!netdump_mode) enable_irq(dev->irq);
-+}
-+
-+#endif
-+
- /* e1000_main.c */
-Index: linux-2.4.24/drivers/net/tg3.c
-===================================================================
---- linux-2.4.24.orig/drivers/net/tg3.c        2003-11-28 13:26:20.000000000 -0500
-+++ linux-2.4.24/drivers/net/tg3.c     2004-05-07 16:58:39.000000000 -0400
-@@ -216,6 +216,9 @@
- #define tr16(reg)             readw(tp->regs + (reg))
- #define tr8(reg)              readb(tp->regs + (reg))
-+/* Added by mark.fasheh@oracle.com to help enable netdump on these cards */
-+static void poll_tg3 (struct net_device *dev);
-+
- static void tg3_write_mem(struct tg3 *tp, u32 off, u32 val)
- {
-       unsigned long flags;
-@@ -7630,6 +7633,9 @@
-       dev->watchdog_timeo = TG3_TX_TIMEOUT;
-       dev->change_mtu = tg3_change_mtu;
-       dev->irq = pdev->irq;
-+#ifdef HAVE_POLL_CONTROLLER
-+      dev->poll_controller = &poll_tg3;
-+#endif
-       err = tg3_get_invariants(tp);
-       if (err) {
-@@ -7862,5 +7868,23 @@
-       pci_unregister_driver(&tg3_driver);
- }
-+#ifdef HAVE_POLL_CONTROLLER
-+
-+/*
-+ * Polling 'interrupt' - used by things like netconsole to send skbs
-+ * without having to re-enable interrupts. It's not called while
-+ * the interrupt routine is executing.
-+ */
-+
-+static void poll_tg3 (struct net_device *dev)
-+{
-+      if (!netdump_mode) disable_irq(dev->irq);
-+      tg3_interrupt (dev->irq, dev, NULL);
-+      if (!netdump_mode) enable_irq(dev->irq);
-+}
-+
-+#endif
-+
-+
- module_init(tg3_init);
- module_exit(tg3_cleanup);
-Index: linux-2.4.24/include/asm-i386/kmap_types.h
-===================================================================
---- linux-2.4.24.orig/include/asm-i386/kmap_types.h    2003-08-25 07:44:43.000000000 -0400
-+++ linux-2.4.24/include/asm-i386/kmap_types.h 2004-05-07 16:59:12.000000000 -0400
-@@ -10,6 +10,7 @@
-       KM_BH_IRQ,
-       KM_SOFTIRQ0,
-       KM_SOFTIRQ1,
-+      KM_NETDUMP,
-       KM_TYPE_NR
- };
-Index: linux-2.4.24/include/linux/kernel.h
-===================================================================
---- linux-2.4.24.orig/include/linux/kernel.h   2004-05-07 16:56:55.000000000 -0400
-+++ linux-2.4.24/include/linux/kernel.h        2004-05-07 16:58:39.000000000 -0400
-@@ -104,6 +104,9 @@
- extern void bust_spinlocks(int yes);
- extern int oops_in_progress;          /* If set, an oops, panic(), BUG() or die() is in progress */
-+struct pt_regs;
-+extern void (*netdump_func) (struct pt_regs *regs);
-+extern int netdump_mode;
- extern int tainted;
- extern const char *print_tainted(void);
-Index: linux-2.4.24/include/linux/netdevice.h
-===================================================================
---- linux-2.4.24.orig/include/linux/netdevice.h        2003-11-28 13:26:21.000000000 -0500
-+++ linux-2.4.24/include/linux/netdevice.h     2004-05-07 16:58:39.000000000 -0400
-@@ -435,6 +435,9 @@
-                                                    unsigned char *haddr);
-       int                     (*neigh_setup)(struct net_device *dev, struct neigh_parms *);
-       int                     (*accept_fastpath)(struct net_device *, struct dst_entry*);
-+#define HAVE_POLL_CONTROLLER
-+      void                    (*poll_controller)(struct net_device *dev);
-+      int                     (*rx_hook)(struct sk_buff *skb);
-       /* open/release and usage marking */
-       struct module *owner;
-Index: linux-2.4.24/kernel/panic.c
-===================================================================
---- linux-2.4.24.orig/kernel/panic.c   2004-05-07 16:56:56.000000000 -0400
-+++ linux-2.4.24/kernel/panic.c        2004-05-07 16:58:39.000000000 -0400
-@@ -62,6 +62,8 @@
-       vsprintf(buf, fmt, args);
-       va_end(args);
-       printk(KERN_EMERG "Kernel panic: %s\n",buf);
-+      if (netdump_func)
-+              BUG();
-       if (in_interrupt())
-               printk(KERN_EMERG "In interrupt handler - not syncing\n");
-       else if (!current->pid)
-Index: linux-2.4.24/net/core/dev.c
-===================================================================
---- linux-2.4.24.orig/net/core/dev.c   2003-11-28 13:26:21.000000000 -0500
-+++ linux-2.4.24/net/core/dev.c        2004-05-07 16:58:39.000000000 -0400
-@@ -1288,6 +1288,13 @@
-       local_irq_save(flags);
-+      if (unlikely(skb->dev->rx_hook != NULL)) {
-+              int ret;
-+
-+              ret = skb->dev->rx_hook(skb);
-+              if (ret == NET_RX_DROP)
-+                      goto drop;
-+        }
-       netdev_rx_stat[this_cpu].total++;
-       if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-               if (queue->input_pkt_queue.qlen) {
diff --git a/lustre/kernel_patches/patches/linux-2.4.19-bgl-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.19-bgl-xattr-0.8.54.patch
deleted file mode 100644 (file)
index a6a7e12..0000000
+++ /dev/null
@@ -1,5242 +0,0 @@
- Documentation/Configure.help  |   66 ++
- arch/alpha/defconfig          |    7 
- arch/alpha/kernel/entry.S     |   12 
- arch/arm/defconfig            |    7 
- arch/arm/kernel/calls.S       |   24 
- arch/i386/defconfig           |    7 
- arch/ia64/defconfig           |    7 
- arch/ia64/kernel/entry.S      |   24 
- arch/m68k/defconfig           |    7 
- arch/mips/defconfig           |    7 
- arch/mips64/defconfig         |    7 
- arch/ppc/defconfig            |   14 
- arch/ppc64/kernel/misc.S      |    2 
- arch/s390/defconfig           |    7 
- arch/s390/kernel/entry.S      |   24 
- arch/s390x/defconfig          |    7 
- arch/s390x/kernel/entry.S     |   24 
- arch/s390x/kernel/wrapper32.S |   92 +++
- arch/sparc/defconfig          |    7 
- arch/sparc/kernel/systbls.S   |   10 
- arch/sparc64/defconfig        |    7 
- arch/sparc64/kernel/systbls.S |   20 
- fs/Config.in                  |   14 
- fs/Makefile                   |    3 
- fs/ext2/Makefile              |    4 
- fs/ext2/file.c                |    5 
- fs/ext2/ialloc.c              |    2 
- fs/ext2/inode.c               |   34 -
- fs/ext2/namei.c               |   14 
- fs/ext2/super.c               |   29 
- fs/ext2/symlink.c             |   14 
- fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
- fs/ext2/xattr_user.c          |  103 +++
- fs/ext3/Makefile              |   10 
- fs/ext3/file.c                |    5 
- fs/ext3/ialloc.c              |    2 
- fs/ext3/inode.c               |   35 -
- fs/ext3/namei.c               |   21 
- fs/ext3/super.c               |   36 +
- fs/ext3/symlink.c             |   14 
- fs/ext3/xattr.c               | 1225 ++++++++++++++++++++++++++++++++++++++++++
- fs/ext3/xattr_user.c          |  111 +++
- fs/jfs/jfs_xattr.h            |    6 
- fs/jfs/xattr.c                |    6 
- fs/mbcache.c                  |  648 ++++++++++++++++++++++
- include/asm-arm/unistd.h      |    2 
- include/asm-ia64/unistd.h     |   13 
- include/asm-ppc64/unistd.h    |    2 
- include/asm-s390/unistd.h     |   15 
- include/asm-s390x/unistd.h    |   15 
- include/asm-sparc/unistd.h    |   24 
- include/asm-sparc64/unistd.h  |   24 
- include/linux/cache_def.h     |   15 
- include/linux/errno.h         |    4 
- include/linux/ext2_fs.h       |   31 -
- include/linux/ext2_xattr.h    |  157 +++++
- include/linux/ext3_fs.h       |   31 -
- include/linux/ext3_jbd.h      |    8 
- include/linux/ext3_xattr.h    |  157 +++++
- include/linux/fs.h            |    2 
- include/linux/mbcache.h       |   69 ++
- kernel/ksyms.c                |    4 
- mm/vmscan.c                   |   35 +
- fs/ext3/ext3-exports.c        |   14 +  
- 64 files changed, 4355 insertions(+), 195 deletions(-)
-
-Index: linux-DRV401/arch/ppc/defconfig
-===================================================================
---- linux-DRV401.orig/arch/ppc/defconfig       2004-10-15 10:24:32.000000000 -0700
-+++ linux-DRV401/arch/ppc/defconfig    2004-10-15 11:03:51.000000000 -0700
-@@ -1,6 +1,13 @@
- #
- # Automatically generated by make menuconfig: don't edit
- #
-+CONFIG_EXT3_FS_XATTR=y
-+# CONFIG_EXT3_FS_XATTR_SHARING is not set
-+# CONFIG_EXT3_FS_XATTR_USER is not set
-+# CONFIG_EXT2_FS_XATTR is not set
-+# CONFIG_EXT2_FS_XATTR_SHARING is not set
-+# CONFIG_EXT2_FS_XATTR_USER is not set
-+# CONFIG_FS_MBCACHE is not set
- # CONFIG_UID16 is not set
- # CONFIG_RWSEM_GENERIC_SPINLOCK is not set
- CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-Index: linux-DRV401/fs/Config.in
-===================================================================
---- linux-DRV401.orig/fs/Config.in     2004-10-15 10:24:06.000000000 -0700
-+++ linux-DRV401/fs/Config.in  2004-10-15 11:03:51.000000000 -0700
-@@ -22,6 +22,11 @@
- dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
- tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS
-+dep_mbool '  Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS
-+dep_bool '    Ext3 extended attribute block sharing' \
-+    CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR
-+dep_bool '    Ext3 extended user attributes' \
-+    CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR
- # CONFIG_JBD could be its own option (even modular), but until there are
- # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS
- # dep_tristate '  Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS
-@@ -77,6 +82,11 @@
- tristate 'ROM file system support' CONFIG_ROMFS_FS
- tristate 'Second extended fs support' CONFIG_EXT2_FS
-+dep_mbool '  Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS
-+dep_bool '    Ext2 extended attribute block sharing' \
-+    CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR
-+dep_bool '    Ext2 extended user attributes' \
-+    CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR
- tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS
-@@ -156,6 +166,10 @@
-   fi
- fi
-+# Meta block cache for Extended Attributes (ext2/ext3)
-+#tristate 'Meta block cache' CONFIG_FS_MBCACHE
-+define_tristate CONFIG_FS_MBCACHE y 
-+
- mainmenu_option next_comment
- comment 'Partition Types'
- source fs/partitions/Config.in
-Index: linux-DRV401/fs/Makefile
-===================================================================
---- linux-DRV401.orig/fs/Makefile      2004-10-15 10:39:15.000000000 -0700
-+++ linux-DRV401/fs/Makefile   2004-10-15 11:03:51.000000000 -0700
-@@ -14,7 +14,7 @@
-               super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
-               fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \
-               dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \
--              filesystems.o namespace.o seq_file.o quota.o
-+              filesystems.o namespace.o seq_file.o quota.o xattr.o
- ifeq ($(CONFIG_QUOTA),y)
- obj-y += dquot.o
-@@ -76,6 +76,9 @@
- obj-$(CONFIG_BINFMT_ELF)      += binfmt_elf.o
-+export-objs += mbcache.o
-+obj-$(CONFIG_FS_MBCACHE)      += mbcache.o
-+
- # persistent filesystems
- obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
-Index: linux-DRV401/fs/ext2/Makefile
-===================================================================
---- linux-DRV401.orig/fs/ext2/Makefile 2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/Makefile      2004-10-15 11:03:51.000000000 -0700
-@@ -13,4 +13,8 @@
-               ioctl.o namei.o super.o symlink.o
- obj-m    := $(O_TARGET)
-+export-objs += xattr.o
-+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o
-+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
-+
- include $(TOPDIR)/Rules.make
-Index: linux-DRV401/fs/ext2/file.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/file.c   2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/file.c        2004-10-15 11:03:51.000000000 -0700
-@@ -20,6 +20,7 @@
- #include <linux/fs.h>
- #include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
- #include <linux/sched.h>
- /*
-@@ -51,4 +52,8 @@
- struct inode_operations ext2_file_inode_operations = {
-       truncate:       ext2_truncate,
-+      setxattr:       ext2_setxattr,
-+      getxattr:       ext2_getxattr,
-+      listxattr:      ext2_listxattr,
-+      removexattr:    ext2_removexattr,
- };
-Index: linux-DRV401/fs/ext2/ialloc.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/ialloc.c 2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/ialloc.c      2004-10-15 11:03:51.000000000 -0700
-@@ -15,6 +15,7 @@
- #include <linux/config.h>
- #include <linux/fs.h>
- #include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
- #include <linux/locks.h>
- #include <linux/quotaops.h>
-@@ -167,6 +168,7 @@
-        */
-       if (!is_bad_inode(inode)) {
-               /* Quota is already initialized in iput() */
-+              ext2_xattr_delete_inode(inode);
-               DQUOT_FREE_INODE(inode);
-               DQUOT_DROP(inode);
-       }
-Index: linux-DRV401/fs/ext2/inode.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/inode.c  2004-10-15 10:24:00.000000000 -0700
-+++ linux-DRV401/fs/ext2/inode.c       2004-10-15 11:03:51.000000000 -0700
-@@ -39,6 +39,18 @@
- static int ext2_update_inode(struct inode * inode, int do_sync);
- /*
-+ * Test whether an inode is a fast symlink.
-+ */
-+static inline int ext2_inode_is_fast_symlink(struct inode *inode)
-+{
-+      int ea_blocks = inode->u.ext2_i.i_file_acl ?
-+              (inode->i_sb->s_blocksize >> 9) : 0;
-+
-+      return (S_ISLNK(inode->i_mode) &&
-+              inode->i_blocks - ea_blocks == 0);
-+}
-+
-+/*
-  * Called at each iput()
-  */
- void ext2_put_inode (struct inode * inode)
-@@ -53,9 +65,7 @@
- {
-       lock_kernel();
--      if (is_bad_inode(inode) ||
--          inode->i_ino == EXT2_ACL_IDX_INO ||
--          inode->i_ino == EXT2_ACL_DATA_INO)
-+      if (is_bad_inode(inode))
-               goto no_delete;
-       inode->u.ext2_i.i_dtime = CURRENT_TIME;
-       mark_inode_dirty(inode);
-@@ -792,6 +802,8 @@
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-           S_ISLNK(inode->i_mode)))
-               return;
-+      if (ext2_inode_is_fast_symlink(inode))
-+              return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
-@@ -879,8 +891,7 @@
-       unsigned long offset;
-       struct ext2_group_desc * gdp;
--      if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO &&
--           inode->i_ino != EXT2_ACL_DATA_INO &&
-+      if ((inode->i_ino != EXT2_ROOT_INO &&
-            inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) ||
-           inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) {
-               ext2_error (inode->i_sb, "ext2_read_inode",
-@@ -965,10 +976,7 @@
-       for (block = 0; block < EXT2_N_BLOCKS; block++)
-               inode->u.ext2_i.i_data[block] = raw_inode->i_block[block];
--      if (inode->i_ino == EXT2_ACL_IDX_INO ||
--          inode->i_ino == EXT2_ACL_DATA_INO)
--              /* Nothing to do */ ;
--      else if (S_ISREG(inode->i_mode)) {
-+      if (S_ISREG(inode->i_mode)) {
-               inode->i_op = &ext2_file_inode_operations;
-               inode->i_fop = &ext2_file_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
-@@ -977,15 +985,17 @@
-               inode->i_fop = &ext2_dir_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
-       } else if (S_ISLNK(inode->i_mode)) {
--              if (!inode->i_blocks)
-+              if (ext2_inode_is_fast_symlink(inode))
-                       inode->i_op = &ext2_fast_symlink_inode_operations;
-               else {
--                      inode->i_op = &page_symlink_inode_operations;
-+                      inode->i_op = &ext2_symlink_inode_operations;
-                       inode->i_mapping->a_ops = &ext2_aops;
-               }
--      } else 
-+      } else {
-+              inode->i_op = &ext2_special_inode_operations;
-               init_special_inode(inode, inode->i_mode,
-                                  le32_to_cpu(raw_inode->i_block[0]));
-+      }
-       brelse (bh);
-       inode->i_attr_flags = 0;
-       if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
-Index: linux-DRV401/fs/ext2/namei.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/namei.c  2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/namei.c       2004-10-15 11:03:51.000000000 -0700
-@@ -31,6 +31,7 @@
- #include <linux/fs.h>
- #include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
- #include <linux/pagemap.h>
- /*
-@@ -136,7 +137,7 @@
-       if (l > sizeof (inode->u.ext2_i.i_data)) {
-               /* slow symlink */
--              inode->i_op = &page_symlink_inode_operations;
-+              inode->i_op = &ext2_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext2_aops;
-               err = block_symlink(inode, symname, l);
-               if (err)
-@@ -345,4 +346,15 @@
-       rmdir:          ext2_rmdir,
-       mknod:          ext2_mknod,
-       rename:         ext2_rename,
-+      setxattr:       ext2_setxattr,
-+      getxattr:       ext2_getxattr,
-+      listxattr:      ext2_listxattr,
-+      removexattr:    ext2_removexattr,
-+};
-+
-+struct inode_operations ext2_special_inode_operations = {
-+      setxattr:       ext2_setxattr,
-+      getxattr:       ext2_getxattr,
-+      listxattr:      ext2_listxattr,
-+      removexattr:    ext2_removexattr,
- };
-Index: linux-DRV401/fs/ext2/super.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/super.c  2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/super.c       2004-10-15 11:03:51.000000000 -0700
-@@ -21,6 +21,7 @@
- #include <linux/string.h>
- #include <linux/fs.h>
- #include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
- #include <linux/slab.h>
- #include <linux/init.h>
- #include <linux/locks.h>
-@@ -125,6 +126,7 @@
-       int db_count;
-       int i;
-+      ext2_xattr_put_super(sb);
-       if (!(sb->s_flags & MS_RDONLY)) {
-               struct ext2_super_block *es = EXT2_SB(sb)->s_es;
-@@ -175,6 +177,13 @@
-            this_char = strtok (NULL, ",")) {
-               if ((value = strchr (this_char, '=')) != NULL)
-                       *value++ = 0;
-+#ifdef CONFIG_EXT2_FS_XATTR_USER
-+              if (!strcmp (this_char, "user_xattr"))
-+                      set_opt (*mount_options, XATTR_USER);
-+              else if (!strcmp (this_char, "nouser_xattr"))
-+                      clear_opt (*mount_options, XATTR_USER);
-+              else
-+#endif
-               if (!strcmp (this_char, "bsddf"))
-                       clear_opt (*mount_options, MINIX_DF);
-               else if (!strcmp (this_char, "nouid32")) {
-@@ -424,6 +433,9 @@
-           blocksize = BLOCK_SIZE;
-       sb->u.ext2_sb.s_mount_opt = 0;
-+#ifdef CONFIG_EXT2_FS_XATTR_USER
-+      /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */
-+#endif
-       if (!parse_options ((char *) data, &sb_block, &resuid, &resgid,
-           &sb->u.ext2_sb.s_mount_opt)) {
-               return NULL;
-@@ -810,12 +822,27 @@
- static int __init init_ext2_fs(void)
- {
--        return register_filesystem(&ext2_fs_type);
-+      int error = init_ext2_xattr();
-+      if (error)
-+              return error;
-+      error = init_ext2_xattr_user();
-+      if (error)
-+              goto fail;
-+      error = register_filesystem(&ext2_fs_type);
-+      if (!error)
-+              return 0;
-+
-+      exit_ext2_xattr_user();
-+fail:
-+      exit_ext2_xattr();
-+      return error;
- }
- static void __exit exit_ext2_fs(void)
- {
-       unregister_filesystem(&ext2_fs_type);
-+      exit_ext2_xattr_user();
-+      exit_ext2_xattr();
- }
- EXPORT_NO_SYMBOLS;
-Index: linux-DRV401/fs/ext2/symlink.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/symlink.c        2004-10-15 10:23:59.000000000 -0700
-+++ linux-DRV401/fs/ext2/symlink.c     2004-10-15 11:03:51.000000000 -0700
-@@ -19,6 +19,7 @@
- #include <linux/fs.h>
- #include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
- static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen)
- {
-@@ -32,7 +33,20 @@
-       return vfs_follow_link(nd, s);
- }
-+struct inode_operations ext2_symlink_inode_operations = {
-+      readlink:       page_readlink,
-+      follow_link:    page_follow_link,
-+      setxattr:       ext2_setxattr,
-+      getxattr:       ext2_getxattr,
-+      listxattr:      ext2_listxattr,
-+      removexattr:    ext2_removexattr,
-+};
-+
- struct inode_operations ext2_fast_symlink_inode_operations = {
-       readlink:       ext2_readlink,
-       follow_link:    ext2_follow_link,
-+      setxattr:       ext2_setxattr,
-+      getxattr:       ext2_getxattr,
-+      listxattr:      ext2_listxattr,
-+      removexattr:    ext2_removexattr,
- };
-Index: linux-DRV401/fs/ext2/xattr.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/xattr.c  2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/ext2/xattr.c       2004-10-15 11:03:51.000000000 -0700
-@@ -0,0 +1,1212 @@
-+/*
-+ * linux/fs/ext2/xattr.c
-+ *
-+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ *
-+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
-+ * Extended attributes for symlinks and special files added per
-+ *  suggestion of Luka Renko <luka.renko@hermes.si>.
-+ */
-+
-+/*
-+ * Extended attributes are stored on disk blocks allocated outside of
-+ * any inode. The i_file_acl field is then made to point to this allocated
-+ * block. If all extended attributes of an inode are identical, these
-+ * inodes may share the same extended attribute block. Such situations
-+ * are automatically detected by keeping a cache of recent attribute block
-+ * numbers and hashes over the block's contents in memory.
-+ *
-+ *
-+ * Extended attribute block layout:
-+ *
-+ *   +------------------+
-+ *   | header           |
-+ *   | entry 1          | |
-+ *   | entry 2          | | growing downwards
-+ *   | entry 3          | v
-+ *   | four null bytes  |
-+ *   | . . .            |
-+ *   | value 1          | ^
-+ *   | value 3          | | growing upwards
-+ *   | value 2          | |
-+ *   +------------------+
-+ *
-+ * The block header is followed by multiple entry descriptors. These entry
-+ * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
-+ * byte boundaries. The entry descriptors are sorted by attribute name,
-+ * so that two extended attribute blocks can be compared efficiently.
-+ *
-+ * Attribute values are aligned to the end of the block, stored in
-+ * no specific order. They are also padded to EXT2_XATTR_PAD byte
-+ * boundaries. No additional gaps are left between them.
-+ *
-+ * Locking strategy
-+ * ----------------
-+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of
-+ * the xattr inode operations are called, so we are guaranteed that only one
-+ * process accesses extended attributes of an inode at any time.
-+ *
-+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that
-+ * only a single process is modifying an extended attribute block, even
-+ * if the block is shared among inodes.
-+ *
-+ * Note for porting to 2.5
-+ * -----------------------
-+ * The BKL will no longer be held in the xattr inode operations.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/locks.h>
-+#include <linux/slab.h>
-+#include <linux/fs.h>
-+#include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
-+#include <linux/mbcache.h>
-+#include <linux/quotaops.h>
-+#include <asm/semaphore.h>
-+#include <linux/compatmac.h>
-+
-+/* These symbols may be needed by a module. */
-+EXPORT_SYMBOL(ext2_xattr_register);
-+EXPORT_SYMBOL(ext2_xattr_unregister);
-+EXPORT_SYMBOL(ext2_xattr_get);
-+EXPORT_SYMBOL(ext2_xattr_list);
-+EXPORT_SYMBOL(ext2_xattr_set);
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
-+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
-+#endif
-+
-+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
-+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
-+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
-+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
-+
-+#ifdef EXT2_XATTR_DEBUG
-+# define ea_idebug(inode, f...) do { \
-+              printk(KERN_DEBUG "inode %s:%ld: ", \
-+                      kdevname(inode->i_dev), inode->i_ino); \
-+              printk(f); \
-+              printk("\n"); \
-+      } while (0)
-+# define ea_bdebug(bh, f...) do { \
-+              printk(KERN_DEBUG "block %s:%ld: ", \
-+                      kdevname(bh->b_dev), bh->b_blocknr); \
-+              printk(f); \
-+              printk("\n"); \
-+      } while (0)
-+#else
-+# define ea_idebug(f...)
-+# define ea_bdebug(f...)
-+#endif
-+
-+static int ext2_xattr_set2(struct inode *, struct buffer_head *,
-+                         struct ext2_xattr_header *);
-+
-+#ifdef CONFIG_EXT2_FS_XATTR_SHARING
-+
-+static int ext2_xattr_cache_insert(struct buffer_head *);
-+static struct buffer_head *ext2_xattr_cache_find(struct inode *,
-+                                               struct ext2_xattr_header *);
-+static void ext2_xattr_cache_remove(struct buffer_head *);
-+static void ext2_xattr_rehash(struct ext2_xattr_header *,
-+                            struct ext2_xattr_entry *);
-+
-+static struct mb_cache *ext2_xattr_cache;
-+
-+#else
-+# define ext2_xattr_cache_insert(bh) 0
-+# define ext2_xattr_cache_find(inode, header) NULL
-+# define ext2_xattr_cache_remove(bh) while(0) {}
-+# define ext2_xattr_rehash(header, entry) while(0) {}
-+#endif
-+
-+/*
-+ * If a file system does not share extended attributes among inodes,
-+ * we should not need the ext2_xattr_sem semaphore. However, the
-+ * filesystem may still contain shared blocks, so we always take
-+ * the lock.
-+ */
-+
-+DECLARE_MUTEX(ext2_xattr_sem);
-+
-+/*
-+ * Ask ext2_new_block() for a block to hold this inode's extended
-+ * attributes.  The allocation goal is the filesystem's first data block
-+ * plus the inode's block group number times the blocks per group.
-+ * Returns whatever ext2_new_block() returns; *errp is filled in by that
-+ * call.  The 'force' argument is not used in this function.
-+ * With OLD_QUOTAS defined, i_blocks is increased by s_blocksize >> 9
-+ * when *errp is zero.
-+ */
-+static inline int
-+ext2_xattr_new_block(struct inode *inode, int * errp, int force)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) +
-+              EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb);
-+
-+      /* How can we enforce the allocation? */
-+      int block = ext2_new_block(inode, goal, 0, 0, errp);
-+#ifdef OLD_QUOTAS
-+      if (!*errp)
-+              inode->i_blocks += inode->i_sb->s_blocksize >> 9;
-+#endif
-+      return block;
-+}
-+
-+/*
-+ * Charge one block to the inode's quota with DQUOT_ALLOC_BLOCK() and
-+ * return that macro's result.  The macro takes (sb, inode, 1) when
-+ * OLD_QUOTAS is defined and (inode, 1) otherwise; in the OLD_QUOTAS
-+ * case i_blocks is also increased by s_blocksize >> 9 when the result
-+ * is zero.  The 'force' argument is not used in this function.
-+ */
-+static inline int
-+ext2_xattr_quota_alloc(struct inode *inode, int force)
-+{
-+      /* How can we enforce the allocation? */
-+#ifdef OLD_QUOTAS
-+      int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1);
-+      if (!error)
-+              inode->i_blocks += inode->i_sb->s_blocksize >> 9;
-+#else
-+      int error = DQUOT_ALLOC_BLOCK(inode, 1);
-+#endif
-+      return error;
-+}
-+
-+/*
-+ * ext2_xattr_quota_free() and ext2_xattr_free_block().
-+ * With OLD_QUOTAS they are functions that call DQUOT_FREE_BLOCK() /
-+ * ext2_free_blocks() for one block and then subtract s_blocksize >> 9
-+ * from i_blocks.  Without OLD_QUOTAS they are macros that expand to the
-+ * bare call and leave i_blocks alone.
-+ */
-+#ifdef OLD_QUOTAS
-+
-+static inline void
-+ext2_xattr_quota_free(struct inode *inode)
-+{
-+      DQUOT_FREE_BLOCK(inode->i_sb, inode, 1);
-+      inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
-+}
-+
-+static inline void
-+ext2_xattr_free_block(struct inode * inode, unsigned long block)
-+{
-+      ext2_free_blocks(inode, block, 1);
-+      inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
-+}
-+
-+#else
-+# define ext2_xattr_quota_free(inode) \
-+      DQUOT_FREE_BLOCK(inode, 1)
-+# define ext2_xattr_free_block(inode, block) \
-+      ext2_free_blocks(inode, block, 1)
-+#endif
-+
-+/*
-+ * Compatibility for kernels older than 2.4.18: define sb_bread() and
-+ * sb_getblk() as wrappers around bread() / getblk(), taking the device
-+ * and block size from the super block.
-+ */
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
-+
-+static inline struct buffer_head *
-+sb_bread(struct super_block *sb, int block)
-+{
-+      return bread(sb->s_dev, block, sb->s_blocksize);
-+}
-+
-+static inline struct buffer_head *
-+sb_getblk(struct super_block *sb, int block)
-+{
-+      return getblk(sb->s_dev, block, sb->s_blocksize);
-+}
-+
-+#endif
-+
-+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX];
-+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED;
-+
-+/*
-+ * Install 'handler' in slot name_index-1 of ext2_xattr_handlers[],
-+ * under the ext2_handler_lock write lock.
-+ *
-+ * Returns 0 on success, or -EINVAL if name_index is outside
-+ * 1..EXT2_XATTR_INDEX_MAX or the slot is already occupied.
-+ */
-+int
-+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler)
-+{
-+      int error = -EINVAL;
-+
-+      if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
-+              write_lock(&ext2_handler_lock);
-+              if (!ext2_xattr_handlers[name_index-1]) {
-+                      ext2_xattr_handlers[name_index-1] = handler;
-+                      error = 0;
-+              }
-+              write_unlock(&ext2_handler_lock);
-+      }
-+      return error;
-+}
-+
-+/*
-+ * Clear slot name_index-1 of ext2_xattr_handlers[], under the
-+ * ext2_handler_lock write lock.  An index outside
-+ * 1..EXT2_XATTR_INDEX_MAX is ignored.  'handler' is not compared with
-+ * the slot's current contents; the slot is cleared unconditionally.
-+ */
-+void
-+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler)
-+{
-+      /* Both bounds must hold: with '||' every index passed the test. */
-+      if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
-+              write_lock(&ext2_handler_lock);
-+              ext2_xattr_handlers[name_index-1] = NULL;
-+              write_unlock(&ext2_handler_lock);
-+      }
-+}
-+
-+/*
-+ * If string 'a' begins with 'a_prefix', return a pointer to the first
-+ * character of 'a' after the prefix; otherwise return NULL.  An empty
-+ * prefix matches and returns 'a' itself.
-+ */
-+static inline const char *
-+strcmp_prefix(const char *a, const char *a_prefix)
-+{
-+      while (*a_prefix && *a == *a_prefix) {
-+              a++;
-+              a_prefix++;
-+      }
-+      return *a_prefix ? NULL : a;
-+}
-+
-+/*
-+ * Decode the extended attribute name: find the registered handler
-+ * whose prefix the name starts with, and strip that prefix.
-+ *
-+ * The handler table is scanned in slot order under the
-+ * ext2_handler_lock read lock; the first slot whose prefix matches
-+ * wins.  On a match *name is advanced past the prefix and the handler
-+ * is returned.  Returns NULL, leaving *name untouched, if *name is NULL
-+ * or no registered prefix matches.
-+ */
-+static struct ext2_xattr_handler *
-+ext2_xattr_resolve_name(const char **name)
-+{
-+      struct ext2_xattr_handler *handler = NULL;
-+      int i;
-+
-+      if (!*name)
-+              return NULL;
-+      read_lock(&ext2_handler_lock);
-+      for (i=0; i<EXT2_XATTR_INDEX_MAX; i++) {
-+              if (ext2_xattr_handlers[i]) {
-+                      const char *n = strcmp_prefix(*name,
-+                              ext2_xattr_handlers[i]->prefix);
-+                      if (n) {
-+                              handler = ext2_xattr_handlers[i];
-+                              *name = n;
-+                              break;
-+                      }
-+              }
-+      }
-+      read_unlock(&ext2_handler_lock);
-+      return handler;
-+}
-+
-+/*
-+ * Look up the handler stored in slot name_index-1 of
-+ * ext2_xattr_handlers[].  Returns NULL if name_index is outside
-+ * 1..EXT2_XATTR_INDEX_MAX or the slot is empty.  The read lock is held
-+ * only for the table read and is dropped before returning.
-+ */
-+static inline struct ext2_xattr_handler *
-+ext2_xattr_handler(int name_index)
-+{
-+      struct ext2_xattr_handler *handler = NULL;
-+      if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
-+              read_lock(&ext2_handler_lock);
-+              handler = ext2_xattr_handlers[name_index-1];
-+              read_unlock(&ext2_handler_lock);
-+      }
-+      return handler;
-+}
-+
-+/*
-+ * Inode operation getxattr()
-+ *
-+ * Resolves the handler from the name's prefix and forwards to its
-+ * get() method with the prefix stripped.  Returns -ENOTSUP when no
-+ * registered handler matches the name.
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext2_getxattr(struct dentry *dentry, const char *name,
-+            void *buffer, size_t size)
-+{
-+      struct ext2_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      handler = ext2_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->get(inode, name, buffer, size);
-+}
-+
-+/*
-+ * Inode operation listxattr()
-+ *
-+ * Forwards to ext2_xattr_list() on the dentry's inode and returns its
-+ * result unchanged.
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
-+{
-+      return ext2_xattr_list(dentry->d_inode, buffer, size);
-+}
-+
-+/*
-+ * Inode operation setxattr()
-+ *
-+ * Resolves the handler from the name's prefix and forwards to its
-+ * set() method with the prefix stripped.  Returns -ENOTSUP when no
-+ * registered handler matches the name.
-+ *
-+ * A zero-length value is replaced by a pointer to an empty string
-+ * before the call, so set() receives a non-NULL value with size 0;
-+ * ext2_removexattr() is the caller that passes a NULL value.
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+int
-+ext2_setxattr(struct dentry *dentry, const char *name,
-+            const void *value, size_t size, int flags)
-+{
-+      struct ext2_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      if (size == 0)
-+              value = "";  /* empty EA, do not remove */
-+      handler = ext2_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->set(inode, name, value, size, flags);
-+}
-+
-+/*
-+ * Inode operation removexattr()
-+ *
-+ * Implemented through the handler's set() method: it is called with a
-+ * NULL value, size 0 and the XATTR_REPLACE flag.  Returns -ENOTSUP when
-+ * no registered handler matches the name.
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+int
-+ext2_removexattr(struct dentry *dentry, const char *name)
-+{
-+      struct ext2_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      handler = ext2_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
-+}
-+
-+/*
-+ * ext2_xattr_get()
-+ *
-+ * Copy an extended attribute into the buffer
-+ * provided, or compute the buffer size required.
-+ * Buffer is NULL to compute the size of the buffer required.
-+ *
-+ * Returns a negative error number on failure, or the number of bytes
-+ * used / required on success.
-+ *
-+ * Errors returned here:
-+ *   -EINVAL   name is NULL
-+ *   -ENOATTR  the inode has no i_file_acl block, or no entry in the
-+ *             block matches name_index and name
-+ *   -EIO      the block cannot be read, or fails the magic / h_blocks /
-+ *             entry-bounds / value-bounds checks (also logged through
-+ *             ext2_error())
-+ *   -ERANGE   name is longer than 255 bytes, or the value is larger
-+ *             than buffer_size
-+ */
-+int
-+ext2_xattr_get(struct inode *inode, int name_index, const char *name,
-+             void *buffer, size_t buffer_size)
-+{
-+      struct buffer_head *bh = NULL;
-+      struct ext2_xattr_entry *entry;
-+      unsigned int block, size;
-+      char *end;
-+      int name_len, error;
-+
-+      ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
-+                name_index, name, buffer, (long)buffer_size);
-+
-+      if (name == NULL)
-+              return -EINVAL;
-+      if (!EXT2_I(inode)->i_file_acl)
-+              return -ENOATTR;
-+      block = EXT2_I(inode)->i_file_acl;
-+      ea_idebug(inode, "reading block %d", block);
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh)
-+              return -EIO;
-+      ea_bdebug(bh, "b_count=%d, refcount=%d",
-+              atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-+      end = bh->b_data + bh->b_size;
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+bad_block:    ext2_error(inode->i_sb, "ext2_xattr_get",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              error = -EIO;
-+              goto cleanup;
-+      }
-+      /* find named attribute */
-+      name_len = strlen(name);
-+
-+      error = -ERANGE;
-+      if (name_len > 255)
-+              goto cleanup;
-+      entry = FIRST_ENTRY(bh);
-+      while (!IS_LAST_ENTRY(entry)) {
-+              struct ext2_xattr_entry *next =
-+                      EXT2_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+              if (name_index == entry->e_name_index &&
-+                  name_len == entry->e_name_len &&
-+                  memcmp(name, entry->e_name, name_len) == 0)
-+                      goto found;
-+              entry = next;
-+      }
-+      /*
-+       * Check the remaining name entries.
-+       * NOTE(review): the loop above only falls through once entry is
-+       * the terminator, so this loop body appears never to run here.
-+       */
-+      while (!IS_LAST_ENTRY(entry)) {
-+              struct ext2_xattr_entry *next =
-+                      EXT2_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+              entry = next;
-+      }
-+      if (ext2_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      error = -ENOATTR;
-+      goto cleanup;
-+found:
-+      /* check the buffer size */
-+      if (entry->e_value_block != 0)
-+              goto bad_block;
-+      size = le32_to_cpu(entry->e_value_size);
-+      if (size > inode->i_sb->s_blocksize ||
-+          le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
-+              goto bad_block;
-+
-+      if (ext2_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      if (buffer) {
-+              error = -ERANGE;
-+              if (size > buffer_size)
-+                      goto cleanup;
-+              /* return value of attribute */
-+              memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-+                      size);
-+      }
-+      error = size;
-+
-+cleanup:
-+      brelse(bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * ext2_xattr_list()
-+ *
-+ * Copy a list of attribute names into the buffer
-+ * provided, or compute the buffer size required.
-+ * Buffer is NULL to compute the size of the buffer required.
-+ *
-+ * Returns a negative error number on failure, or the number of bytes
-+ * used / required on success.
-+ *
-+ * Returns 0 when the inode has no i_file_acl block.  The block is
-+ * walked twice: first calling each handler's list() with a NULL buffer
-+ * to add up the size, then again to write the names.  Entries whose
-+ * e_name_index has no registered handler are skipped in both passes.
-+ * -EIO is returned when the block cannot be read or fails validation,
-+ * -ERANGE when the computed size exceeds buffer_size.
-+ */
-+int
-+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
-+{
-+      struct buffer_head *bh = NULL;
-+      struct ext2_xattr_entry *entry;
-+      unsigned int block, size = 0;
-+      char *buf, *end;
-+      int error;
-+
-+      ea_idebug(inode, "buffer=%p, buffer_size=%ld",
-+                buffer, (long)buffer_size);
-+
-+      if (!EXT2_I(inode)->i_file_acl)
-+              return 0;
-+      block = EXT2_I(inode)->i_file_acl;
-+      ea_idebug(inode, "reading block %d", block);
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh)
-+              return -EIO;
-+      ea_bdebug(bh, "b_count=%d, refcount=%d",
-+              atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-+      end = bh->b_data + bh->b_size;
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+bad_block:    ext2_error(inode->i_sb, "ext2_xattr_list",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              error = -EIO;
-+              goto cleanup;
-+      }
-+      /* compute the size required for the list of attribute names */
-+      for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+           entry = EXT2_XATTR_NEXT(entry)) {
-+              struct ext2_xattr_handler *handler;
-+              struct ext2_xattr_entry *next =
-+                      EXT2_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+
-+              handler = ext2_xattr_handler(entry->e_name_index);
-+              if (handler)
-+                      size += handler->list(NULL, inode, entry->e_name,
-+                                            entry->e_name_len);
-+      }
-+
-+      if (ext2_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      if (!buffer) {
-+              error = size;
-+              goto cleanup;
-+      } else {
-+              error = -ERANGE;
-+              if (size > buffer_size)
-+                      goto cleanup;
-+      }
-+
-+      /* list the attribute names */
-+      buf = buffer;
-+      for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+           entry = EXT2_XATTR_NEXT(entry)) {
-+              struct ext2_xattr_handler *handler;
-+              
-+              handler = ext2_xattr_handler(entry->e_name_index);
-+              if (handler)
-+                      buf += handler->list(buf, inode, entry->e_name,
-+                                           entry->e_name_len);
-+      }
-+      error = size;
-+
-+cleanup:
-+      brelse(bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
-+ * not set, set it (under lock_super) and mark the superblock dirty.
-+ */
-+static void ext2_xattr_update_super_block(struct super_block *sb)
-+{
-+      if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
-+              return;  /* already flagged: nothing to do */
-+
-+      lock_super(sb);
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
-+      EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR;
-+#endif
-+      EXT2_SB(sb)->s_es->s_feature_compat |=
-+              cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR);
-+      sb->s_dirt = 1;
-+      mark_buffer_dirty(EXT2_SB(sb)->s_sbh);  /* buffer holding s_es */
-+      unlock_super(sb);
-+}
-+
-+/*
-+ * ext2_xattr_set()
-+ *
-+ * Create, replace or remove an extended attribute for this inode. Value
-+ * is NULL to remove an existing extended attribute, and non-NULL to
-+ * either replace an existing extended attribute, or create a new extended
-+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
-+ * specify that an extended attribute must exist and must not exist
-+ * previous to the call, respectively.
-+ *
-+ * Returns 0, or a negative error number on failure (e.g. -ENOATTR, -EEXIST).
-+ */
-+int
-+ext2_xattr_set(struct inode *inode, int name_index, const char *name,
-+             const void *value, size_t value_len, int flags)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      struct buffer_head *bh = NULL;
-+      struct ext2_xattr_header *header = NULL;
-+      struct ext2_xattr_entry *here, *last;
-+      unsigned int name_len;
-+      int block = EXT2_I(inode)->i_file_acl;
-+      int min_offs = sb->s_blocksize, not_found = 1, free, error;
-+      char *end;
-+      
-+      /*
-+       * header -- Points either into bh, or to a temporarily
-+       *           allocated buffer.
-+       * here -- The named entry found, or the place for inserting, within
-+       *         the block pointed to by header.
-+       * last -- Points right after the last named entry within the block
-+       *         pointed to by header.
-+       * min_offs -- The offset of the first value (values are aligned
-+       *             towards the end of the block).
-+       * end -- Points right after the block pointed to by header.
-+       */
-+      
-+      ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
-+                name_index, name, value, (long)value_len);
-+
-+      if (IS_RDONLY(inode))
-+              return -EROFS;
-+      if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-+              return -EPERM;
-+      if (value == NULL)
-+              value_len = 0;  /* removal: any passed length is ignored */
-+      if (name == NULL)
-+              return -EINVAL;
-+      name_len = strlen(name);
-+      if (name_len > 255 || value_len > sb->s_blocksize)
-+              return -ERANGE;
-+      down(&ext2_xattr_sem);  /* released at cleanup */
-+
-+      if (block) {
-+              /* The inode already has an extended attribute block. */
-+
-+              bh = sb_bread(sb, block);
-+              error = -EIO;
-+              if (!bh)
-+                      goto cleanup;
-+              ea_bdebug(bh, "b_count=%d, refcount=%d",
-+                      atomic_read(&(bh->b_count)),
-+                      le32_to_cpu(HDR(bh)->h_refcount));
-+              header = HDR(bh);
-+              end = bh->b_data + bh->b_size;
-+              if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
-+                  header->h_blocks != cpu_to_le32(1)) {
-+bad_block:            ext2_error(sb, "ext2_xattr_set",
-+                              "inode %ld: bad block %d", inode->i_ino, block);
-+                      error = -EIO;
-+                      goto cleanup;
-+              }
-+              /* Find the named attribute. */
-+              here = FIRST_ENTRY(bh);
-+              while (!IS_LAST_ENTRY(here)) {
-+                      struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here);
-+                      if ((char *)next >= end)
-+                              goto bad_block;
-+                      if (!here->e_value_block && here->e_value_size) {
-+                              int offs = le16_to_cpu(here->e_value_offs);
-+                              if (offs < min_offs)
-+                                      min_offs = offs;
-+                      }
-+                      not_found = name_index - here->e_name_index;  /* order: index, */
-+                      if (!not_found)
-+                              not_found = name_len - here->e_name_len;  /* length, */
-+                      if (!not_found)
-+                              not_found = memcmp(name, here->e_name,name_len);  /* name */
-+                      if (not_found <= 0)  /* match (0) or insertion point (<0) */
-+                              break;
-+                      here = next;
-+              }
-+              last = here;
-+              /* We still need to compute min_offs and last. */
-+              while (!IS_LAST_ENTRY(last)) {
-+                      struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last);
-+                      if ((char *)next >= end)
-+                              goto bad_block;
-+                      if (!last->e_value_block && last->e_value_size) {
-+                              int offs = le16_to_cpu(last->e_value_offs);
-+                              if (offs < min_offs)
-+                                      min_offs = offs;
-+                      }
-+                      last = next;
-+              }
-+
-+              /* Check whether we have enough space left. */
-+              free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
-+      } else {
-+              /* We will use a new extended attribute block. */
-+              free = sb->s_blocksize -
-+                      sizeof(struct ext2_xattr_header) - sizeof(__u32);
-+              here = last = NULL;  /* avoid gcc uninitialized warning. */
-+      }
-+
-+      if (not_found) {
-+              /* Request to remove a nonexistent attribute? */
-+              error = -ENOATTR;
-+              if (flags & XATTR_REPLACE)
-+                      goto cleanup;
-+              error = 0;
-+              if (value == NULL)  /* removing a missing attribute succeeds */
-+                      goto cleanup;
-+              else
-+                      free -= EXT2_XATTR_LEN(name_len);
-+      } else {
-+              /* Request to create an existing attribute? */
-+              error = -EEXIST;
-+              if (flags & XATTR_CREATE)
-+                      goto cleanup;
-+              if (!here->e_value_block && here->e_value_size) {
-+                      unsigned int size = le32_to_cpu(here->e_value_size);
-+
-+                      if (le16_to_cpu(here->e_value_offs) + size > 
-+                          sb->s_blocksize || size > sb->s_blocksize)
-+                              goto bad_block;
-+                      free += EXT2_XATTR_SIZE(size);  /* old value's space is reused */
-+              }
-+      }
-+      free -= EXT2_XATTR_SIZE(value_len);
-+      error = -ENOSPC;
-+      if (free < 0)
-+              goto cleanup;
-+
-+      /* Here we know that we can set the new attribute. */
-+
-+      if (header) {
-+              if (header->h_refcount == cpu_to_le32(1)) {  /* sole user of the block */
-+                      ea_bdebug(bh, "modifying in-place");
-+                      ext2_xattr_cache_remove(bh);
-+              } else {
-+                      int offset;
-+
-+                      ea_bdebug(bh, "cloning");
-+                      header = kmalloc(bh->b_size, GFP_KERNEL);
-+                      error = -ENOMEM;
-+                      if (header == NULL)
-+                              goto cleanup;
-+                      memcpy(header, HDR(bh), bh->b_size);
-+                      header->h_refcount = cpu_to_le32(1);
-+                      offset = (char *)header - bh->b_data;  /* rebase onto the copy */
-+                      here = ENTRY((char *)here + offset);
-+                      last = ENTRY((char *)last + offset);
-+              }
-+      } else {
-+              /* Allocate a buffer where we construct the new block. */
-+              header = kmalloc(sb->s_blocksize, GFP_KERNEL);
-+              error = -ENOMEM;
-+              if (header == NULL)
-+                      goto cleanup;
-+              memset(header, 0, sb->s_blocksize);
-+              end = (char *)header + sb->s_blocksize;
-+              header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
-+              header->h_blocks = header->h_refcount = cpu_to_le32(1);
-+              last = here = ENTRY(header+1);
-+      }
-+
-+      if (not_found) {
-+              /* Insert the new name. */
-+              int size = EXT2_XATTR_LEN(name_len);
-+              int rest = (char *)last - (char *)here;
-+              memmove((char *)here + size, here, rest);  /* open a gap at here */
-+              memset(here, 0, size);
-+              here->e_name_index = name_index;
-+              here->e_name_len = name_len;
-+              memcpy(here->e_name, name, name_len);
-+      } else {
-+              /* Remove the old value. */
-+              if (!here->e_value_block && here->e_value_size) {
-+                      char *first_val = (char *)header + min_offs;
-+                      int offs = le16_to_cpu(here->e_value_offs);
-+                      char *val = (char *)header + offs;
-+                      size_t size = EXT2_XATTR_SIZE(
-+                              le32_to_cpu(here->e_value_size));
-+                      memmove(first_val + size, first_val, val - first_val);
-+                      memset(first_val, 0, size);
-+                      here->e_value_offs = 0;
-+                      min_offs += size;
-+
-+                      /* Adjust all value offsets. */
-+                      last = ENTRY(header+1);
-+                      while (!IS_LAST_ENTRY(last)) {
-+                              int o = le16_to_cpu(last->e_value_offs);
-+                              if (!last->e_value_block && o < offs)
-+                                      last->e_value_offs =
-+                                              cpu_to_le16(o + size);
-+                              last = EXT2_XATTR_NEXT(last);
-+                      }
-+              }
-+              if (value == NULL) {
-+                      /* Remove this attribute. */
-+                      if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) {
-+                              /* This block is now empty. */
-+                              error = ext2_xattr_set2(inode, bh, NULL);  /* NULL: no block */
-+                              goto cleanup;
-+                      } else {
-+                              /* Remove the old name. */
-+                              int size = EXT2_XATTR_LEN(name_len);
-+                              last = ENTRY((char *)last - size);
-+                              memmove(here, (char*)here + size,
-+                                      (char*)last - (char*)here);
-+                              memset(last, 0, size);
-+                      }
-+              }
-+      }
-+
-+      if (value != NULL) {
-+              /* Insert the new value. */
-+              here->e_value_size = cpu_to_le32(value_len);
-+              if (value_len) {
-+                      size_t size = EXT2_XATTR_SIZE(value_len);
-+                      char *val = (char *)header + min_offs - size;
-+                      here->e_value_offs =
-+                              cpu_to_le16((char *)val - (char *)header);
-+                      memset(val + size - EXT2_XATTR_PAD, 0,
-+                             EXT2_XATTR_PAD); /* Clear the pad bytes. */
-+                      memcpy(val, value, value_len);
-+              }
-+      }
-+      ext2_xattr_rehash(header, here);
-+
-+      error = ext2_xattr_set2(inode, bh, header);
-+
-+cleanup:
-+      brelse(bh);
-+      if (!(bh && header == HDR(bh)))  /* header is NULL or a kmalloc'ed copy */
-+              kfree(header);
-+      up(&ext2_xattr_sem);
-+
-+      return error;
-+}
-+
-+/*
-+ * Second half of ext2_xattr_set(): Update the file system (NULL header: no block).
-+ */
-+static int
-+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
-+              struct ext2_xattr_header *header)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      struct buffer_head *new_bh = NULL;
-+      int error;
-+
-+      if (header) {
-+              new_bh = ext2_xattr_cache_find(inode, header);
-+              if (new_bh) {
-+                      /*
-+                       * We found an identical block in the cache.
-+                       * The old block will be released after updating
-+                       * the inode.
-+                       */
-+                      ea_bdebug(old_bh, "reusing block %ld",
-+                              new_bh->b_blocknr);
-+                      
-+                      error = -EDQUOT;
-+                      if (ext2_xattr_quota_alloc(inode, 1))
-+                              goto cleanup;
-+                      
-+                      HDR(new_bh)->h_refcount = cpu_to_le32(
-+                              le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
-+                      ea_bdebug(new_bh, "refcount now=%d",
-+                              le32_to_cpu(HDR(new_bh)->h_refcount));
-+              } else if (old_bh && header == HDR(old_bh)) {  /* edited in place */
-+                      /* Keep this block. */
-+                      new_bh = old_bh;
-+                      ext2_xattr_cache_insert(new_bh);
-+              } else {
-+                      /* We need to allocate a new block */
-+                      int force = EXT2_I(inode)->i_file_acl != 0;
-+                      int block = ext2_xattr_new_block(inode, &error, force);
-+                      if (error)
-+                              goto cleanup;
-+                      ea_idebug(inode, "creating block %d", block);
-+
-+                      new_bh = sb_getblk(sb, block);
-+                      if (!new_bh) {
-+                              ext2_xattr_free_block(inode, block);
-+                              error = -EIO;
-+                              goto cleanup;
-+                      }
-+                      lock_buffer(new_bh);
-+                      memcpy(new_bh->b_data, header, new_bh->b_size);
-+                      mark_buffer_uptodate(new_bh, 1);
-+                      unlock_buffer(new_bh);
-+                      ext2_xattr_cache_insert(new_bh);
-+                      
-+                      ext2_xattr_update_super_block(sb);
-+              }
-+              mark_buffer_dirty(new_bh);
-+              if (IS_SYNC(inode)) {
-+                      ll_rw_block(WRITE, 1, &new_bh);
-+                      wait_on_buffer(new_bh); 
-+                      error = -EIO;
-+                      if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
-+                              goto cleanup;
-+              }
-+      }
-+
-+      /* Update the inode. */
-+      EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;  /* 0: no block */
-+      inode->i_ctime = CURRENT_TIME;
-+      if (IS_SYNC(inode)) {
-+              error = ext2_sync_inode (inode);
-+              if (error)
-+                      goto cleanup;
-+      } else
-+              mark_inode_dirty(inode);
-+
-+      error = 0;
-+      if (old_bh && old_bh != new_bh) {
-+              /*
-+               * If there was an old block, and we are not still using it,
-+               * we now release the old block.
-+              */
-+              unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
-+
-+              if (refcount == 1) {
-+                      /* Free the old block. */
-+                      ea_bdebug(old_bh, "freeing");
-+                      ext2_xattr_free_block(inode, old_bh->b_blocknr);
-+                      mark_buffer_clean(old_bh);
-+              } else {
-+                      /* Decrement the refcount only. */
-+                      refcount--;
-+                      HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
-+                      ext2_xattr_quota_free(inode);
-+                      mark_buffer_dirty(old_bh);
-+                      ea_bdebug(old_bh, "refcount now=%d", refcount);
-+              }
-+      }
-+
-+cleanup:
-+      if (old_bh != new_bh)  /* old_bh itself is released by ext2_xattr_set() */
-+              brelse(new_bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * ext2_xattr_delete_inode()
-+ *
-+ * Free extended attribute resources associated with this inode. This
-+ * is called immediately before an inode is freed. Errors are only logged.
-+ */
-+void
-+ext2_xattr_delete_inode(struct inode *inode)
-+{
-+      struct buffer_head *bh;
-+      unsigned int block = EXT2_I(inode)->i_file_acl;
-+
-+      if (!block)
-+              return;  /* inode has no attribute block */
-+      down(&ext2_xattr_sem);
-+
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh) {
-+              ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
-+                      "inode %ld: block %d read error", inode->i_ino, block);
-+              goto cleanup;
-+      }
-+      ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+              ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              goto cleanup;
-+      }
-+      ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-+      if (HDR(bh)->h_refcount == cpu_to_le32(1)) {  /* last reference: free it */
-+              ext2_xattr_cache_remove(bh);
-+              ext2_xattr_free_block(inode, block);
-+              bforget(bh);
-+              bh = NULL;  /* presumably bforget() consumed our reference */
-+      } else {
-+              HDR(bh)->h_refcount = cpu_to_le32(
-+                      le32_to_cpu(HDR(bh)->h_refcount) - 1);
-+              mark_buffer_dirty(bh);
-+              if (IS_SYNC(inode)) {
-+                      ll_rw_block(WRITE, 1, &bh);
-+                      wait_on_buffer(bh);
-+              }
-+              ext2_xattr_quota_free(inode);
-+      }
-+      EXT2_I(inode)->i_file_acl = 0;  /* skipped on the error paths above */
-+
-+cleanup:
-+      brelse(bh);
-+      up(&ext2_xattr_sem);
-+}
-+
-+/*
-+ * ext2_xattr_put_super()
-+ *
-+ * This is called when a file system is unmounted; it shrinks the cache for sb->s_dev.
-+ */
-+void
-+ext2_xattr_put_super(struct super_block *sb)
-+{
-+#ifdef CONFIG_EXT2_FS_XATTR_SHARING
-+      mb_cache_shrink(ext2_xattr_cache, sb->s_dev);  /* no-op build without sharing */
-+#endif
-+}
-+
-+#ifdef CONFIG_EXT2_FS_XATTR_SHARING
-+
-+/*
-+ * ext2_xattr_cache_insert()
-+ *
-+ * Create a new entry in the extended attribute cache, and insert
-+ * it unless such an entry is already in the cache. Keyed by h_hash.
-+ *
-+ * Returns 0 (also when already cached), or a negative error number on failure.
-+ */
-+static int
-+ext2_xattr_cache_insert(struct buffer_head *bh)
-+{
-+      __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-+      struct mb_cache_entry *ce;
-+      int error;
-+
-+      ce = mb_cache_entry_alloc(ext2_xattr_cache);
-+      if (!ce)
-+              return -ENOMEM;
-+      error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash);
-+      if (error) {
-+              mb_cache_entry_free(ce);
-+              if (error == -EBUSY) {  /* already cached: not an error */
-+                      ea_bdebug(bh, "already in cache (%d cache entries)",
-+                              atomic_read(&ext2_xattr_cache->c_entry_count));
-+                      error = 0;
-+              }
-+      } else {
-+              ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
-+                        atomic_read(&ext2_xattr_cache->c_entry_count));
-+              mb_cache_entry_release(ce);
-+      }
-+      return error;
-+}
-+
-+/*
-+ * ext2_xattr_cmp()
-+ *
-+ * Compare two extended attribute blocks for equality, entry by entry. The
-+ * name index is checked explicitly: e_hash covers only name and value bytes.
-+ * Returns 0 if the blocks are equal, 1 if they differ, and -EIO if an
-+ * entry stores its value in another block (e_value_block != 0).
-+ */
-+static int
-+ext2_xattr_cmp(struct ext2_xattr_header *header1,
-+             struct ext2_xattr_header *header2)
-+{
-+      struct ext2_xattr_entry *entry1, *entry2;
-+
-+      entry1 = ENTRY(header1+1);
-+      entry2 = ENTRY(header2+1);
-+      while (!IS_LAST_ENTRY(entry1)) {
-+              if (IS_LAST_ENTRY(entry2))  /* header2 has fewer entries */
-+                      return 1;
-+              if (entry1->e_hash != entry2->e_hash ||
-+                  entry1->e_name_index != entry2->e_name_index ||
-+                  entry1->e_name_len != entry2->e_name_len ||
-+                  entry1->e_value_size != entry2->e_value_size ||
-+                  memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
-+                      return 1;
-+              if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
-+                      return -EIO;
-+              if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
-+                         (char *)header2 + le16_to_cpu(entry2->e_value_offs),
-+                         le32_to_cpu(entry1->e_value_size)))
-+                      return 1;
-+              entry1 = EXT2_XATTR_NEXT(entry1);
-+              entry2 = EXT2_XATTR_NEXT(entry2);
-+      }
-+      if (!IS_LAST_ENTRY(entry2))  /* header2 has more entries */
-+              return 1;
-+      return 0;
-+}
-+
-+/*
-+ * ext2_xattr_cache_find()
-+ *
-+ * Find an identical extended attribute block among cached blocks with the same hash.
-+ *
-+ * Returns a pointer to the block found, or NULL if such a block was
-+ * not found or an error occurred.
-+ */
-+static struct buffer_head *
-+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
-+{
-+      __u32 hash = le32_to_cpu(header->h_hash);
-+      struct mb_cache_entry *ce;
-+
-+      if (!header->h_hash)
-+              return NULL;  /* never share */
-+      ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-+      ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash);
-+      while (ce) {
-+              struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
-+
-+              if (!bh) {
-+                      ext2_error(inode->i_sb, "ext2_xattr_cache_find",
-+                              "inode %ld: block %ld read error",
-+                              inode->i_ino, ce->e_block);
-+              } else if (le32_to_cpu(HDR(bh)->h_refcount) >
-+                         EXT2_XATTR_REFCOUNT_MAX) {
-+                      ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block,
-+                              le32_to_cpu(HDR(bh)->h_refcount),
-+                              EXT2_XATTR_REFCOUNT_MAX);
-+              } else if (!ext2_xattr_cmp(header, HDR(bh))) {
-+                      ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
-+                      mb_cache_entry_release(ce);
-+                      return bh;  /* returned with its reference still held */
-+              }
-+              brelse(bh);  /* candidate rejected, or bh is NULL after a read error */
-+              ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash);
-+      }
-+      return NULL;
-+}
-+
-+/*
-+ * ext2_xattr_cache_remove()
-+ *
-+ * Remove the cache entry of a block from the cache. Called when a
-+ * block becomes invalid. A block with no cache entry is only logged.
-+ */
-+static void
-+ext2_xattr_cache_remove(struct buffer_head *bh)
-+{
-+      struct mb_cache_entry *ce;
-+
-+      ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr);
-+      if (ce) {
-+              ea_bdebug(bh, "removing (%d cache entries remaining)",
-+                        atomic_read(&ext2_xattr_cache->c_entry_count)-1);
-+              mb_cache_entry_free(ce);
-+      } else 
-+              ea_bdebug(bh, "no cache entry");
-+}
-+
-+#define NAME_HASH_SHIFT 5
-+#define VALUE_HASH_SHIFT 16
-+
-+/*
-+ * ext2_xattr_hash_entry()
-+ *
-+ * Hash an attribute's name and in-block value; the result is stored in entry->e_hash.
-+ */
-+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
-+                                       struct ext2_xattr_entry *entry)
-+{
-+      __u32 hash = 0;
-+      char *name = entry->e_name;
-+      int n;
-+
-+      for (n=0; n < entry->e_name_len; n++) {
-+              hash = (hash << NAME_HASH_SHIFT) ^
-+                     (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
-+                     *name++;  /* rotate left, then mix in the next name byte */
-+      }
-+
-+      if (entry->e_value_block == 0 && entry->e_value_size != 0) {
-+              __u32 *value = (__u32 *)((char *)header +
-+                      le16_to_cpu(entry->e_value_offs));
-+              for (n = (le32_to_cpu(entry->e_value_size) +
-+                   EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) {
-+                      hash = (hash << VALUE_HASH_SHIFT) ^
-+                             (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
-+                             le32_to_cpu(*value++);  /* value is read as __u32 words */
-+              }
-+      }
-+      entry->e_hash = cpu_to_le32(hash);
-+}
-+
-+#undef NAME_HASH_SHIFT
-+#undef VALUE_HASH_SHIFT
-+
-+#define BLOCK_HASH_SHIFT 16
-+
-+/*
-+ * ext2_xattr_rehash()
-+ *
-+ * Re-hash the changed entry, then re-compute the block hash (h_hash) over all entries.
-+ */
-+static void ext2_xattr_rehash(struct ext2_xattr_header *header,
-+                            struct ext2_xattr_entry *entry)
-+{
-+      struct ext2_xattr_entry *here;
-+      __u32 hash = 0;
-+      
-+      ext2_xattr_hash_entry(header, entry);
-+      here = ENTRY(header+1);
-+      while (!IS_LAST_ENTRY(here)) {
-+              if (!here->e_hash) {
-+                      /* Block is not shared if an entry's hash value == 0 */
-+                      hash = 0;
-+                      break;
-+              }
-+              hash = (hash << BLOCK_HASH_SHIFT) ^
-+                     (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
-+                     le32_to_cpu(here->e_hash);
-+              here = EXT2_XATTR_NEXT(here);
-+      }
-+      header->h_hash = cpu_to_le32(hash);  /* 0 makes ext2_xattr_cache_find() skip it */
-+}
-+
-+#undef BLOCK_HASH_SHIFT
-+
-+int __init
-+init_ext2_xattr(void)
-+{
-+      ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
-+              sizeof(struct mb_cache_entry) +
-+              sizeof(struct mb_cache_entry_index), 1, 61);
-+      if (!ext2_xattr_cache)
-+              return -ENOMEM;  /* the shared-block cache could not be created */
-+
-+      return 0;
-+}
-+
-+void
-+exit_ext2_xattr(void)
-+{
-+      mb_cache_destroy(ext2_xattr_cache);  /* the cache made by init_ext2_xattr() */
-+}
-+
-+#else  /* CONFIG_EXT2_FS_XATTR_SHARING */
-+
-+int __init
-+init_ext2_xattr(void)
-+{
-+      return 0;  /* sharing compiled out: there is no cache to create */
-+}
-+
-+void
-+exit_ext2_xattr(void)
-+{  /* sharing compiled out: there is no cache to destroy */
-+}
-+
-+#endif  /* CONFIG_EXT2_FS_XATTR_SHARING */
-Index: linux-DRV401/fs/ext2/xattr_user.c
-===================================================================
---- linux-DRV401.orig/fs/ext2/xattr_user.c     2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/ext2/xattr_user.c  2004-10-15 11:03:51.000000000 -0700
-@@ -0,0 +1,103 @@
-+/*
-+ * linux/fs/ext2/xattr_user.c
-+ * Handler for extended user attributes.
-+ *
-+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/fs.h>
-+#include <linux/ext2_fs.h>
-+#include <linux/ext2_xattr.h>
-+
-+#ifdef CONFIG_EXT2_FS_POSIX_ACL
-+# include <linux/ext2_acl.h>
-+#endif
-+
-+#define XATTR_USER_PREFIX "user."
-+
-+static size_t
-+ext2_xattr_user_list(char *list, struct inode *inode,
-+                   const char *name, int name_len)
-+{
-+      const int prefix_len = sizeof(XATTR_USER_PREFIX)-1;  /* without the NUL */
-+
-+      if (!test_opt(inode->i_sb, XATTR_USER))  /* option off: list nothing */
-+              return 0;
-+
-+      if (list) {  /* list == NULL: only the length is reported */
-+              memcpy(list, XATTR_USER_PREFIX, prefix_len);
-+              memcpy(list+prefix_len, name, name_len);
-+              list[prefix_len + name_len] = '\0';
-+      }
-+      return prefix_len + name_len + 1;  /* prefix + name + terminating NUL */
-+}
-+
-+static int
-+ext2_xattr_user_get(struct inode *inode, const char *name,
-+                  void *buffer, size_t size)
-+{
-+      int error;
-+
-+      if (strcmp(name, "") == 0)  /* an empty attribute name is rejected */
-+              return -EINVAL;
-+      if (!test_opt(inode->i_sb, XATTR_USER))
-+              return -ENOTSUP;
-+#ifdef CONFIG_EXT2_FS_POSIX_ACL
-+      error = ext2_permission_locked(inode, MAY_READ);
-+#else
-+      error = permission(inode, MAY_READ);
-+#endif
-+      if (error)  /* caller lacks read permission on the inode */
-+              return error;
-+
-+      return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name,
-+                            buffer, size);
-+}
-+
-+static int
-+ext2_xattr_user_set(struct inode *inode, const char *name,
-+                  const void *value, size_t size, int flags)
-+{
-+      int error;
-+
-+      if (strcmp(name, "") == 0)  /* an empty attribute name is rejected */
-+              return -EINVAL;
-+      if (!test_opt(inode->i_sb, XATTR_USER))
-+              return -ENOTSUP;
-+      if ( !S_ISREG(inode->i_mode) &&
-+          (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-+              return -EPERM;  /* only regular files and non-sticky directories */
-+#ifdef CONFIG_EXT2_FS_POSIX_ACL
-+      error = ext2_permission_locked(inode, MAY_WRITE);
-+#else
-+      error = permission(inode, MAY_WRITE);
-+#endif
-+      if (error)  /* caller lacks write permission on the inode */
-+              return error;
-+
-+      return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
-+                            value, size, flags);
-+}
-+
-+struct ext2_xattr_handler ext2_xattr_user_handler = {
-+      prefix: XATTR_USER_PREFIX,  /* "user." */
-+      list:   ext2_xattr_user_list,
-+      get:    ext2_xattr_user_get,
-+      set:    ext2_xattr_user_set,
-+};  /* registered under EXT2_XATTR_INDEX_USER by init_ext2_xattr_user() */
-+
-+int __init
-+init_ext2_xattr_user(void)
-+{
-+      return ext2_xattr_register(EXT2_XATTR_INDEX_USER,  /* result passed through */
-+                                 &ext2_xattr_user_handler);
-+}
-+
-+void
-+exit_ext2_xattr_user(void)
-+{
-+      ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,  /* undoes init_ext2_xattr_user() */
-+                            &ext2_xattr_user_handler);
-+}
-Index: linux-DRV401/fs/ext3/Makefile
-===================================================================
---- linux-DRV401.orig/fs/ext3/Makefile 2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/fs/ext3/Makefile      2004-10-15 11:03:51.000000000 -0700
-@@ -1,5 +1,5 @@
- #
--# Makefile for the linux ext2-filesystem routines.
-+# Makefile for the linux ext3-filesystem routines.
- #
- # Note! Dependencies are done automagically by 'make dep', which also
- # removes any old dependencies. DON'T put your own dependencies here
-@@ -9,8 +9,14 @@
- O_TARGET := ext3.o
-+export-objs := ext3-exports.o
-+
- obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
--              ioctl.o namei.o super.o symlink.o hash.o
-+              ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
- obj-m    := $(O_TARGET)
-+export-objs += xattr.o
-+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o
-+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
-+
- include $(TOPDIR)/Rules.make
-Index: linux-DRV401/fs/ext3/file.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/file.c   2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/fs/ext3/file.c        2004-10-15 11:03:51.000000000 -0700
-@@ -23,6 +23,7 @@
- #include <linux/locks.h>
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
-+#include <linux/ext3_xattr.h>
- #include <linux/ext3_jbd.h>
- #include <linux/smp_lock.h>
-@@ -93,5 +94,9 @@
- struct inode_operations ext3_file_inode_operations = {
-       truncate:       ext3_truncate,          /* BKL held */
-       setattr:        ext3_setattr,           /* BKL held */
-+      setxattr:       ext3_setxattr,          /* BKL held */
-+      getxattr:       ext3_getxattr,          /* BKL held */
-+      listxattr:      ext3_listxattr,         /* BKL held */
-+      removexattr:    ext3_removexattr,       /* BKL held */
- };
-Index: linux-DRV401/fs/ext3/ialloc.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/ialloc.c 2004-10-15 10:24:00.000000000 -0700
-+++ linux-DRV401/fs/ext3/ialloc.c      2004-10-15 11:03:52.000000000 -0700
-@@ -17,6 +17,7 @@
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
- #include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
- #include <linux/stat.h>
- #include <linux/string.h>
- #include <linux/locks.h>
-@@ -216,6 +217,7 @@
-        * as writing the quota to disk may need the lock as well.
-        */
-       DQUOT_INIT(inode);
-+      ext3_xattr_delete_inode(handle, inode);
-       DQUOT_FREE_INODE(inode);
-       DQUOT_DROP(inode);
-Index: linux-DRV401/fs/ext3/inode.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/inode.c  2004-10-15 10:24:00.000000000 -0700
-+++ linux-DRV401/fs/ext3/inode.c       2004-10-15 11:03:52.000000000 -0700
-@@ -39,6 +39,18 @@
-  */
- #undef SEARCH_FROM_ZERO
-+/*
-+ * Test whether an inode is a fast symlink.
-+ */
-+static inline int ext3_inode_is_fast_symlink(struct inode *inode)
-+{
-+      int ea_blocks = inode->u.ext3_i.i_file_acl ?
-+              (inode->i_sb->s_blocksize >> 9) : 0;
-+
-+      return (S_ISLNK(inode->i_mode) &&
-+              inode->i_blocks - ea_blocks == 0);
-+}
-+
- /* The ext3 forget function must perform a revoke if we are freeing data
-  * which has been journaled.  Metadata (eg. indirect blocks) must be
-  * revoked in all cases. 
-@@ -48,7 +60,7 @@
-  * still needs to be revoked.
-  */
--static int ext3_forget(handle_t *handle, int is_metadata,
-+int ext3_forget(handle_t *handle, int is_metadata,
-                      struct inode *inode, struct buffer_head *bh,
-                      int blocknr)
- {
-@@ -164,9 +176,7 @@
- {
-       handle_t *handle;
-       
--      if (is_bad_inode(inode) ||
--          inode->i_ino == EXT3_ACL_IDX_INO ||
--          inode->i_ino == EXT3_ACL_DATA_INO)
-+      if (is_bad_inode(inode))
-               goto no_delete;
-       lock_kernel();
-@@ -1843,6 +1853,8 @@
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-           S_ISLNK(inode->i_mode)))
-               return;
-+      if (ext3_inode_is_fast_symlink(inode))
-+              return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
-@@ -1990,8 +2002,6 @@
-       struct ext3_group_desc * gdp;
-               
-       if ((inode->i_ino != EXT3_ROOT_INO &&
--              inode->i_ino != EXT3_ACL_IDX_INO &&
--              inode->i_ino != EXT3_ACL_DATA_INO &&
-               inode->i_ino != EXT3_JOURNAL_INO &&
-               inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
-               inode->i_ino > le32_to_cpu(
-@@ -2118,10 +2128,7 @@
-       brelse (iloc.bh);
--      if (inode->i_ino == EXT3_ACL_IDX_INO ||
--          inode->i_ino == EXT3_ACL_DATA_INO)
--              /* Nothing to do */ ;
--      else if (S_ISREG(inode->i_mode)) {
-+      if (S_ISREG(inode->i_mode)) {
-               inode->i_op = &ext3_file_inode_operations;
-               inode->i_fop = &ext3_file_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
-@@ -2129,15 +2136,17 @@
-               inode->i_op = &ext3_dir_inode_operations;
-               inode->i_fop = &ext3_dir_operations;
-       } else if (S_ISLNK(inode->i_mode)) {
--              if (!inode->i_blocks)
-+              if (ext3_inode_is_fast_symlink(inode))
-                       inode->i_op = &ext3_fast_symlink_inode_operations;
-               else {
--                      inode->i_op = &page_symlink_inode_operations;
-+                      inode->i_op = &ext3_symlink_inode_operations;
-                       inode->i_mapping->a_ops = &ext3_aops;
-               }
--      } else 
-+      } else {
-+              inode->i_op = &ext3_special_inode_operations;
-               init_special_inode(inode, inode->i_mode,
-                                  le32_to_cpu(iloc.raw_inode->i_block[0]));
-+      }
-       /* inode->i_attr_flags = 0;                             unused */
-       if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
-               /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
-Index: linux-DRV401/fs/ext3/namei.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/namei.c  2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/fs/ext3/namei.c       2004-10-15 11:03:52.000000000 -0700
-@@ -29,6 +29,7 @@
- #include <linux/sched.h>
- #include <linux/ext3_fs.h>
- #include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
- #include <linux/fcntl.h>
- #include <linux/stat.h>
- #include <linux/string.h>
-@@ -1612,7 +1613,7 @@
-       if (IS_SYNC(dir))
-               handle->h_sync = 1;
--      inode = ext3_new_inode (handle, dir, S_IFDIR);
-+      inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
-       err = PTR_ERR(inode);
-       if (IS_ERR(inode))
-               goto out_stop;
-@@ -1620,7 +1621,6 @@
-       inode->i_op = &ext3_dir_inode_operations;
-       inode->i_fop = &ext3_dir_operations;
-       inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
--      inode->i_blocks = 0;    
-       dir_block = ext3_bread (handle, inode, 0, 1, &err);
-       if (!dir_block) {
-               inode->i_nlink--; /* is this nlink == 0? */
-@@ -1647,9 +1647,6 @@
-       BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
-       ext3_journal_dirty_metadata(handle, dir_block);
-       brelse (dir_block);
--      inode->i_mode = S_IFDIR | mode;
--      if (dir->i_mode & S_ISGID)
--              inode->i_mode |= S_ISGID;
-       ext3_mark_inode_dirty(handle, inode);
-       err = ext3_add_entry (handle, dentry, inode);
-       if (err) {
-@@ -2018,7 +2015,7 @@
-               goto out_stop;
-       if (l > sizeof (EXT3_I(inode)->i_data)) {
--              inode->i_op = &page_symlink_inode_operations;
-+              inode->i_op = &ext3_symlink_inode_operations;
-               inode->i_mapping->a_ops = &ext3_aops;
-               /*
-                * block_symlink() calls back into ext3_prepare/commit_write.
-@@ -2245,4 +2242,16 @@
-       rmdir:          ext3_rmdir,             /* BKL held */
-       mknod:          ext3_mknod,             /* BKL held */
-       rename:         ext3_rename,            /* BKL held */
-+      setxattr:       ext3_setxattr,          /* BKL held */
-+      getxattr:       ext3_getxattr,          /* BKL held */
-+      listxattr:      ext3_listxattr,         /* BKL held */
-+      removexattr:    ext3_removexattr,       /* BKL held */
- };
-+
-+struct inode_operations ext3_special_inode_operations = {
-+      setxattr:       ext3_setxattr,          /* BKL held */
-+      getxattr:       ext3_getxattr,          /* BKL held */
-+      listxattr:      ext3_listxattr,         /* BKL held */
-+      removexattr:    ext3_removexattr,       /* BKL held */
-+};
-+
-Index: linux-DRV401/fs/ext3/super.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/super.c  2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/fs/ext3/super.c       2004-10-15 11:03:52.000000000 -0700
-@@ -24,6 +24,7 @@
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
- #include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
- #include <linux/slab.h>
- #include <linux/init.h>
- #include <linux/locks.h>
-@@ -404,6 +405,7 @@
-       kdev_t j_dev = sbi->s_journal->j_dev;
-       int i;
-+      ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-       if (!(sb->s_flags & MS_RDONLY)) {
-               EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-@@ -499,6 +501,7 @@
-                         int is_remount)
- {
-       unsigned long *mount_options = &sbi->s_mount_opt;
-+      
-       uid_t *resuid = &sbi->s_resuid;
-       gid_t *resgid = &sbi->s_resgid;
-       char * this_char;
-@@ -511,6 +514,13 @@
-            this_char = strtok (NULL, ",")) {
-               if ((value = strchr (this_char, '=')) != NULL)
-                       *value++ = 0;
-+#ifdef CONFIG_EXT3_FS_XATTR_USER
-+              if (!strcmp (this_char, "user_xattr"))
-+                      set_opt (*mount_options, XATTR_USER);
-+              else if (!strcmp (this_char, "nouser_xattr"))
-+                      clear_opt (*mount_options, XATTR_USER);
-+              else
-+#endif
-               if (!strcmp (this_char, "bsddf"))
-                       clear_opt (*mount_options, MINIX_DF);
-               else if (!strcmp (this_char, "nouid32")) {
-@@ -924,6 +934,12 @@
-       sbi->s_mount_opt = 0;
-       sbi->s_resuid = EXT3_DEF_RESUID;
-       sbi->s_resgid = EXT3_DEF_RESGID;
-+
-+      /* Default extended attribute flags */
-+#ifdef CONFIG_EXT3_FS_XATTR_USER
-+      /* set_opt(sbi->s_mount_opt, XATTR_USER); */
-+#endif
-+
-       if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) {
-               sb->s_dev = 0;
-               goto out_fail;
-@@ -1742,12 +1758,27 @@
- static int __init init_ext3_fs(void)
- {
--        return register_filesystem(&ext3_fs_type);
-+      int error = init_ext3_xattr();
-+      if (error)
-+              return error;
-+      error = init_ext3_xattr_user();
-+      if (error)
-+              goto fail;
-+      error = register_filesystem(&ext3_fs_type);
-+      if (!error)
-+              return 0;
-+      
-+      exit_ext3_xattr_user();
-+fail:
-+      exit_ext3_xattr();
-+      return error;
- }
- static void __exit exit_ext3_fs(void)
- {
-       unregister_filesystem(&ext3_fs_type);
-+      exit_ext3_xattr_user();
-+      exit_ext3_xattr();
- }
- EXPORT_SYMBOL(ext3_force_commit);
-Index: linux-DRV401/fs/ext3/symlink.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/symlink.c        2004-10-15 10:24:00.000000000 -0700
-+++ linux-DRV401/fs/ext3/symlink.c     2004-10-15 11:03:52.000000000 -0700
-@@ -20,6 +20,7 @@
- #include <linux/fs.h>
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
-+#include <linux/ext3_xattr.h>
- static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen)
- {
-@@ -33,7 +34,20 @@
-       return vfs_follow_link(nd, s);
- }
-+struct inode_operations ext3_symlink_inode_operations = {
-+      readlink:       page_readlink,          /* BKL not held.  Don't need */
-+      follow_link:    page_follow_link,       /* BKL not held.  Don't need */
-+      setxattr:       ext3_setxattr,          /* BKL held */
-+      getxattr:       ext3_getxattr,          /* BKL held */
-+      listxattr:      ext3_listxattr,         /* BKL held */
-+      removexattr:    ext3_removexattr,       /* BKL held */
-+};
-+
- struct inode_operations ext3_fast_symlink_inode_operations = {
-       readlink:       ext3_readlink,          /* BKL not held.  Don't need */
-       follow_link:    ext3_follow_link,       /* BKL not held.  Don't need */
-+      setxattr:       ext3_setxattr,          /* BKL held */
-+      getxattr:       ext3_getxattr,          /* BKL held */
-+      listxattr:      ext3_listxattr,         /* BKL held */
-+      removexattr:    ext3_removexattr,       /* BKL held */
- };
-Index: linux-DRV401/fs/ext3/xattr.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/xattr.c  2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/ext3/xattr.c       2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,1225 @@
-+/*
-+ * linux/fs/ext3/xattr.c
-+ *
-+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ *
-+ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
-+ * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
-+ * Extended attributes for symlinks and special files added per
-+ *  suggestion of Luka Renko <luka.renko@hermes.si>.
-+ */
-+
-+/*
-+ * Extended attributes are stored on disk blocks allocated outside of
-+ * any inode. The i_file_acl field is then made to point to this allocated
-+ * block. If several inodes carry identical extended attributes, these
-+ * inodes may share the same extended attribute block. Such situations
-+ * are automatically detected by keeping a cache of recent attribute block
-+ * numbers and hashes over the block's contents in memory.
-+ *
-+ *
-+ * Extended attribute block layout:
-+ *
-+ *   +------------------+
-+ *   | header           |
-+ *   | entry 1          | |
-+ *   | entry 2          | | growing downwards
-+ *   | entry 3          | v
-+ *   | four null bytes  |
-+ *   | . . .            |
-+ *   | value 1          | ^
-+ *   | value 3          | | growing upwards
-+ *   | value 2          | |
-+ *   +------------------+
-+ *
-+ * The block header is followed by multiple entry descriptors. These entry
-+ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
-+ * byte boundaries. The entry descriptors are sorted by attribute name,
-+ * so that two extended attribute blocks can be compared efficiently.
-+ *
-+ * Attribute values are aligned to the end of the block, stored in
-+ * no specific order. They are also padded to EXT3_XATTR_PAD byte
-+ * boundaries. No additional gaps are left between them.
-+ *
-+ * Locking strategy
-+ * ----------------
-+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of
-+ * the xattr inode operations are called, so we are guaranteed that only one
-+ * process accesses extended attributes of an inode at any time.
-+ *
-+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that
-+ * only a single process is modifying an extended attribute block, even
-+ * if the block is shared among inodes.
-+ *
-+ * Note for porting to 2.5
-+ * -----------------------
-+ * The BKL will no longer be held in the xattr inode operations.
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/fs.h>
-+#include <linux/locks.h>
-+#include <linux/slab.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_xattr.h>
-+#include <linux/mbcache.h>
-+#include <linux/quotaops.h>
-+#include <asm/semaphore.h>
-+#include <linux/compatmac.h>
-+
-+#define EXT3_EA_USER "user."
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
-+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
-+#endif
-+
-+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
-+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
-+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
-+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
-+
-+#ifdef EXT3_XATTR_DEBUG
-+# define ea_idebug(inode, f...) do { \
-+              printk(KERN_DEBUG "inode %s:%ld: ", \
-+                      kdevname(inode->i_dev), inode->i_ino); \
-+              printk(f); \
-+              printk("\n"); \
-+      } while (0)
-+# define ea_bdebug(bh, f...) do { \
-+              printk(KERN_DEBUG "block %s:%ld: ", \
-+                      kdevname(bh->b_dev), bh->b_blocknr); \
-+              printk(f); \
-+              printk("\n"); \
-+      } while (0)
-+#else
-+# define ea_idebug(f...)
-+# define ea_bdebug(f...)
-+#endif
-+
-+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *,
-+                         struct ext3_xattr_header *);
-+
-+#ifdef CONFIG_EXT3_FS_XATTR_SHARING
-+
-+static int ext3_xattr_cache_insert(struct buffer_head *);
-+static struct buffer_head *ext3_xattr_cache_find(struct inode *,
-+                                               struct ext3_xattr_header *);
-+static void ext3_xattr_cache_remove(struct buffer_head *);
-+static void ext3_xattr_rehash(struct ext3_xattr_header *,
-+                            struct ext3_xattr_entry *);
-+
-+static struct mb_cache *ext3_xattr_cache;
-+
-+#else
-+# define ext3_xattr_cache_insert(bh) 0
-+# define ext3_xattr_cache_find(inode, header) NULL
-+# define ext3_xattr_cache_remove(bh) do {} while (0) /* no-op stub; one statement, safe before "else" */
-+# define ext3_xattr_rehash(header, entry) do {} while (0) /* no-op stub; one statement, safe before "else" */
-+#endif
-+
-+/*
-+ * If a file system does not share extended attributes among inodes,
-+ * we should not need the ext3_xattr_sem semaphore. However, the
-+ * filesystem may still contain shared blocks, so we always take
-+ * the lock.
-+ */
-+
-+DECLARE_MUTEX(ext3_xattr_sem);
-+
-+static inline int
-+ext3_xattr_new_block(handle_t *handle, struct inode *inode,
-+                   int * errp, int force)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
-+              EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb);
-+
-+      /* How can we enforce the allocation? */
-+      int block = ext3_new_block(handle, inode, goal, 0, 0, errp);
-+#ifdef OLD_QUOTAS
-+      if (!*errp)
-+              inode->i_blocks += inode->i_sb->s_blocksize >> 9;
-+#endif
-+      return block;
-+}
-+
-+static inline int
-+ext3_xattr_quota_alloc(struct inode *inode, int force)
-+{
-+      /* How can we enforce the allocation? */
-+#ifdef OLD_QUOTAS
-+      int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1);
-+      if (!error)
-+              inode->i_blocks += inode->i_sb->s_blocksize >> 9;
-+#else
-+      int error = DQUOT_ALLOC_BLOCK(inode, 1);
-+#endif
-+      return error;
-+}
-+
-+#ifdef OLD_QUOTAS
-+
-+static inline void
-+ext3_xattr_quota_free(struct inode *inode)
-+{
-+      DQUOT_FREE_BLOCK(inode->i_sb, inode, 1);
-+      inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
-+}
-+
-+static inline void
-+ext3_xattr_free_block(handle_t *handle, struct inode * inode,
-+                    unsigned long block)
-+{
-+      ext3_free_blocks(handle, inode, block, 1);
-+      inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
-+}
-+
-+#else
-+# define ext3_xattr_quota_free(inode) \
-+      DQUOT_FREE_BLOCK(inode, 1)
-+# define ext3_xattr_free_block(handle, inode, block) \
-+      ext3_free_blocks(handle, inode, block, 1)
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
-+
-+static inline struct buffer_head *
-+sb_bread(struct super_block *sb, int block)
-+{
-+      return bread(sb->s_dev, block, sb->s_blocksize);
-+}
-+
-+static inline struct buffer_head *
-+sb_getblk(struct super_block *sb, int block)
-+{
-+      return getblk(sb->s_dev, block, sb->s_blocksize);
-+}
-+
-+#endif
-+
-+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
-+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
-+
-+int
-+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
-+{
-+      int error = -EINVAL;
-+
-+      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
-+              write_lock(&ext3_handler_lock);
-+              if (!ext3_xattr_handlers[name_index-1]) {
-+                      ext3_xattr_handlers[name_index-1] = handler;
-+                      error = 0;
-+              }
-+              write_unlock(&ext3_handler_lock);
-+      }
-+      return error;
-+}
-+
-+void
-+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
-+{     /* Clears the handler slot for name_index (1-based); `handler` is not compared. */
-+      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { /* was "||": always true, allowed out-of-bounds writes */
-+              write_lock(&ext3_handler_lock);
-+              ext3_xattr_handlers[name_index-1] = NULL;
-+              write_unlock(&ext3_handler_lock);
-+      }     /* out-of-range indices are silently ignored */
-+}
-+
-+static inline const char *
-+strcmp_prefix(const char *a, const char *a_prefix)
-+{
-+      while (*a_prefix && *a == *a_prefix) {
-+              a++;
-+              a_prefix++;
-+      }
-+      return *a_prefix ? NULL : a;
-+}
-+
-+/*
-+ * Decode the extended attribute name, and translate it into
-+ * the name_index and name suffix.
-+ */
-+static inline struct ext3_xattr_handler *
-+ext3_xattr_resolve_name(const char **name)
-+{
-+      struct ext3_xattr_handler *handler = NULL;
-+      int i;
-+
-+      if (!*name)
-+              return NULL;
-+      read_lock(&ext3_handler_lock);
-+      for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
-+              if (ext3_xattr_handlers[i]) {
-+                      const char *n = strcmp_prefix(*name,
-+                              ext3_xattr_handlers[i]->prefix);
-+                      if (n) {
-+                              handler = ext3_xattr_handlers[i];
-+                              *name = n;
-+                              break;
-+                      }
-+              }
-+      }
-+      read_unlock(&ext3_handler_lock);
-+      return handler;
-+}
-+
-+static inline struct ext3_xattr_handler *
-+ext3_xattr_handler(int name_index)
-+{
-+      struct ext3_xattr_handler *handler = NULL;
-+      if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
-+              read_lock(&ext3_handler_lock);
-+              handler = ext3_xattr_handlers[name_index-1];
-+              read_unlock(&ext3_handler_lock);
-+      }
-+      return handler;
-+}
-+
-+/*
-+ * Inode operation getxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext3_getxattr(struct dentry *dentry, const char *name,
-+            void *buffer, size_t size)
-+{
-+      struct ext3_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      handler = ext3_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->get(inode, name, buffer, size);
-+}
-+
-+/*
-+ * Inode operation listxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
-+{
-+      return ext3_xattr_list(dentry->d_inode, buffer, size);
-+}
-+
-+/*
-+ * Inode operation setxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+int
-+ext3_setxattr(struct dentry *dentry, const char *name,
-+            const void *value, size_t size, int flags)
-+{
-+      struct ext3_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      if (size == 0)
-+              value = "";  /* empty EA, do not remove */
-+      handler = ext3_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->set(inode, name, value, size, flags);
-+}
-+
-+/*
-+ * Inode operation removexattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+int
-+ext3_removexattr(struct dentry *dentry, const char *name)
-+{
-+      struct ext3_xattr_handler *handler;
-+      struct inode *inode = dentry->d_inode;
-+
-+      handler = ext3_xattr_resolve_name(&name);
-+      if (!handler)
-+              return -ENOTSUP;
-+      return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
-+}
-+
-+/*
-+ * ext3_xattr_get()
-+ *
-+ * Copy an extended attribute into the buffer
-+ * provided, or compute the buffer size required.
-+ * Buffer is NULL to compute the size of the buffer required.
-+ *
-+ * Returns a negative error number on failure, or the number of bytes
-+ * used / required on success.
-+ */
-+int
-+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-+             void *buffer, size_t buffer_size)
-+{
-+      struct buffer_head *bh = NULL;
-+      struct ext3_xattr_entry *entry;
-+      unsigned int block, size;
-+      char *end;
-+      int name_len, error;
-+
-+      ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
-+                name_index, name, buffer, (long)buffer_size);
-+
-+      if (name == NULL)
-+              return -EINVAL;
-+      if (!EXT3_I(inode)->i_file_acl)
-+              return -ENOATTR;
-+      block = EXT3_I(inode)->i_file_acl;
-+      ea_idebug(inode, "reading block %d", block);
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh)
-+              return -EIO;
-+      ea_bdebug(bh, "b_count=%d, refcount=%d",
-+              atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-+      end = bh->b_data + bh->b_size;
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+bad_block:    ext3_error(inode->i_sb, "ext3_xattr_get",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              error = -EIO;
-+              goto cleanup;
-+      }
-+      /* find named attribute */
-+      name_len = strlen(name);
-+
-+      error = -ERANGE;
-+      if (name_len > 255)
-+              goto cleanup;
-+      entry = FIRST_ENTRY(bh);
-+      while (!IS_LAST_ENTRY(entry)) {
-+              struct ext3_xattr_entry *next =
-+                      EXT3_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+              if (name_index == entry->e_name_index &&
-+                  name_len == entry->e_name_len &&
-+                  memcmp(name, entry->e_name, name_len) == 0)
-+                      goto found;
-+              entry = next;
-+      }
-+      /* Check the remaining name entries */
-+      while (!IS_LAST_ENTRY(entry)) {
-+              struct ext3_xattr_entry *next =
-+                      EXT3_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+              entry = next;
-+      }
-+      if (ext3_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      error = -ENOATTR;
-+      goto cleanup;
-+found:
-+      /* check the buffer size */
-+      if (entry->e_value_block != 0)
-+              goto bad_block;
-+      size = le32_to_cpu(entry->e_value_size);
-+      if (size > inode->i_sb->s_blocksize ||
-+          le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
-+              goto bad_block;
-+
-+      if (ext3_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      if (buffer) {
-+              error = -ERANGE;
-+              if (size > buffer_size)
-+                      goto cleanup;
-+              /* return value of attribute */
-+              memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-+                      size);
-+      }
-+      error = size;
-+
-+cleanup:
-+      brelse(bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * ext3_xattr_list()
-+ *
-+ * Copy a list of attribute names into the buffer
-+ * provided, or compute the buffer size required.
-+ * Buffer is NULL to compute the size of the buffer required.
-+ *
-+ * Returns a negative error number on failure, or the number of bytes
-+ * used / required on success.
-+ */
-+int
-+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
-+{
-+      struct buffer_head *bh = NULL;
-+      struct ext3_xattr_entry *entry;
-+      unsigned int block, size = 0;
-+      char *buf, *end;
-+      int error;
-+
-+      ea_idebug(inode, "buffer=%p, buffer_size=%ld",
-+                buffer, (long)buffer_size);
-+
-+      if (!EXT3_I(inode)->i_file_acl)
-+              return 0;
-+      block = EXT3_I(inode)->i_file_acl;
-+      ea_idebug(inode, "reading block %d", block);
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh)
-+              return -EIO;
-+      ea_bdebug(bh, "b_count=%d, refcount=%d",
-+              atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-+      end = bh->b_data + bh->b_size;
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+bad_block:    ext3_error(inode->i_sb, "ext3_xattr_list",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              error = -EIO;
-+              goto cleanup;
-+      }
-+      /* compute the size required for the list of attribute names */
-+      for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+           entry = EXT3_XATTR_NEXT(entry)) {
-+              struct ext3_xattr_handler *handler;
-+              struct ext3_xattr_entry *next =
-+                      EXT3_XATTR_NEXT(entry);
-+              if ((char *)next >= end)
-+                      goto bad_block;
-+
-+              handler = ext3_xattr_handler(entry->e_name_index);
-+              if (handler)
-+                      size += handler->list(NULL, inode, entry->e_name,
-+                                            entry->e_name_len);
-+      }
-+
-+      if (ext3_xattr_cache_insert(bh))
-+              ea_idebug(inode, "cache insert failed");
-+      if (!buffer) {
-+              error = size;
-+              goto cleanup;
-+      } else {
-+              error = -ERANGE;
-+              if (size > buffer_size)
-+                      goto cleanup;
-+      }
-+
-+      /* list the attribute names */
-+      buf = buffer;
-+      for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+           entry = EXT3_XATTR_NEXT(entry)) {
-+              struct ext3_xattr_handler *handler;
-+
-+              handler = ext3_xattr_handler(entry->e_name_index);
-+              if (handler)
-+                      buf += handler->list(buf, inode, entry->e_name,
-+                                           entry->e_name_len);
-+      }
-+      error = size;
-+
-+cleanup:
-+      brelse(bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
-+ * not set, set it.
-+ */
-+static void ext3_xattr_update_super_block(handle_t *handle,
-+                                        struct super_block *sb)
-+{
-+      if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
-+              return;
-+
-+      lock_super(sb);
-+      ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
-+      EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR;
-+#endif
-+      EXT3_SB(sb)->s_es->s_feature_compat |=
-+              cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
-+      sb->s_dirt = 1;
-+      ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-+      unlock_super(sb);
-+}
-+
-+/*
-+ * ext3_xattr_set()
-+ *
-+ * Create, replace or remove an extended attribute for this inode. Buffer
-+ * is NULL to remove an existing extended attribute, and non-NULL to
-+ * either replace an existing extended attribute, or create a new extended
-+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
-+ * specify that an extended attribute must exist and must not exist
-+ * previous to the call, respectively.
-+ *
-+ * Returns 0, or a negative error number on failure.
-+ */
-+int
-+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
-+             const char *name, const void *value, size_t value_len, int flags)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      struct buffer_head *bh = NULL;
-+      struct ext3_xattr_header *header = NULL;
-+      struct ext3_xattr_entry *here, *last;
-+      unsigned int name_len;
-+      int block = EXT3_I(inode)->i_file_acl;
-+      int min_offs = sb->s_blocksize, not_found = 1, free, error;
-+      char *end;
-+      
-+      /*
-+       * header -- Points either into bh, or to a temporarily
-+       *           allocated buffer.
-+       * here -- The named entry found, or the place for inserting, within
-+       *         the block pointed to by header.
-+       * last -- Points right after the last named entry within the block
-+       *         pointed to by header.
-+       * min_offs -- The offset of the first value (values are aligned
-+       *             towards the end of the block).
-+       * end -- Points right after the block pointed to by header.
-+       */
-+      
-+      ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
-+                name_index, name, value, (long)value_len);
-+
-+      if (IS_RDONLY(inode))
-+              return -EROFS;
-+      if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-+              return -EPERM;
-+      if (value == NULL)
-+              value_len = 0;
-+      if (name == NULL)
-+              return -EINVAL;
-+      name_len = strlen(name);
-+      if (name_len > 255 || value_len > sb->s_blocksize)
-+              return -ERANGE;
-+      down(&ext3_xattr_sem);
-+
-+      if (block) {
-+              /* The inode already has an extended attribute block. */
-+              bh = sb_bread(sb, block);
-+              error = -EIO;
-+              if (!bh)
-+                      goto cleanup;
-+              ea_bdebug(bh, "b_count=%d, refcount=%d",
-+                      atomic_read(&(bh->b_count)),
-+                      le32_to_cpu(HDR(bh)->h_refcount));
-+              header = HDR(bh);
-+              end = bh->b_data + bh->b_size;
-+              if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-+                  header->h_blocks != cpu_to_le32(1)) {
-+bad_block:            ext3_error(sb, "ext3_xattr_set",
-+                              "inode %ld: bad block %d", inode->i_ino, block);
-+                      error = -EIO;
-+                      goto cleanup;
-+              }
-+              /* Find the named attribute. */
-+              here = FIRST_ENTRY(bh);
-+              while (!IS_LAST_ENTRY(here)) {
-+                      struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
-+                      if ((char *)next >= end)
-+                              goto bad_block;
-+                      if (!here->e_value_block && here->e_value_size) {
-+                              int offs = le16_to_cpu(here->e_value_offs);
-+                              if (offs < min_offs)
-+                                      min_offs = offs;
-+                      }
-+                      not_found = name_index - here->e_name_index;
-+                      if (!not_found)
-+                              not_found = name_len - here->e_name_len;
-+                      if (!not_found)
-+                              not_found = memcmp(name, here->e_name,name_len);
-+                      if (not_found <= 0)
-+                              break;
-+                      here = next;
-+              }
-+              last = here;
-+              /* We still need to compute min_offs and last. */
-+              while (!IS_LAST_ENTRY(last)) {
-+                      struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
-+                      if ((char *)next >= end)
-+                              goto bad_block;
-+                      if (!last->e_value_block && last->e_value_size) {
-+                              int offs = le16_to_cpu(last->e_value_offs);
-+                              if (offs < min_offs)
-+                                      min_offs = offs;
-+                      }
-+                      last = next;
-+              }
-+
-+              /* Check whether we have enough space left. */
-+              free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
-+      } else {
-+              /* We will use a new extended attribute block. */
-+              free = sb->s_blocksize -
-+                      sizeof(struct ext3_xattr_header) - sizeof(__u32);
-+              here = last = NULL;  /* avoid gcc uninitialized warning. */
-+      }
-+
-+      if (not_found) {
-+              /* Request to remove a nonexistent attribute? */
-+              error = -ENOATTR;
-+              if (flags & XATTR_REPLACE)
-+                      goto cleanup;
-+              error = 0;
-+              if (value == NULL)
-+                      goto cleanup;
-+              else
-+                      free -= EXT3_XATTR_LEN(name_len);
-+      } else {
-+              /* Request to create an existing attribute? */
-+              error = -EEXIST;
-+              if (flags & XATTR_CREATE)
-+                      goto cleanup;
-+              if (!here->e_value_block && here->e_value_size) {
-+                      unsigned int size = le32_to_cpu(here->e_value_size);
-+
-+                      if (le16_to_cpu(here->e_value_offs) + size > 
-+                          sb->s_blocksize || size > sb->s_blocksize)
-+                              goto bad_block;
-+                      free += EXT3_XATTR_SIZE(size);
-+              }
-+      }
-+      free -= EXT3_XATTR_SIZE(value_len);
-+      error = -ENOSPC;
-+      if (free < 0)
-+              goto cleanup;
-+
-+      /* Here we know that we can set the new attribute. */
-+
-+      if (header) {
-+              if (header->h_refcount == cpu_to_le32(1)) {
-+                      ea_bdebug(bh, "modifying in-place");
-+                      ext3_xattr_cache_remove(bh);
-+                      error = ext3_journal_get_write_access(handle, bh);
-+                      if (error)
-+                              goto cleanup;
-+              } else {
-+                      int offset;
-+
-+                      ea_bdebug(bh, "cloning");
-+                      header = kmalloc(bh->b_size, GFP_KERNEL);
-+                      error = -ENOMEM;
-+                      if (header == NULL)
-+                              goto cleanup;
-+                      memcpy(header, HDR(bh), bh->b_size);
-+                      header->h_refcount = cpu_to_le32(1);
-+                      offset = (char *)header - bh->b_data;
-+                      here = ENTRY((char *)here + offset);
-+                      last = ENTRY((char *)last + offset);
-+              }
-+      } else {
-+              /* Allocate a buffer where we construct the new block. */
-+              header = kmalloc(sb->s_blocksize, GFP_KERNEL);
-+              error = -ENOMEM;
-+              if (header == NULL)
-+                      goto cleanup;
-+              memset(header, 0, sb->s_blocksize);
-+              end = (char *)header + sb->s_blocksize;
-+              header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
-+              header->h_blocks = header->h_refcount = cpu_to_le32(1);
-+              last = here = ENTRY(header+1);
-+      }
-+
-+      if (not_found) {
-+              /* Insert the new name. */
-+              int size = EXT3_XATTR_LEN(name_len);
-+              int rest = (char *)last - (char *)here;
-+              memmove((char *)here + size, here, rest);
-+              memset(here, 0, size);
-+              here->e_name_index = name_index;
-+              here->e_name_len = name_len;
-+              memcpy(here->e_name, name, name_len);
-+      } else {
-+              /* Remove the old value. */
-+              if (!here->e_value_block && here->e_value_size) {
-+                      char *first_val = (char *)header + min_offs;
-+                      int offs = le16_to_cpu(here->e_value_offs);
-+                      char *val = (char *)header + offs;
-+                      size_t size = EXT3_XATTR_SIZE(
-+                              le32_to_cpu(here->e_value_size));
-+                      memmove(first_val + size, first_val, val - first_val);
-+                      memset(first_val, 0, size);
-+                      here->e_value_offs = 0;
-+                      min_offs += size;
-+
-+                      /* Adjust all value offsets. */
-+                      last = ENTRY(header+1);
-+                      while (!IS_LAST_ENTRY(last)) {
-+                              int o = le16_to_cpu(last->e_value_offs);
-+                              if (!last->e_value_block && o < offs)
-+                                      last->e_value_offs =
-+                                              cpu_to_le16(o + size);
-+                              last = EXT3_XATTR_NEXT(last);
-+                      }
-+              }
-+              if (value == NULL) {
-+                      /* Remove this attribute. */
-+                      if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) {
-+                              /* This block is now empty. */
-+                              error = ext3_xattr_set2(handle, inode, bh,NULL);
-+                              goto cleanup;
-+                      } else {
-+                              /* Remove the old name. */
-+                              int size = EXT3_XATTR_LEN(name_len);
-+                              last = ENTRY((char *)last - size);
-+                              memmove(here, (char*)here + size,
-+                                      (char*)last - (char*)here);
-+                              memset(last, 0, size);
-+                      }
-+              }
-+      }
-+
-+      if (value != NULL) {
-+              /* Insert the new value. */
-+              here->e_value_size = cpu_to_le32(value_len);
-+              if (value_len) {
-+                      size_t size = EXT3_XATTR_SIZE(value_len);
-+                      char *val = (char *)header + min_offs - size;
-+                      here->e_value_offs =
-+                              cpu_to_le16((char *)val - (char *)header);
-+                      memset(val + size - EXT3_XATTR_PAD, 0,
-+                             EXT3_XATTR_PAD); /* Clear the pad bytes. */
-+                      memcpy(val, value, value_len);
-+              }
-+      }
-+      ext3_xattr_rehash(header, here);
-+
-+      error = ext3_xattr_set2(handle, inode, bh, header);
-+
-+cleanup:
-+      brelse(bh);
-+      if (!(bh && header == HDR(bh)))
-+              kfree(header);
-+      up(&ext3_xattr_sem);
-+
-+      return error;
-+}
-+
-+/*
-+ * Second half of ext3_xattr_set(): Update the file system.
-+ */
-+static int
-+ext3_xattr_set2(handle_t *handle, struct inode *inode,
-+              struct buffer_head *old_bh, struct ext3_xattr_header *header)
-+{
-+      struct super_block *sb = inode->i_sb;
-+      struct buffer_head *new_bh = NULL;
-+      int error;
-+
-+      if (header) {
-+              new_bh = ext3_xattr_cache_find(inode, header);
-+              if (new_bh) {
-+                      /*
-+                       * We found an identical block in the cache.
-+                       * The old block will be released after updating
-+                       * the inode.
-+                       */
-+                      ea_bdebug(old_bh, "reusing block %ld",
-+                              new_bh->b_blocknr);
-+                      
-+                      error = -EDQUOT;
-+                      if (ext3_xattr_quota_alloc(inode, 1))
-+                              goto cleanup;
-+                      
-+                      error = ext3_journal_get_write_access(handle, new_bh);
-+                      if (error)
-+                              goto cleanup;
-+                      HDR(new_bh)->h_refcount = cpu_to_le32(
-+                              le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
-+                      ea_bdebug(new_bh, "refcount now=%d",
-+                              le32_to_cpu(HDR(new_bh)->h_refcount));
-+              } else if (old_bh && header == HDR(old_bh)) {
-+                      /* Keep this block. */
-+                      new_bh = old_bh;
-+                      ext3_xattr_cache_insert(new_bh);
-+              } else {
-+                      /* We need to allocate a new block */
-+                      int force = EXT3_I(inode)->i_file_acl != 0;
-+                      int block = ext3_xattr_new_block(handle, inode,
-+                                                       &error, force);
-+                      if (error)
-+                              goto cleanup;
-+                      ea_idebug(inode, "creating block %d", block);
-+
-+                      new_bh = sb_getblk(sb, block);
-+                      if (!new_bh) {
-+getblk_failed:                        ext3_xattr_free_block(handle, inode, block);
-+                              error = -EIO;
-+                              goto cleanup;
-+                      }
-+                      lock_buffer(new_bh);
-+                      error = ext3_journal_get_create_access(handle, new_bh);
-+                      if (error) {
-+                              unlock_buffer(new_bh);
-+                              goto getblk_failed;
-+                      }
-+                      memcpy(new_bh->b_data, header, new_bh->b_size);
-+                      mark_buffer_uptodate(new_bh, 1);
-+                      unlock_buffer(new_bh);
-+                      ext3_xattr_cache_insert(new_bh);
-+                      
-+                      ext3_xattr_update_super_block(handle, sb);
-+              }
-+              error = ext3_journal_dirty_metadata(handle, new_bh);
-+              if (error)
-+                      goto cleanup;
-+      }
-+
-+      /* Update the inode. */
-+      EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-+      inode->i_ctime = CURRENT_TIME;
-+      ext3_mark_inode_dirty(handle, inode);
-+      if (IS_SYNC(inode))
-+              handle->h_sync = 1;
-+
-+      error = 0;
-+      if (old_bh && old_bh != new_bh) {
-+              /*
-+               * If there was an old block, and we are not still using it,
-+               * we now release the old block.
-+              */
-+              unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
-+
-+              error = ext3_journal_get_write_access(handle, old_bh);
-+              if (error)
-+                      goto cleanup;
-+              if (refcount == 1) {
-+                      /* Free the old block. */
-+                      ea_bdebug(old_bh, "freeing");
-+                      ext3_xattr_free_block(handle, inode, old_bh->b_blocknr);
-+
-+                      /* ext3_forget() calls bforget() for us, but we
-+                         let our caller release old_bh, so we need to
-+                         duplicate the handle before. */
-+                      get_bh(old_bh);
-+                      ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
-+              } else {
-+                      /* Decrement the refcount only. */
-+                      refcount--;
-+                      HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
-+                      ext3_xattr_quota_free(inode);
-+                      ext3_journal_dirty_metadata(handle, old_bh);
-+                      ea_bdebug(old_bh, "refcount now=%d", refcount);
-+              }
-+      }
-+
-+cleanup:
-+      if (old_bh != new_bh)
-+              brelse(new_bh);
-+
-+      return error;
-+}
-+
-+/*
-+ * ext3_xattr_delete_inode()
-+ *
-+ * Free extended attribute resources associated with this inode. This
-+ * is called immediately before an inode is freed.
-+ */
-+void
-+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
-+{
-+      struct buffer_head *bh;
-+      unsigned int block = EXT3_I(inode)->i_file_acl;
-+
-+      if (!block)
-+              return;
-+      down(&ext3_xattr_sem);
-+
-+      bh = sb_bread(inode->i_sb, block);
-+      if (!bh) {
-+              ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
-+                      "inode %ld: block %d read error", inode->i_ino, block);
-+              goto cleanup;
-+      }
-+      ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
-+      if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
-+          HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+              ext3_error(inode->i_sb, "ext3_xattr_delete_inode",
-+                      "inode %ld: bad block %d", inode->i_ino, block);
-+              goto cleanup;
-+      }
-+      ext3_journal_get_write_access(handle, bh);
-+      ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
-+      if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-+              ext3_xattr_cache_remove(bh);
-+              ext3_xattr_free_block(handle, inode, block);
-+              ext3_forget(handle, 1, inode, bh, block);
-+              bh = NULL;
-+      } else {
-+              HDR(bh)->h_refcount = cpu_to_le32(
-+                      le32_to_cpu(HDR(bh)->h_refcount) - 1);
-+              ext3_journal_dirty_metadata(handle, bh);
-+              if (IS_SYNC(inode))
-+                      handle->h_sync = 1;
-+              ext3_xattr_quota_free(inode);
-+      }
-+      EXT3_I(inode)->i_file_acl = 0;
-+
-+cleanup:
-+      brelse(bh);
-+      up(&ext3_xattr_sem);
-+}
-+
-+/*
-+ * ext3_xattr_put_super()
-+ *
-+ * This is called when a file system is unmounted.
-+ */
-+void
-+ext3_xattr_put_super(struct super_block *sb)
-+{
-+#ifdef CONFIG_EXT3_FS_XATTR_SHARING
-+      mb_cache_shrink(ext3_xattr_cache, sb->s_dev);
-+#endif
-+}
-+
-+#ifdef CONFIG_EXT3_FS_XATTR_SHARING
-+
-+/*
-+ * ext3_xattr_cache_insert()
-+ *
-+ * Create a new entry in the extended attribute cache, and insert
-+ * it unless such an entry is already in the cache.
-+ *
-+ * Returns 0, or a negative error number on failure.
-+ */
-+static int
-+ext3_xattr_cache_insert(struct buffer_head *bh)
-+{
-+      __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-+      struct mb_cache_entry *ce;
-+      int error;
-+
-+      ce = mb_cache_entry_alloc(ext3_xattr_cache);
-+      if (!ce)
-+              return -ENOMEM;
-+      error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash);
-+      if (error) {
-+              mb_cache_entry_free(ce);
-+              if (error == -EBUSY) {
-+                      ea_bdebug(bh, "already in cache (%d cache entries)",
-+                              atomic_read(&ext3_xattr_cache->c_entry_count));
-+                      error = 0;
-+              }
-+      } else {
-+              ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
-+                        atomic_read(&ext3_xattr_cache->c_entry_count));
-+              mb_cache_entry_release(ce);
-+      }
-+      return error;
-+}
-+
-+/*
-+ * ext3_xattr_cmp()
-+ *
-+ * Compare two extended attribute blocks for equality.
-+ *
-+ * Returns 0 if the blocks are equal, 1 if they differ, and
-+ * a negative error number on errors.
-+ */
-+static int
-+ext3_xattr_cmp(struct ext3_xattr_header *header1,
-+             struct ext3_xattr_header *header2)
-+{
-+      struct ext3_xattr_entry *entry1, *entry2;
-+
-+      entry1 = ENTRY(header1+1);
-+      entry2 = ENTRY(header2+1);
-+      while (!IS_LAST_ENTRY(entry1)) {
-+              if (IS_LAST_ENTRY(entry2))
-+                      return 1;
-+              if (entry1->e_hash != entry2->e_hash ||
-+                  entry1->e_name_len != entry2->e_name_len ||
-+                  entry1->e_value_size != entry2->e_value_size ||
-+                  memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
-+                      return 1;
-+              if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
-+                      return -EIO;
-+              if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
-+                         (char *)header2 + le16_to_cpu(entry2->e_value_offs),
-+                         le32_to_cpu(entry1->e_value_size)))
-+                      return 1;
-+
-+              entry1 = EXT3_XATTR_NEXT(entry1);
-+              entry2 = EXT3_XATTR_NEXT(entry2);
-+      }
-+      if (!IS_LAST_ENTRY(entry2))
-+              return 1;
-+      return 0;
-+}
-+
-+/*
-+ * ext3_xattr_cache_find()
-+ *
-+ * Find an identical extended attribute block.
-+ *
-+ * Returns a pointer to the block found, or NULL if such a block was
-+ * not found or an error occurred.
-+ */
-+static struct buffer_head *
-+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header)
-+{
-+      __u32 hash = le32_to_cpu(header->h_hash);
-+      struct mb_cache_entry *ce;
-+
-+      if (!header->h_hash)
-+              return NULL;  /* never share */
-+      ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-+      ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash);
-+      while (ce) {
-+              struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
-+
-+              if (!bh) {
-+                      ext3_error(inode->i_sb, "ext3_xattr_cache_find",
-+                              "inode %ld: block %ld read error",
-+                              inode->i_ino, ce->e_block);
-+              } else if (le32_to_cpu(HDR(bh)->h_refcount) >
-+                         EXT3_XATTR_REFCOUNT_MAX) {
-+                      ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block,
-+                              le32_to_cpu(HDR(bh)->h_refcount),
-+                              EXT3_XATTR_REFCOUNT_MAX);
-+              } else if (!ext3_xattr_cmp(header, HDR(bh))) {
-+                      ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
-+                      mb_cache_entry_release(ce);
-+                      return bh;
-+              }
-+              brelse(bh);
-+              ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash);
-+      }
-+      return NULL;
-+}
-+
-+/*
-+ * ext3_xattr_cache_remove()
-+ *
-+ * Remove the cache entry of a block from the cache. Called when a
-+ * block becomes invalid.
-+ */
-+static void
-+ext3_xattr_cache_remove(struct buffer_head *bh)
-+{
-+      struct mb_cache_entry *ce;
-+
-+      ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr);
-+      if (ce) {
-+              ea_bdebug(bh, "removing (%d cache entries remaining)",
-+                        atomic_read(&ext3_xattr_cache->c_entry_count)-1);
-+              mb_cache_entry_free(ce);
-+      } else 
-+              ea_bdebug(bh, "no cache entry");
-+}
-+
-+#define NAME_HASH_SHIFT 5
-+#define VALUE_HASH_SHIFT 16
-+
-+/*
-+ * ext3_xattr_hash_entry()
-+ *
-+ * Compute the hash of an extended attribute.
-+ */
-+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
-+                                       struct ext3_xattr_entry *entry)
-+{
-+      __u32 hash = 0;
-+      char *name = entry->e_name;
-+      int n;
-+
-+      for (n=0; n < entry->e_name_len; n++) {
-+              hash = (hash << NAME_HASH_SHIFT) ^
-+                     (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
-+                     *name++;
-+      }
-+
-+      if (entry->e_value_block == 0 && entry->e_value_size != 0) {
-+              __u32 *value = (__u32 *)((char *)header +
-+                      le16_to_cpu(entry->e_value_offs));
-+              for (n = (le32_to_cpu(entry->e_value_size) +
-+                   EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
-+                      hash = (hash << VALUE_HASH_SHIFT) ^
-+                             (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
-+                             le32_to_cpu(*value++);
-+              }
-+      }
-+      entry->e_hash = cpu_to_le32(hash);
-+}
-+
-+#undef NAME_HASH_SHIFT
-+#undef VALUE_HASH_SHIFT
-+
-+#define BLOCK_HASH_SHIFT 16
-+
-+/*
-+ * ext3_xattr_rehash()
-+ *
-+ * Re-compute the extended attribute hash value after an entry has changed.
-+ */
-+static void ext3_xattr_rehash(struct ext3_xattr_header *header,
-+                            struct ext3_xattr_entry *entry)
-+{
-+      struct ext3_xattr_entry *here;
-+      __u32 hash = 0;
-+      
-+      ext3_xattr_hash_entry(header, entry);
-+      here = ENTRY(header+1);
-+      while (!IS_LAST_ENTRY(here)) {
-+              if (!here->e_hash) {
-+                      /* Block is not shared if an entry's hash value == 0 */
-+                      hash = 0;
-+                      break;
-+              }
-+              hash = (hash << BLOCK_HASH_SHIFT) ^
-+                     (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
-+                     le32_to_cpu(here->e_hash);
-+              here = EXT3_XATTR_NEXT(here);
-+      }
-+      header->h_hash = cpu_to_le32(hash);
-+}
-+
-+#undef BLOCK_HASH_SHIFT
-+
-+int __init
-+init_ext3_xattr(void)
-+{
-+      ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
-+              sizeof(struct mb_cache_entry) +
-+              sizeof(struct mb_cache_entry_index), 1, 61);
-+      if (!ext3_xattr_cache)
-+              return -ENOMEM;
-+
-+      return 0;
-+}
-+
-+void
-+exit_ext3_xattr(void)
-+{
-+      if (ext3_xattr_cache)
-+              mb_cache_destroy(ext3_xattr_cache);
-+      ext3_xattr_cache = NULL;
-+}
-+
-+#else  /* CONFIG_EXT3_FS_XATTR_SHARING */
-+
-+int __init
-+init_ext3_xattr(void)
-+{
-+      return 0;
-+}
-+
-+void
-+exit_ext3_xattr(void)
-+{
-+}
-+
-+#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
-Index: linux-DRV401/fs/ext3/xattr_user.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/xattr_user.c     2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/ext3/xattr_user.c  2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,111 @@
-+/*
-+ * linux/fs/ext3/xattr_user.c
-+ * Handler for extended user attributes.
-+ *
-+ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/string.h>
-+#include <linux/fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_xattr.h>
-+
-+#ifdef CONFIG_EXT3_FS_POSIX_ACL
-+# include <linux/ext3_acl.h>
-+#endif
-+
-+#define XATTR_USER_PREFIX "user."
-+
-+static size_t
-+ext3_xattr_user_list(char *list, struct inode *inode,
-+                   const char *name, int name_len)
-+{
-+      const int prefix_len = sizeof(XATTR_USER_PREFIX)-1;
-+
-+      if (!test_opt(inode->i_sb, XATTR_USER))
-+              return 0;
-+
-+      if (list) {
-+              memcpy(list, XATTR_USER_PREFIX, prefix_len);
-+              memcpy(list+prefix_len, name, name_len);
-+              list[prefix_len + name_len] = '\0';
-+      }
-+      return prefix_len + name_len + 1;
-+}
-+
-+static int
-+ext3_xattr_user_get(struct inode *inode, const char *name,
-+                  void *buffer, size_t size)
-+{
-+      int error;
-+
-+      if (strcmp(name, "") == 0)
-+              return -EINVAL;
-+      if (!test_opt(inode->i_sb, XATTR_USER))
-+              return -ENOTSUP;
-+#ifdef CONFIG_EXT3_FS_POSIX_ACL
-+      error = ext3_permission_locked(inode, MAY_READ);
-+#else
-+      error = permission(inode, MAY_READ);
-+#endif
-+      if (error)
-+              return error;
-+
-+      return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name,
-+                            buffer, size);
-+}
-+
-+static int
-+ext3_xattr_user_set(struct inode *inode, const char *name,
-+                  const void *value, size_t size, int flags)
-+{
-+      handle_t *handle;
-+      int error;
-+
-+      if (strcmp(name, "") == 0)
-+              return -EINVAL;
-+      if (!test_opt(inode->i_sb, XATTR_USER))
-+              return -ENOTSUP;
-+      if ( !S_ISREG(inode->i_mode) &&
-+          (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-+              return -EPERM;
-+#ifdef CONFIG_EXT3_FS_POSIX_ACL
-+      error = ext3_permission_locked(inode, MAY_WRITE);
-+#else
-+      error = permission(inode, MAY_WRITE);
-+#endif
-+      if (error)
-+              return error;
-+
-+      handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS);
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+      error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name,
-+                             value, size, flags);
-+      ext3_journal_stop(handle, inode);
-+
-+      return error;
-+}
-+
-+struct ext3_xattr_handler ext3_xattr_user_handler = {
-+      prefix: XATTR_USER_PREFIX,
-+      list:   ext3_xattr_user_list,
-+      get:    ext3_xattr_user_get,
-+      set:    ext3_xattr_user_set,
-+};
-+
-+int __init
-+init_ext3_xattr_user(void)
-+{
-+      return ext3_xattr_register(EXT3_XATTR_INDEX_USER,
-+                                 &ext3_xattr_user_handler);
-+}
-+
-+void
-+exit_ext3_xattr_user(void)
-+{
-+      ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
-+                            &ext3_xattr_user_handler);
-+}
-Index: linux-DRV401/fs/ext3/ext3-exports.c
-===================================================================
---- linux-DRV401.orig/fs/ext3/ext3-exports.c   2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/ext3/ext3-exports.c        2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,13 @@
-+#include <linux/config.h>
-+#include <linux/module.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
-+
-+EXPORT_SYMBOL(ext3_force_commit);
-+EXPORT_SYMBOL(ext3_bread);
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
-Index: linux-DRV401/fs/mbcache.c
-===================================================================
---- linux-DRV401.orig/fs/mbcache.c     2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/mbcache.c  2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,648 @@
-+/*
-+ * linux/fs/mbcache.c
-+ * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ */
-+
-+/*
-+ * Filesystem Meta Information Block Cache (mbcache)
-+ *
-+ * The mbcache caches blocks of block devices that need to be located
-+ * by their device/block number, as well as by other criteria (such
-+ * as the block's contents).
-+ *
-+ * There can only be one cache entry in a cache per device and block number.
-+ * Additional indexes need not be unique in this sense. The number of
-+ * additional indexes (=other criteria) can be hardwired at compile time
-+ * or specified at cache create time.
-+ *
-+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
-+ * in the cache. A valid entry is in the main hash tables of the cache,
-+ * and may also be in the lru list. An invalid entry is not in any hashes
-+ * or lists.
-+ *
-+ * A valid cache entry is only in the lru list if no handles refer to it.
-+ * Invalid cache entries will be freed when the last handle to the cache
-+ * entry is released. Entries that cannot be freed immediately are put
-+ * back on the lru list.
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+
-+#include <linux/fs.h>
-+#include <linux/slab.h>
-+#include <linux/sched.h>
-+#include <linux/cache_def.h>
-+#include <linux/version.h>
-+#include <linux/init.h>
-+#include <linux/mbcache.h>
-+
-+
-+#ifdef MB_CACHE_DEBUG
-+# define mb_debug(f...) do { \
-+              printk(KERN_DEBUG f); \
-+              printk("\n"); \
-+      } while (0)
-+#define mb_assert(c) do { if (!(c)) \
-+              printk(KERN_ERR "assertion " #c " failed\n"); \
-+      } while(0)
-+#else
-+# define mb_debug(f...) do { } while(0)
-+# define mb_assert(c) do { } while(0)
-+#endif
-+#define mb_error(f...) do { \
-+              printk(KERN_ERR f); \
-+              printk("\n"); \
-+      } while(0)
-+              
-+MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
-+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0)
-+MODULE_LICENSE("GPL");
-+#endif
-+
-+EXPORT_SYMBOL(mb_cache_create);
-+EXPORT_SYMBOL(mb_cache_shrink);
-+EXPORT_SYMBOL(mb_cache_destroy);
-+EXPORT_SYMBOL(mb_cache_entry_alloc);
-+EXPORT_SYMBOL(mb_cache_entry_insert);
-+EXPORT_SYMBOL(mb_cache_entry_release);
-+EXPORT_SYMBOL(mb_cache_entry_takeout);
-+EXPORT_SYMBOL(mb_cache_entry_free);
-+EXPORT_SYMBOL(mb_cache_entry_dup);
-+EXPORT_SYMBOL(mb_cache_entry_get);
-+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-+EXPORT_SYMBOL(mb_cache_entry_find_first);
-+EXPORT_SYMBOL(mb_cache_entry_find_next);
-+#endif
-+
-+
-+/*
-+ * Global data: list of all mbcache's, lru list, and a spinlock for
-+ * accessing cache data structures on SMP machines. The lru list is
-+ * global across all mbcaches.
-+ */
-+
-+static LIST_HEAD(mb_cache_list);
-+static LIST_HEAD(mb_cache_lru_list);
-+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED;
-+
-+static inline int
-+mb_cache_indexes(struct mb_cache *cache)
-+{
-+#ifdef MB_CACHE_INDEXES_COUNT
-+      return MB_CACHE_INDEXES_COUNT;
-+#else
-+      return cache->c_indexes_count;
-+#endif
-+}
-+
-+/*
-+ * The cache definition that init_mbcache() hands to register_cache() so
-+ * that the mbcache can be shrunk dynamically. The initializer is
-+ * positional: it fills the first two members of struct cache_definition
-+ * (name, shrink).
-+ */
-+
-+static void
-+mb_cache_memory_pressure(int priority, unsigned int gfp_mask);
-+
-+static struct cache_definition mb_cache_definition = {
-+      "mb_cache",                     /* name */
-+      mb_cache_memory_pressure        /* shrink callback */
-+};
-+
-+
-+/*
-+ * __mb_cache_entry_is_hashed()
-+ *
-+ * Returns non-zero if @ce is linked into a block hash chain, i.e. its
-+ * e_block_list node is not empty. mb_cache_entry_alloc() initializes that
-+ * node as an empty list, and __mb_cache_entry_unhash() resets it with
-+ * list_del_init().
-+ */
-+static inline int
-+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
-+{
-+      return !list_empty(&ce->e_block_list);
-+}
-+
-+
-+/*
-+ * __mb_cache_entry_unhash()
-+ *
-+ * Remove @ce from the block hash chain and from every additional index
-+ * chain. Does nothing when the entry is not hashed. The callers in this
-+ * file invoke it with mb_cache_spinlock held.
-+ */
-+static inline void
-+__mb_cache_entry_unhash(struct mb_cache_entry *ce)
-+{
-+      int idx, nr_indexes;
-+
-+      if (!__mb_cache_entry_is_hashed(ce))
-+              return;
-+
-+      /* list_del_init() so that the entry reads as "not hashed" afterwards */
-+      list_del_init(&ce->e_block_list);
-+      nr_indexes = mb_cache_indexes(ce->e_cache);
-+      for (idx = 0; idx < nr_indexes; idx++)
-+              list_del(&ce->e_indexes[idx].o_list);
-+}
-+
-+
-+/*
-+ * __mb_cache_entry_forget()
-+ *
-+ * Dispose of an entry that no longer has any handle. If the owning cache
-+ * has a free callback and it returns non-zero, the entry is put back on
-+ * the global lru list for a later attempt; otherwise its memory goes back
-+ * to the cache's slab and the cache's entry count is decremented.
-+ *
-+ * Takes mb_cache_spinlock itself on the re-queue path, so it must be
-+ * called without that lock held.
-+ *
-+ * @ce: the entry to dispose of
-+ * @gfp_mask: handed unchanged to the free callback
-+ */
-+static inline void
-+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask)
-+{
-+      struct mb_cache *owner = ce->e_cache;
-+
-+      mb_assert(atomic_read(&ce->e_used) == 0);
-+      if (!owner->c_op.free || !owner->c_op.free(ce, gfp_mask)) {
-+              kmem_cache_free(owner->c_entry_cache, ce);
-+              atomic_dec(&owner->c_entry_count);
-+              return;
-+      }
-+
-+      /* free failed -- put back on the lru list for freeing later. */
-+      spin_lock(&mb_cache_spinlock);
-+      list_add(&ce->e_lru_list, &mb_cache_lru_list);
-+      spin_unlock(&mb_cache_spinlock);
-+}
-+
-+
-+/*
-+ * __mb_cache_entry_release_unlock()
-+ *
-+ * Drop one handle on @ce and release mb_cache_spinlock, which the caller
-+ * must hold on entry. When the last handle goes away, a hashed entry is
-+ * appended to the global lru list, while an unhashed one is freed (after
-+ * the lock has been dropped).
-+ */
-+static inline void
-+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
-+{
-+      int last_handle = atomic_dec_and_test(&ce->e_used);
-+
-+      if (last_handle && !__mb_cache_entry_is_hashed(ce)) {
-+              /* no longer reachable through any chain: dispose of it */
-+              spin_unlock(&mb_cache_spinlock);
-+              __mb_cache_entry_forget(ce, GFP_KERNEL);
-+              return;
-+      }
-+      if (last_handle)
-+              list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
-+      spin_unlock(&mb_cache_spinlock);
-+}
-+
-+
-+/*
-+ * mb_cache_memory_pressure()  memory pressure callback
-+ *
-+ * Installed as the shrink callback of mb_cache_definition. Adds up the
-+ * entry counts of all caches, moves the chosen number of entries from the
-+ * head of the global lru list onto a private list while holding
-+ * mb_cache_spinlock, and disposes of them after the lock is dropped.
-+ *
-+ * @priority: Amount by which to shrink the cache (0 = highest priority).
-+ *            The number of entries to free is the total entry count
-+ *            divided by @priority; with 0 the whole count is used.
-+ * @gfp_mask: passed on to __mb_cache_entry_forget(), which hands it to
-+ *            the cache's free callback
-+ */
-+static void
-+mb_cache_memory_pressure(int priority, unsigned int gfp_mask)
-+{
-+      LIST_HEAD(free_list);
-+      struct list_head *l, *ltmp;
-+      int total = 0, target;
-+
-+      spin_lock(&mb_cache_spinlock);
-+      list_for_each(l, &mb_cache_list) {
-+              struct mb_cache *cache =
-+                      list_entry(l, struct mb_cache, c_cache_list);
-+              mb_debug("cache %s (%d)", cache->c_name,
-+                        atomic_read(&cache->c_entry_count));
-+              total += atomic_read(&cache->c_entry_count);
-+      }
-+      /* Priority 0 must not be used as a divisor; it selects everything. */
-+      target = priority ? total / priority : total;
-+      mb_debug("trying to free %d of %d entries", target, total);
-+      while (target-- && !list_empty(&mb_cache_lru_list)) {
-+              struct mb_cache_entry *ce =
-+                      list_entry(mb_cache_lru_list.next,
-+                                 struct mb_cache_entry, e_lru_list);
-+              list_del(&ce->e_lru_list);
-+              __mb_cache_entry_unhash(ce);
-+              list_add_tail(&ce->e_lru_list, &free_list);
-+      }
-+      spin_unlock(&mb_cache_spinlock);
-+      /*
-+       * __mb_cache_entry_forget() may re-link an entry onto the lru list,
-+       * hence the _safe iterator, which has already fetched the successor.
-+       */
-+      list_for_each_safe(l, ltmp, &free_list) {
-+              __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
-+                                                 e_lru_list), gfp_mask);
-+      }
-+}
-+
-+
-+/*
-+ * mb_cache_create()  create a new cache
-+ *
-+ * All entries in one cache are equal size. Cache entries may be from
-+ * multiple devices. The new cache is added to the global list of caches
-+ * that the memory pressure callback walks; the callback itself is
-+ * registered once, by init_mbcache(), not here. Takes a module use count
-+ * that is dropped again on failure or by mb_cache_destroy().
-+ *
-+ * Returns the new cache, or NULL if an argument is invalid or no more
-+ * memory was available.
-+ *
-+ * @name: name of the cache (informal); the pointer is stored, not copied
-+ * @cache_op: contains the callback called when freeing a cache entry;
-+ *            may be NULL, in which case no callback is installed
-+ * @entry_size: The size of a cache entry, including
-+ *              struct mb_cache_entry
-+ * @indexes_count: number of additional indexes in the cache. Must equal
-+ *                 MB_CACHE_INDEXES_COUNT if the number of indexes is
-+ *                 hardwired. Must not be negative.
-+ * @bucket_count: number of hash buckets. Must be at least 1.
-+ */
-+struct mb_cache *
-+mb_cache_create(const char *name, struct mb_cache_op *cache_op,
-+              size_t entry_size, int indexes_count, int bucket_count)
-+{
-+      int m=0, n;
-+      struct mb_cache *cache = NULL;
-+
-+      /*
-+       * bucket_count is the modulus of every hash computation, so zero
-+       * would divide by zero on the first lookup; a negative count of
-+       * either kind would wrap around in the size arithmetic below.
-+       */
-+      if (bucket_count <= 0 || indexes_count < 0)
-+              return NULL;
-+
-+      if(entry_size < sizeof(struct mb_cache_entry) +
-+         indexes_count * sizeof(struct mb_cache_entry_index))
-+              return NULL;
-+
-+      MOD_INC_USE_COUNT;
-+      cache = kmalloc(sizeof(struct mb_cache) +
-+                      indexes_count * sizeof(struct list_head), GFP_KERNEL);
-+      if (!cache)
-+              goto fail;
-+      cache->c_name = name;
-+      cache->c_op.free = NULL;
-+      if (cache_op)
-+              cache->c_op.free = cache_op->free;
-+      atomic_set(&cache->c_entry_count, 0);
-+      cache->c_bucket_count = bucket_count;
-+#ifdef MB_CACHE_INDEXES_COUNT
-+      mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
-+#else
-+      cache->c_indexes_count = indexes_count;
-+#endif
-+      cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
-+                                    GFP_KERNEL);
-+      if (!cache->c_block_hash)
-+              goto fail;
-+      for (n=0; n<bucket_count; n++)
-+              INIT_LIST_HEAD(&cache->c_block_hash[n]);
-+      /* m counts the index tables allocated so far; "fail" unwinds them. */
-+      for (m=0; m<indexes_count; m++) {
-+              cache->c_indexes_hash[m] = kmalloc(bucket_count *
-+                                               sizeof(struct list_head),
-+                                               GFP_KERNEL);
-+              if (!cache->c_indexes_hash[m])
-+                      goto fail;
-+              for (n=0; n<bucket_count; n++)
-+                      INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
-+      }
-+      cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
-+              0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL);
-+      if (!cache->c_entry_cache)
-+              goto fail;
-+
-+      spin_lock(&mb_cache_spinlock);
-+      list_add(&cache->c_cache_list, &mb_cache_list);
-+      spin_unlock(&mb_cache_spinlock);
-+      return cache;
-+
-+fail:
-+      if (cache) {
-+              while (--m >= 0)
-+                      kfree(cache->c_indexes_hash[m]);
-+              if (cache->c_block_hash)
-+                      kfree(cache->c_block_hash);
-+              kfree(cache);
-+      }
-+      MOD_DEC_USE_COUNT;
-+      return NULL;
-+}
-+
-+
-+/*
-+ * mb_cache_shrink()
-+ *
-+ * Removes the cache entries of a device. Only entries on the lru list are
-+ * considered, so entries that are currently in use cannot be freed and
-+ * remain in the cache.
-+ *
-+ * @cache: which cache to shrink. Not consulted by this implementation:
-+ *         the lru list is global and entries are matched on @dev only.
-+ * @dev: which device's cache entries to shrink
-+ */
-+void
-+mb_cache_shrink(struct mb_cache *cache, kdev_t dev)
-+{
-+      LIST_HEAD(doomed);
-+      struct list_head *pos, *next;
-+      struct mb_cache_entry *ce;
-+
-+      /* Phase 1: under the lock, collect the matching entries. */
-+      spin_lock(&mb_cache_spinlock);
-+      list_for_each_safe(pos, next, &mb_cache_lru_list) {
-+              ce = list_entry(pos, struct mb_cache_entry, e_lru_list);
-+              if (ce->e_dev != dev)
-+                      continue;
-+              list_del(&ce->e_lru_list);
-+              list_add_tail(&ce->e_lru_list, &doomed);
-+              __mb_cache_entry_unhash(ce);
-+      }
-+      spin_unlock(&mb_cache_spinlock);
-+
-+      /* Phase 2: dispose of them with the lock dropped. */
-+      list_for_each_safe(pos, next, &doomed) {
-+              ce = list_entry(pos, struct mb_cache_entry, e_lru_list);
-+              __mb_cache_entry_forget(ce, GFP_KERNEL);
-+      }
-+}
-+
-+
-+/*
-+ * mb_cache_destroy()
-+ *
-+ * Shrinks the cache to its minimum possible size (hopefully 0 entries),
-+ * takes it off the global list of caches, and then frees its slab, its
-+ * hash tables and the cache itself. Entries that are still in use are
-+ * reported as orphaned. Drops the module use count taken by
-+ * mb_cache_create().
-+ */
-+void
-+mb_cache_destroy(struct mb_cache *cache)
-+{
-+      LIST_HEAD(doomed);
-+      struct list_head *pos, *next;
-+      struct mb_cache_entry *ce;
-+      int idx;
-+
-+      spin_lock(&mb_cache_spinlock);
-+      list_for_each_safe(pos, next, &mb_cache_lru_list) {
-+              ce = list_entry(pos, struct mb_cache_entry, e_lru_list);
-+              if (ce->e_cache != cache)
-+                      continue;
-+              list_del(&ce->e_lru_list);
-+              list_add_tail(&ce->e_lru_list, &doomed);
-+              __mb_cache_entry_unhash(ce);
-+      }
-+      list_del(&cache->c_cache_list);
-+      spin_unlock(&mb_cache_spinlock);
-+
-+      list_for_each_safe(pos, next, &doomed) {
-+              ce = list_entry(pos, struct mb_cache_entry, e_lru_list);
-+              __mb_cache_entry_forget(ce, GFP_KERNEL);
-+      }
-+
-+      if (atomic_read(&cache->c_entry_count) > 0)
-+              mb_error("cache %s: %d orphaned entries",
-+                        cache->c_name,
-+                        atomic_read(&cache->c_entry_count));
-+
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0))
-+      /* We don't have kmem_cache_destroy() in 2.2.x */
-+      kmem_cache_shrink(cache->c_entry_cache);
-+#else
-+      kmem_cache_destroy(cache->c_entry_cache);
-+#endif
-+      for (idx = 0; idx < mb_cache_indexes(cache); idx++)
-+              kfree(cache->c_indexes_hash[idx]);
-+      kfree(cache->c_block_hash);
-+      kfree(cache);
-+
-+      MOD_DEC_USE_COUNT;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_alloc()
-+ *
-+ * Allocates a new cache entry. The new entry will not be valid initially,
-+ * and thus cannot be looked up yet. It should be filled with data, and
-+ * then inserted into the cache using mb_cache_entry_insert(). The caller
-+ * receives the entry with one handle held (e_used == 1). Returns NULL
-+ * if no more memory was available.
-+ */
-+struct mb_cache_entry *
-+mb_cache_entry_alloc(struct mb_cache *cache)
-+{
-+      struct mb_cache_entry *ce;
-+
-+      ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL);
-+      if (!ce)
-+              return NULL;
-+
-+      /*
-+       * Count the entry only once it exists. The count is decremented
-+       * solely when an entry is freed, so counting a failed allocation
-+       * would never be undone and mb_cache_destroy() would report a
-+       * phantom orphaned entry.
-+       */
-+      atomic_inc(&cache->c_entry_count);
-+      INIT_LIST_HEAD(&ce->e_lru_list);
-+      INIT_LIST_HEAD(&ce->e_block_list);
-+      ce->e_cache = cache;
-+      atomic_set(&ce->e_used, 1);
-+      return ce;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_insert()
-+ *
-+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into
-+ * the cache. After this, the cache entry can be looked up, but is not yet
-+ * in the lru list as the caller still holds a handle to it. Returns 0 on
-+ * success, or -EBUSY if a cache entry for that device + block exists
-+ * already (this may happen after a failed lookup, if another process has
-+ * inserted the same cache entry in the meantime). On -EBUSY the entry
-+ * passed in is left untouched.
-+ *
-+ * @ce: the entry to insert; if it is currently hashed it is unhashed first
-+ * @dev: device the cache entry belongs to
-+ * @block: block number
-+ * @keys: array of additional keys. There must be indexes_count entries
-+ *        in the array (as specified when creating the cache).
-+ */
-+int
-+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev,
-+                    unsigned long block, unsigned int keys[])
-+{
-+      struct mb_cache *cache = ce->e_cache;
-+      unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count;
-+      struct list_head *l;
-+      int error = -EBUSY, n;
-+
-+      spin_lock(&mb_cache_spinlock);
-+      /* Refuse a second entry for the same device + block. */
-+      list_for_each(l, &cache->c_block_hash[bucket]) {
-+              struct mb_cache_entry *other =
-+                      list_entry(l, struct mb_cache_entry, e_block_list);
-+              if (other->e_dev == dev && other->e_block == block)
-+                      goto out;
-+      }
-+      __mb_cache_entry_unhash(ce);
-+      ce->e_dev = dev;
-+      ce->e_block = block;
-+      list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
-+      for (n=0; n<mb_cache_indexes(cache); n++) {
-+              ce->e_indexes[n].o_key = keys[n];
-+              bucket = keys[n] % cache->c_bucket_count;
-+              list_add(&ce->e_indexes[n].o_list,
-+                       &cache->c_indexes_hash[n][bucket]);
-+      }
-+      /* Without this the function would report -EBUSY even on success. */
-+      error = 0;
-+out:
-+      spin_unlock(&mb_cache_spinlock);
-+      return error;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_release()
-+ *
-+ * Release a handle to a cache entry. When the last handle to a cache entry
-+ * is released it is either freed (if it is invalid, i.e. not hashed) or
-+ * otherwise inserted into the lru list.
-+ *
-+ * The spinlock taken here is dropped by __mb_cache_entry_release_unlock().
-+ */
-+void
-+mb_cache_entry_release(struct mb_cache_entry *ce)
-+{
-+      spin_lock(&mb_cache_spinlock);
-+      __mb_cache_entry_release_unlock(ce);
-+}
-+
-+
-+/*
-+ * mb_cache_entry_takeout()
-+ *
-+ * Take a cache entry out of the cache, making it invalid: it is removed
-+ * from the block hash chain and from all index chains, so lookups no
-+ * longer find it. The caller's handle is kept. The entry can later
-+ * be re-inserted using mb_cache_entry_insert(), or released using
-+ * mb_cache_entry_release().
-+ *
-+ * The entry is expected not to be on the lru list (asserted below).
-+ */
-+void
-+mb_cache_entry_takeout(struct mb_cache_entry *ce)
-+{
-+      spin_lock(&mb_cache_spinlock);
-+      mb_assert(list_empty(&ce->e_lru_list));
-+      __mb_cache_entry_unhash(ce);
-+      spin_unlock(&mb_cache_spinlock);
-+}
-+
-+
-+/*
-+ * mb_cache_entry_free()
-+ *
-+ * This is equivalent to the sequence mb_cache_entry_takeout() --
-+ * mb_cache_entry_release(), performed under a single acquisition of the
-+ * spinlock: the entry is unhashed and the caller's handle is dropped.
-+ * The lock is released by __mb_cache_entry_release_unlock().
-+ */
-+void
-+mb_cache_entry_free(struct mb_cache_entry *ce)
-+{
-+      spin_lock(&mb_cache_spinlock);
-+      mb_assert(list_empty(&ce->e_lru_list));
-+      __mb_cache_entry_unhash(ce);
-+      __mb_cache_entry_release_unlock(ce);
-+}
-+
-+
-+/*
-+ * mb_cache_entry_dup()
-+ *
-+ * Duplicate a handle to a cache entry (does not duplicate the cache entry
-+ * itself). After the call, both the old and the new handle must be released.
-+ *
-+ * Only the e_used counter is incremented, without taking the spinlock;
-+ * the pointer passed in is returned unchanged.
-+ */
-+struct mb_cache_entry *
-+mb_cache_entry_dup(struct mb_cache_entry *ce)
-+{
-+      atomic_inc(&ce->e_used);
-+      return ce;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_get()
-+ *
-+ * Look up a cache entry by device and block number (there can only be
-+ * one entry in the cache per device and block). A match is taken off the
-+ * lru list if it was on it, and gets an additional handle that the
-+ * caller must release. Returns NULL if no such cache entry exists.
-+ */
-+struct mb_cache_entry *
-+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block)
-+{
-+      unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count;
-+      struct mb_cache_entry *found = NULL;
-+      struct list_head *pos;
-+
-+      spin_lock(&mb_cache_spinlock);
-+      list_for_each(pos, &cache->c_block_hash[bucket]) {
-+              struct mb_cache_entry *ce =
-+                      list_entry(pos, struct mb_cache_entry, e_block_list);
-+
-+              if (ce->e_dev != dev || ce->e_block != block)
-+                      continue;
-+              /* in use from now on, so it must not stay on the lru list */
-+              if (!list_empty(&ce->e_lru_list))
-+                      list_del_init(&ce->e_lru_list);
-+              atomic_inc(&ce->e_used);
-+              found = ce;
-+              break;
-+      }
-+      spin_unlock(&mb_cache_spinlock);
-+      return found;
-+}
-+
-+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-+
-+/*
-+ * __mb_cache_entry_find()
-+ *
-+ * Walk one index hash chain, starting at node @l and stopping at @head,
-+ * and return the first entry that belongs to @dev and carries @key in
-+ * index @index. The match is taken off the lru list if it was on it and
-+ * gets an additional handle. Returns NULL when the chain is exhausted.
-+ * The callers in this file hold mb_cache_spinlock.
-+ */
-+static struct mb_cache_entry *
-+__mb_cache_entry_find(struct list_head *l, struct list_head *head,
-+                    int index, kdev_t dev, unsigned int key)
-+{
-+      struct list_head *pos;
-+
-+      for (pos = l; pos != head; pos = pos->next) {
-+              struct mb_cache_entry *ce =
-+                      list_entry(pos, struct mb_cache_entry,
-+                                 e_indexes[index].o_list);
-+
-+              if (ce->e_dev != dev || ce->e_indexes[index].o_key != key)
-+                      continue;
-+              if (!list_empty(&ce->e_lru_list))
-+                      list_del_init(&ce->e_lru_list);
-+              atomic_inc(&ce->e_used);
-+              return ce;
-+      }
-+      return NULL;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_find_first()
-+ *
-+ * Find the first cache entry on a given device with a certain key in
-+ * an additional index. Additional matches can be found with
-+ * mb_cache_entry_find_next(). Returns NULL if no match was found;
-+ * otherwise the caller holds a handle on the returned entry.
-+ *
-+ * @cache: the cache to search
-+ * @index: the number of the additional index to search
-+ *         (0<=index<indexes_count)
-+ * @dev: the device the cache entry should belong to
-+ * @key: the key in the index
-+ */
-+struct mb_cache_entry *
-+mb_cache_entry_find_first(struct mb_cache *cache, int index, kdev_t dev,
-+                        unsigned int key)
-+{
-+      unsigned int bucket = key % cache->c_bucket_count;
-+      struct list_head *chain;
-+      struct mb_cache_entry *match;
-+
-+      mb_assert(index < mb_cache_indexes(cache));
-+      spin_lock(&mb_cache_spinlock);
-+      chain = &cache->c_indexes_hash[index][bucket];
-+      /* start at the first node of the chain, stop back at its head */
-+      match = __mb_cache_entry_find(chain->next, chain, index, dev, key);
-+      spin_unlock(&mb_cache_spinlock);
-+      return match;
-+}
-+
-+
-+/*
-+ * mb_cache_entry_find_next()
-+ *
-+ * Find the next cache entry on a given device with a certain key in an
-+ * additional index. Returns NULL if no match could be found. The previous
-+ * entry is automatically released, so that mb_cache_entry_find_next() can
-+ * be called like this:
-+ *
-+ * entry = mb_cache_entry_find_first();
-+ * while (entry) {
-+ *    ...
-+ *    entry = mb_cache_entry_find_next(entry, ...);
-+ * }
-+ *
-+ * @prev: The previous match
-+ * @index: the number of the additional index to search
-+ *         (0<=index<indexes_count)
-+ * @dev: the device the cache entry should belong to
-+ * @key: the key in the index
-+ */
-+struct mb_cache_entry *
-+mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, kdev_t dev,
-+                       unsigned int key)
-+{
-+      struct mb_cache *cache = prev->e_cache;
-+      unsigned int bucket = key % cache->c_bucket_count;
-+      struct list_head *chain;
-+      struct mb_cache_entry *match;
-+
-+      mb_assert(index < mb_cache_indexes(cache));
-+      spin_lock(&mb_cache_spinlock);
-+      chain = &cache->c_indexes_hash[index][bucket];
-+      /* resume right behind the previous match */
-+      match = __mb_cache_entry_find(prev->e_indexes[index].o_list.next,
-+                                    chain, index, dev, key);
-+      /* drops the handle on prev and releases the spinlock */
-+      __mb_cache_entry_release_unlock(prev);
-+      return match;
-+}
-+
-+#endif  /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
-+
-+/*
-+ * Module entry point: registers mb_cache_definition, and with it the
-+ * mb_cache_memory_pressure() callback, via register_cache(). Always
-+ * returns 0.
-+ */
-+static int __init init_mbcache(void)
-+{
-+      register_cache(&mb_cache_definition);
-+      return 0;
-+}
-+
-+/* Module exit point: undoes the registration made by init_mbcache(). */
-+static void __exit exit_mbcache(void)
-+{
-+      unregister_cache(&mb_cache_definition);
-+}
-+
-+module_init(init_mbcache)
-+module_exit(exit_mbcache)
-+
-Index: linux-DRV401/fs/xattr.c
-===================================================================
---- linux-DRV401.orig/fs/xattr.c       2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/fs/xattr.c    2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,355 @@
-+/*
-+  File: fs/xattr.c
-+
-+  Extended attribute handling.
-+
-+  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
-+  Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
-+ */
-+#include <linux/fs.h>
-+#include <linux/slab.h>
-+#include <linux/vmalloc.h>
-+#include <linux/smp_lock.h>
-+#include <linux/file.h>
-+#include <linux/xattr.h>
-+#include <asm/uaccess.h>
-+
-+/*
-+ * Allocate a kernel buffer for an extended attribute value or name list.
-+ * Originally based on the Intermezzo PRESTO_ALLOC/PRESTO_FREE macros.
-+ *
-+ * Requests above @limit fail with -E2BIG. A zero @size is allowed on
-+ * purpose and yields NULL: the caller is only asking for a size, so no
-+ * buffer is needed. Up to PAGE_SIZE bytes come from kmalloc, anything
-+ * larger from vmalloc; that is rare, since attributes are supposed to
-+ * be small and attribute lists short. The 64K upper limit is derived
-+ * from the extended attribute size limit used by XFS.
-+ */
-+static void *
-+xattr_alloc(size_t size, size_t limit)
-+{
-+      void *buf;
-+
-+      if (size > limit)
-+              return ERR_PTR(-E2BIG);
-+      if (size == 0)  /* size request, no buffer is needed */
-+              return NULL;
-+
-+      if (size > PAGE_SIZE)
-+              buf = vmalloc((unsigned long) size);
-+      else
-+              buf = kmalloc((unsigned long) size, GFP_KERNEL);
-+
-+      return buf ? buf : ERR_PTR(-ENOMEM);
-+}
-+
-+static void
-+xattr_free(void *ptr, size_t size)
-+{
-+      /* Undo xattr_alloc(): the size tells which allocator was used. */
-+      if (size > PAGE_SIZE)
-+              vfree(ptr);
-+      else if (size > 0)
-+              kfree(ptr);
-+      /* size == 0 was a size request, no buffer was needed */
-+}
-+
-+/*
-+ * Extended attribute SET operations
-+ */
-+static long
-+setxattr(struct dentry *d, char *name, void *value, size_t size, int flags)
-+{
-+      char kname[XATTR_NAME_MAX + 1];
-+      struct inode *inode = d->d_inode;
-+      void *kvalue;
-+      int error;
-+
-+      if (flags & ~(XATTR_CREATE|XATTR_REPLACE))
-+              return -EINVAL;
-+
-+      /* an empty name, or one filling the whole buffer, is out of range */
-+      error = strncpy_from_user(kname, name, sizeof(kname));
-+      if (error == 0 || error == sizeof(kname))
-+              return -ERANGE;
-+      if (error < 0)
-+              return error;
-+
-+      kvalue = xattr_alloc(size, XATTR_SIZE_MAX);
-+      if (IS_ERR(kvalue))
-+              return PTR_ERR(kvalue);
-+
-+      error = -EFAULT;
-+      if (size > 0 && copy_from_user(kvalue, value, size))
-+              goto out;
-+      error = -EOPNOTSUPP;
-+      if (inode->i_op && inode->i_op->setxattr) {
-+              down(&inode->i_sem);
-+              lock_kernel();
-+              error = inode->i_op->setxattr(d, kname, kvalue, size, flags);
-+              unlock_kernel();
-+              up(&inode->i_sem);
-+      }
-+out:
-+      xattr_free(kvalue, size);
-+      return error;
-+}
-+
-+asmlinkage long
-+sys_setxattr(char *path, char *name, void *value, size_t size, int flags)
-+{
-+      struct nameidata where;
-+      int rc = user_path_walk(path, &where);
-+
-+      if (rc == 0) {
-+              /* path resolved: set the attribute on its dentry */
-+              rc = setxattr(where.dentry, name, value, size, flags);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage long
-+sys_lsetxattr(char *path, char *name, void *value, size_t size, int flags)
-+{
-+      struct nameidata where;
-+      int rc = user_path_walk_link(path, &where);
-+
-+      if (rc == 0) {
-+              /* same as sys_setxattr(), but via user_path_walk_link() */
-+              rc = setxattr(where.dentry, name, value, size, flags);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage long
-+sys_fsetxattr(int fd, char *name, void *value, size_t size, int flags)
-+{
-+      struct file *filp = fget(fd);
-+      int rc;
-+
-+      if (filp == NULL)
-+              return -EBADF;
-+      /* operate on the dentry behind the open file */
-+      rc = setxattr(filp->f_dentry, name, value, size, flags);
-+      fput(filp);
-+      return rc;
-+}
-+
-+/*
-+ * Extended attribute GET operations
-+ */
-+static ssize_t
-+getxattr(struct dentry *d, char *name, void *value, size_t size)
-+{
-+      char kname[XATTR_NAME_MAX + 1];
-+      struct inode *inode = d->d_inode;
-+      void *kvalue;
-+      ssize_t error;
-+
-+      error = strncpy_from_user(kname, name, sizeof(kname));
-+      if (error == 0 || error == sizeof(kname))
-+              return -ERANGE;
-+      if (error < 0)
-+              return error;
-+
-+      kvalue = xattr_alloc(size, XATTR_SIZE_MAX);
-+      if (IS_ERR(kvalue))
-+              return PTR_ERR(kvalue);
-+
-+      error = -EOPNOTSUPP;
-+      if (inode->i_op && inode->i_op->getxattr) {
-+              down(&inode->i_sem);
-+              lock_kernel();
-+              error = inode->i_op->getxattr(d, kname, kvalue, size);
-+              unlock_kernel();
-+              up(&inode->i_sem);
-+      }
-+      /* kvalue is NULL when size is 0, so nothing is copied back then */
-+      if (kvalue && error > 0 && copy_to_user(value, kvalue, error))
-+              error = -EFAULT;
-+      xattr_free(kvalue, size);
-+      return error;
-+}
-+
-+asmlinkage ssize_t
-+sys_getxattr(char *path, char *name, void *value, size_t size)
-+{
-+      struct nameidata where;
-+      ssize_t rc = user_path_walk(path, &where);
-+
-+      if (rc == 0) {
-+              /* path resolved: read the attribute from its dentry */
-+              rc = getxattr(where.dentry, name, value, size);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage ssize_t
-+sys_lgetxattr(char *path, char *name, void *value, size_t size)
-+{
-+      struct nameidata where;
-+      ssize_t rc = user_path_walk_link(path, &where);
-+
-+      if (rc == 0) {
-+              /* same as sys_getxattr(), but via user_path_walk_link() */
-+              rc = getxattr(where.dentry, name, value, size);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage ssize_t
-+sys_fgetxattr(int fd, char *name, void *value, size_t size)
-+{
-+      struct file *filp = fget(fd);
-+      ssize_t rc;
-+
-+      if (filp == NULL)
-+              return -EBADF;
-+      /* operate on the dentry behind the open file */
-+      rc = getxattr(filp->f_dentry, name, value, size);
-+      fput(filp);
-+      return rc;
-+}
-+
-+/*
-+ * Extended attribute LIST operations
-+ */
-+static ssize_t
-+listxattr(struct dentry *d, char *list, size_t size)
-+{
-+      struct inode *inode = d->d_inode;
-+      ssize_t error = -EOPNOTSUPP;
-+      char *klist;
-+
-+      klist = (char *)xattr_alloc(size, XATTR_LIST_MAX);
-+      if (IS_ERR(klist))
-+              return PTR_ERR(klist);
-+
-+      if (inode->i_op && inode->i_op->listxattr) {
-+              down(&inode->i_sem);
-+              lock_kernel();
-+              error = inode->i_op->listxattr(d, klist, size);
-+              unlock_kernel();
-+              up(&inode->i_sem);
-+      }
-+
-+      /* klist is NULL when size is 0, so nothing is copied back then */
-+      if (klist && error > 0 && copy_to_user(list, klist, error))
-+              error = -EFAULT;
-+      xattr_free(klist, size);
-+      return error;
-+}
-+
-+asmlinkage ssize_t
-+sys_listxattr(char *path, char *list, size_t size)
-+{
-+      struct nameidata where;
-+      ssize_t rc = user_path_walk(path, &where);
-+
-+      if (rc == 0) {
-+              /* path resolved: list the attributes of its dentry */
-+              rc = listxattr(where.dentry, list, size);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage ssize_t
-+sys_llistxattr(char *path, char *list, size_t size)
-+{
-+      struct nameidata where;
-+      ssize_t rc = user_path_walk_link(path, &where);
-+
-+      if (rc == 0) {
-+              /* same as sys_listxattr(), but via user_path_walk_link() */
-+              rc = listxattr(where.dentry, list, size);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage ssize_t
-+sys_flistxattr(int fd, char *list, size_t size)
-+{
-+      struct file *filp = fget(fd);
-+      ssize_t rc;
-+
-+      if (filp == NULL)
-+              return -EBADF;
-+      /* operate on the dentry behind the open file */
-+      rc = listxattr(filp->f_dentry, list, size);
-+      fput(filp);
-+      return rc;
-+}
-+
-+/*
-+ * Extended attribute REMOVE operations
-+ */
-+static long
-+removexattr(struct dentry *d, char *name)
-+{
-+      char kname[XATTR_NAME_MAX + 1];
-+      struct inode *inode = d->d_inode;
-+      int error;
-+
-+      error = strncpy_from_user(kname, name, sizeof(kname));
-+      if (error == 0 || error == sizeof(kname))
-+              return -ERANGE;
-+      if (error < 0)
-+              return error;
-+
-+      if (!inode->i_op || !inode->i_op->removexattr)
-+              return -EOPNOTSUPP;
-+      down(&inode->i_sem);
-+      lock_kernel();
-+      error = inode->i_op->removexattr(d, kname);
-+      unlock_kernel();
-+      up(&inode->i_sem);
-+      return error;
-+}
-+
-+asmlinkage long
-+sys_removexattr(char *path, char *name)
-+{
-+      struct nameidata where;
-+      int rc = user_path_walk(path, &where);
-+
-+      if (rc == 0) {
-+              /* path resolved: remove the attribute from its dentry */
-+              rc = removexattr(where.dentry, name);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage long
-+sys_lremovexattr(char *path, char *name)
-+{
-+      struct nameidata where;
-+      int rc = user_path_walk_link(path, &where);
-+
-+      if (rc == 0) {
-+              /* same as sys_removexattr(), but via user_path_walk_link() */
-+              rc = removexattr(where.dentry, name);
-+              path_release(&where);
-+      }
-+      return rc;
-+}
-+
-+asmlinkage long
-+sys_fremovexattr(int fd, char *name)
-+{
-+      struct file *filp = fget(fd);
-+      int rc;
-+
-+      if (filp == NULL)
-+              return -EBADF;
-+      /* operate on the dentry behind the open file */
-+      rc = removexattr(filp->f_dentry, name);
-+      fput(filp);
-+      return rc;
-+}
-Index: linux-DRV401/include/linux/cache_def.h
-===================================================================
---- linux-DRV401.orig/include/linux/cache_def.h        2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/include/linux/cache_def.h     2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,27 @@
-+/*
-+ * linux/cache_def.h
-+ * Handling of caches defined in drivers, filesystems, ...
-+ *
-+ * Copyright (C) 2002 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+ */
-+
-+#ifndef _LINUX_CACHE_DEF_H
-+#define _LINUX_CACHE_DEF_H
-+
-+#include <linux/list.h>
-+
-+/*
-+ * Describes one cache to register_cache()/unregister_cache(). The guard
-+ * above keeps a second inclusion from redefining the structure, and
-+ * <linux/list.h> supplies struct list_head for the embedded link.
-+ */
-+struct cache_definition {
-+      const char *name;                       /* name of the cache */
-+      void (*shrink)(int, unsigned int);      /* shrink callback */
-+      struct list_head link;                  /* list linkage; presumably used by register_cache() - confirm */
-+};
-+
-+extern void register_cache(struct cache_definition *);
-+extern void unregister_cache(struct cache_definition *);
-+
-+#endif /* _LINUX_CACHE_DEF_H */
-Index: linux-DRV401/include/linux/errno.h
-===================================================================
---- linux-DRV401.orig/include/linux/errno.h    2004-10-15 10:26:15.000000000 -0700
-+++ linux-DRV401/include/linux/errno.h 2004-10-15 11:03:52.000000000 -0700
-@@ -23,4 +23,8 @@
- #endif
-+/* Defined for extended attributes */
-+#define ENOATTR ENODATA               /* No such attribute */
-+#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
-+
- #endif
-Index: linux-DRV401/include/linux/ext2_fs.h
-===================================================================
---- linux-DRV401.orig/include/linux/ext2_fs.h  2004-10-15 10:26:11.000000000 -0700
-+++ linux-DRV401/include/linux/ext2_fs.h       2004-10-15 11:03:52.000000000 -0700
-@@ -57,8 +57,6 @@
-  */
- #define       EXT2_BAD_INO             1      /* Bad blocks inode */
- #define EXT2_ROOT_INO          2      /* Root inode */
--#define EXT2_ACL_IDX_INO       3      /* ACL inode */
--#define EXT2_ACL_DATA_INO      4      /* ACL inode */
- #define EXT2_BOOT_LOADER_INO   5      /* Boot loader inode */
- #define EXT2_UNDEL_DIR_INO     6      /* Undelete directory inode */
-@@ -86,7 +84,6 @@
- #else
- # define EXT2_BLOCK_SIZE(s)           (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size)
- #endif
--#define EXT2_ACLE_PER_BLOCK(s)                (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry))
- #define       EXT2_ADDR_PER_BLOCK(s)          (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
- #ifdef __KERNEL__
- # define EXT2_BLOCK_SIZE_BITS(s)      ((s)->s_blocksize_bits)
-@@ -121,28 +118,6 @@
- #endif
- /*
-- * ACL structures
-- */
--struct ext2_acl_header        /* Header of Access Control Lists */
--{
--      __u32   aclh_size;
--      __u32   aclh_file_count;
--      __u32   aclh_acle_count;
--      __u32   aclh_first_acle;
--};
--
--struct ext2_acl_entry /* Access Control List Entry */
--{
--      __u32   acle_size;
--      __u16   acle_perms;     /* Access permissions */
--      __u16   acle_type;      /* Type of entry */
--      __u16   acle_tag;       /* User or group identity */
--      __u16   acle_pad1;
--      __u32   acle_next;      /* Pointer on next entry for the */
--                                      /* same inode or on next free entry */
--};
--
--/*
-  * Structure of a blocks group descriptor
-  */
- struct ext2_group_desc
-@@ -314,6 +289,7 @@
- #define EXT2_MOUNT_ERRORS_PANIC               0x0040  /* Panic on errors */
- #define EXT2_MOUNT_MINIX_DF           0x0080  /* Mimics the Minix statfs */
- #define EXT2_MOUNT_NO_UID32           0x0200  /* Disable 32-bit UIDs */
-+#define EXT2_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- #define clear_opt(o, opt)             o &= ~EXT2_MOUNT_##opt
- #define set_opt(o, opt)                       o |= EXT2_MOUNT_##opt
-@@ -397,6 +373,7 @@
- #ifdef __KERNEL__
- #define EXT2_SB(sb)   (&((sb)->u.ext2_sb))
-+#define EXT2_I(inode) (&((inode)->u.ext2_i))
- #else
- /* Assume that user mode programs are passing in an ext2fs superblock, not
-  * a kernel struct super_block.  This will allow us to call the feature-test
-@@ -466,7 +443,7 @@
- #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008
- #define EXT2_FEATURE_INCOMPAT_ANY             0xffffffff
--#define EXT2_FEATURE_COMPAT_SUPP      0
-+#define EXT2_FEATURE_COMPAT_SUPP      EXT2_FEATURE_COMPAT_EXT_ATTR
- #define EXT2_FEATURE_INCOMPAT_SUPP    EXT2_FEATURE_INCOMPAT_FILETYPE
- #define EXT2_FEATURE_RO_COMPAT_SUPP   (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-                                        EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@
- /* namei.c */
- extern struct inode_operations ext2_dir_inode_operations;
-+extern struct inode_operations ext2_special_inode_operations;
- /* symlink.c */
-+extern struct inode_operations ext2_symlink_inode_operations;
- extern struct inode_operations ext2_fast_symlink_inode_operations;
- #endif        /* __KERNEL__ */
-Index: linux-DRV401/include/linux/ext2_xattr.h
-===================================================================
---- linux-DRV401.orig/include/linux/ext2_xattr.h       2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/include/linux/ext2_xattr.h    2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,157 @@
-+/*
-+  File: linux/ext2_xattr.h
-+
-+  On-disk format of extended attributes for the ext2 filesystem.
-+
-+  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
-+
-+#include <linux/config.h>
-+#include <linux/init.h>
-+#include <linux/xattr.h>
-+
-+/* Magic value in attribute blocks */
-+#define EXT2_XATTR_MAGIC              0xEA020000
-+
-+/* Maximum number of references to one attribute block */
-+#define EXT2_XATTR_REFCOUNT_MAX               1024
-+
-+/* Name indexes */
-+#define EXT2_XATTR_INDEX_MAX                  10
-+#define EXT2_XATTR_INDEX_USER                 1
-+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS     2
-+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT    3
-+
-+struct ext2_xattr_header {
-+      __u32   h_magic;        /* magic number for identification */
-+      __u32   h_refcount;     /* reference count */
-+      __u32   h_blocks;       /* number of disk blocks used */
-+      __u32   h_hash;         /* hash value of all attributes */
-+      __u32   h_reserved[4];  /* zero right now */
-+};
-+
-+struct ext2_xattr_entry {
-+      __u8    e_name_len;     /* length of name */
-+      __u8    e_name_index;   /* attribute name index */
-+      __u16   e_value_offs;   /* offset in disk block of value */
-+      __u32   e_value_block;  /* disk block attribute is stored on (n/i) */
-+      __u32   e_value_size;   /* size of attribute value */
-+      __u32   e_hash;         /* hash value of name and value */
-+      char    e_name[0];      /* attribute name */
-+};
-+
-+#define EXT2_XATTR_PAD_BITS           2
-+#define EXT2_XATTR_PAD                (1<<EXT2_XATTR_PAD_BITS)
-+#define EXT2_XATTR_ROUND              (EXT2_XATTR_PAD-1)
-+#define EXT2_XATTR_LEN(name_len) \
-+      (((name_len) + EXT2_XATTR_ROUND + \
-+      sizeof(struct ext2_xattr_entry)) & ~EXT2_XATTR_ROUND)
-+#define EXT2_XATTR_NEXT(entry) \
-+      ( (struct ext2_xattr_entry *)( \
-+        (char *)(entry) + EXT2_XATTR_LEN((entry)->e_name_len)) )
-+#define EXT2_XATTR_SIZE(size) \
-+      (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
-+
-+#ifdef __KERNEL__
-+
-+# ifdef CONFIG_EXT2_FS_XATTR
-+
-+struct ext2_xattr_handler {
-+      char *prefix;
-+      size_t (*list)(char *list, struct inode *inode, const char *name,
-+                     int name_len);
-+      int (*get)(struct inode *inode, const char *name, void *buffer,
-+                 size_t size);
-+      int (*set)(struct inode *inode, const char *name, const void *buffer,
-+                 size_t size, int flags);
-+};
-+
-+extern int ext2_xattr_register(int, struct ext2_xattr_handler *);
-+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *);
-+
-+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int);
-+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t);
-+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t);
-+extern int ext2_removexattr(struct dentry *, const char *);
-+
-+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
-+extern int ext2_xattr_list(struct inode *, char *, size_t);
-+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-+
-+extern void ext2_xattr_delete_inode(struct inode *);
-+extern void ext2_xattr_put_super(struct super_block *);
-+
-+extern int init_ext2_xattr(void) __init;
-+extern void exit_ext2_xattr(void);
-+
-+# else  /* CONFIG_EXT2_FS_XATTR */
-+#  define ext2_setxattr               NULL
-+#  define ext2_getxattr               NULL
-+#  define ext2_listxattr      NULL
-+#  define ext2_removexattr    NULL
-+
-+static inline int
-+ext2_xattr_get(struct inode *inode, int name_index,
-+             const char *name, void *buffer, size_t size)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline int
-+ext2_xattr_list(struct inode *inode, char *buffer, size_t size)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline int
-+ext2_xattr_set(struct inode *inode, int name_index, const char *name,
-+             const void *value, size_t size, int flags)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline void
-+ext2_xattr_delete_inode(struct inode *inode)
-+{
-+}
-+
-+static inline void
-+ext2_xattr_put_super(struct super_block *sb)
-+{
-+}
-+
-+static inline int
-+init_ext2_xattr(void)
-+{
-+      return 0;
-+}
-+
-+static inline void
-+exit_ext2_xattr(void)
-+{
-+}
-+
-+# endif  /* CONFIG_EXT2_FS_XATTR */
-+
-+# ifdef CONFIG_EXT2_FS_XATTR_USER
-+
-+extern int init_ext2_xattr_user(void) __init;
-+extern void exit_ext2_xattr_user(void);
-+
-+# else  /* CONFIG_EXT2_FS_XATTR_USER */
-+
-+static inline int
-+init_ext2_xattr_user(void)
-+{
-+      return 0;
-+}
-+
-+static inline void
-+exit_ext2_xattr_user(void)
-+{
-+}
-+
-+# endif  /* CONFIG_EXT2_FS_XATTR_USER */
-+
-+#endif  /* __KERNEL__ */
-+
-Index: linux-DRV401/include/linux/ext3_fs.h
-===================================================================
---- linux-DRV401.orig/include/linux/ext3_fs.h  2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/include/linux/ext3_fs.h       2004-10-15 11:03:52.000000000 -0700
-@@ -63,8 +63,6 @@
-  */
- #define       EXT3_BAD_INO             1      /* Bad blocks inode */
- #define EXT3_ROOT_INO          2      /* Root inode */
--#define EXT3_ACL_IDX_INO       3      /* ACL inode */
--#define EXT3_ACL_DATA_INO      4      /* ACL inode */
- #define EXT3_BOOT_LOADER_INO   5      /* Boot loader inode */
- #define EXT3_UNDEL_DIR_INO     6      /* Undelete directory inode */
- #define EXT3_RESIZE_INO                7      /* Reserved group descriptors inode */
-@@ -94,7 +92,6 @@
- #else
- # define EXT3_BLOCK_SIZE(s)           (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
- #endif
--#define EXT3_ACLE_PER_BLOCK(s)                (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
- #define       EXT3_ADDR_PER_BLOCK(s)          (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
- #ifdef __KERNEL__
- # define EXT3_BLOCK_SIZE_BITS(s)      ((s)->s_blocksize_bits)
-@@ -129,28 +126,6 @@
- #endif
- /*
-- * ACL structures
-- */
--struct ext3_acl_header        /* Header of Access Control Lists */
--{
--      __u32   aclh_size;
--      __u32   aclh_file_count;
--      __u32   aclh_acle_count;
--      __u32   aclh_first_acle;
--};
--
--struct ext3_acl_entry /* Access Control List Entry */
--{
--      __u32   acle_size;
--      __u16   acle_perms;     /* Access permissions */
--      __u16   acle_type;      /* Type of entry */
--      __u16   acle_tag;       /* User or group identity */
--      __u16   acle_pad1;
--      __u32   acle_next;      /* Pointer on next entry for the */
--                                      /* same inode or on next free entry */
--};
--
--/*
-  * Structure of a blocks group descriptor
-  */
- struct ext3_group_desc
-@@ -344,6 +319,7 @@
-   #define EXT3_MOUNT_WRITEBACK_DATA   0x0C00  /* No data ordering */
- #define EXT3_MOUNT_UPDATE_JOURNAL     0x1000  /* Update the journal format */
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
-+#define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@
- #define EXT3_FEATURE_INCOMPAT_RECOVER         0x0004 /* Needs recovery */
- #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV     0x0008 /* Journal device */
--#define EXT3_FEATURE_COMPAT_SUPP      0
-+#define EXT3_FEATURE_COMPAT_SUPP      EXT2_FEATURE_COMPAT_EXT_ATTR
- #define EXT3_FEATURE_INCOMPAT_SUPP    (EXT3_FEATURE_INCOMPAT_FILETYPE| \
-                                        EXT3_FEATURE_INCOMPAT_RECOVER)
- #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@
- extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
- /* inode.c */
-+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
- extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
- extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -771,8 +748,10 @@
- /* namei.c */
- extern struct inode_operations ext3_dir_inode_operations;
-+extern struct inode_operations ext3_special_inode_operations;
- /* symlink.c */
-+extern struct inode_operations ext3_symlink_inode_operations;
- extern struct inode_operations ext3_fast_symlink_inode_operations;
-Index: linux-DRV401/include/linux/ext3_jbd.h
-===================================================================
---- linux-DRV401.orig/include/linux/ext3_jbd.h 2004-10-15 10:39:16.000000000 -0700
-+++ linux-DRV401/include/linux/ext3_jbd.h      2004-10-15 11:03:52.000000000 -0700
-@@ -30,13 +30,19 @@
- #define EXT3_SINGLEDATA_TRANS_BLOCKS  8
-+/* Extended attributes may touch two data buffers, two bitmap buffers,
-+ * and two group summaries. */
-+
-+#define EXT3_XATTR_TRANS_BLOCKS               8
-+
- /* Define the minimum size for a transaction which modifies data.  This
-  * needs to take into account the fact that we may end up modifying two
-  * quota files too (one for the group, one for the user quota).  The
-  * superblock only gets updated once, of course, so don't bother
-  * counting that again for the quota updates. */
--#define EXT3_DATA_TRANS_BLOCKS                (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
-+#define EXT3_DATA_TRANS_BLOCKS                (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
-+                                       EXT3_XATTR_TRANS_BLOCKS - 2)
- extern int ext3_writepage_trans_blocks(struct inode *inode);
-Index: linux-DRV401/include/linux/ext3_xattr.h
-===================================================================
---- linux-DRV401.orig/include/linux/ext3_xattr.h       2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/include/linux/ext3_xattr.h    2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,157 @@
-+/*
-+  File: linux/ext3_xattr.h
-+
-+  On-disk format of extended attributes for the ext3 filesystem.
-+
-+  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
-+
-+#include <linux/config.h>
-+#include <linux/init.h>
-+#include <linux/xattr.h>
-+
-+/* Magic value in attribute blocks */
-+#define EXT3_XATTR_MAGIC              0xEA020000
-+
-+/* Maximum number of references to one attribute block */
-+#define EXT3_XATTR_REFCOUNT_MAX               1024
-+
-+/* Name indexes */
-+#define EXT3_XATTR_INDEX_MAX                  10
-+#define EXT3_XATTR_INDEX_USER                 1
-+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS     2
-+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT    3
-+
-+struct ext3_xattr_header {
-+      __u32   h_magic;        /* magic number for identification */
-+      __u32   h_refcount;     /* reference count */
-+      __u32   h_blocks;       /* number of disk blocks used */
-+      __u32   h_hash;         /* hash value of all attributes */
-+      __u32   h_reserved[4];  /* zero right now */
-+};
-+
-+struct ext3_xattr_entry {
-+      __u8    e_name_len;     /* length of name */
-+      __u8    e_name_index;   /* attribute name index */
-+      __u16   e_value_offs;   /* offset in disk block of value */
-+      __u32   e_value_block;  /* disk block attribute is stored on (n/i) */
-+      __u32   e_value_size;   /* size of attribute value */
-+      __u32   e_hash;         /* hash value of name and value */
-+      char    e_name[0];      /* attribute name */
-+};
-+
-+#define EXT3_XATTR_PAD_BITS           2
-+#define EXT3_XATTR_PAD                (1<<EXT3_XATTR_PAD_BITS)
-+#define EXT3_XATTR_ROUND              (EXT3_XATTR_PAD-1)
-+#define EXT3_XATTR_LEN(name_len) \
-+      (((name_len) + EXT3_XATTR_ROUND + \
-+      sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
-+#define EXT3_XATTR_NEXT(entry) \
-+      ( (struct ext3_xattr_entry *)( \
-+        (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
-+#define EXT3_XATTR_SIZE(size) \
-+      (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
-+
-+#ifdef __KERNEL__
-+
-+# ifdef CONFIG_EXT3_FS_XATTR
-+
-+struct ext3_xattr_handler {
-+      char *prefix;
-+      size_t (*list)(char *list, struct inode *inode, const char *name,
-+                     int name_len);
-+      int (*get)(struct inode *inode, const char *name, void *buffer,
-+                 size_t size);
-+      int (*set)(struct inode *inode, const char *name, const void *buffer,
-+                 size_t size, int flags);
-+};
-+
-+extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
-+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
-+
-+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int);
-+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
-+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
-+extern int ext3_removexattr(struct dentry *, const char *);
-+
-+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
-+extern int ext3_xattr_list(struct inode *, char *, size_t);
-+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int);
-+
-+extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
-+extern void ext3_xattr_put_super(struct super_block *);
-+
-+extern int init_ext3_xattr(void) __init;
-+extern void exit_ext3_xattr(void);
-+
-+# else  /* CONFIG_EXT3_FS_XATTR */
-+#  define ext3_setxattr               NULL
-+#  define ext3_getxattr               NULL
-+#  define ext3_listxattr      NULL
-+#  define ext3_removexattr    NULL
-+
-+static inline int
-+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-+             void *buffer, size_t size)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline int
-+ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline int
-+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
-+             const char *name, const void *value, size_t size, int flags)
-+{
-+      return -ENOTSUP;
-+}
-+
-+static inline void
-+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
-+{
-+}
-+
-+static inline void
-+ext3_xattr_put_super(struct super_block *sb)
-+{
-+}
-+
-+static inline int
-+init_ext3_xattr(void)
-+{
-+      return 0;
-+}
-+
-+static inline void
-+exit_ext3_xattr(void)
-+{
-+}
-+
-+# endif  /* CONFIG_EXT3_FS_XATTR */
-+
-+# ifdef CONFIG_EXT3_FS_XATTR_USER
-+
-+extern int init_ext3_xattr_user(void) __init;
-+extern void exit_ext3_xattr_user(void);
-+
-+# else  /* CONFIG_EXT3_FS_XATTR_USER */
-+
-+static inline int
-+init_ext3_xattr_user(void)
-+{
-+      return 0;
-+}
-+
-+static inline void
-+exit_ext3_xattr_user(void)
-+{
-+}
-+
-+#endif  /* CONFIG_EXT3_FS_XATTR_USER */
-+
-+#endif  /* __KERNEL__ */
-+
-Index: linux-DRV401/include/linux/fs.h
-===================================================================
---- linux-DRV401.orig/include/linux/fs.h       2004-10-15 10:39:15.000000000 -0700
-+++ linux-DRV401/include/linux/fs.h    2004-10-15 11:03:52.000000000 -0700
-@@ -936,6 +936,10 @@
-       int (*setattr) (struct dentry *, struct iattr *);
-       int (*setattr_raw) (struct inode *, struct iattr *);
-       int (*getattr) (struct dentry *, struct iattr *);
-+      int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
-+      ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-+      ssize_t (*listxattr) (struct dentry *, char *, size_t);
-+      int (*removexattr) (struct dentry *, const char *);
- };
- struct seq_file;
-Index: linux-DRV401/include/linux/mbcache.h
-===================================================================
---- linux-DRV401.orig/include/linux/mbcache.h  2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/include/linux/mbcache.h       2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,69 @@
-+/*
-+  File: linux/mbcache.h
-+
-+  (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
-+
-+/* Hardwire the number of additional indexes */
-+#define MB_CACHE_INDEXES_COUNT 1
-+
-+struct mb_cache_entry;
-+
-+struct mb_cache_op {
-+      int (*free)(struct mb_cache_entry *, int);
-+};
-+
-+struct mb_cache {
-+      struct list_head                c_cache_list;
-+      const char                      *c_name;
-+      struct mb_cache_op              c_op;
-+      atomic_t                        c_entry_count;
-+      int                             c_bucket_count;
-+#ifndef MB_CACHE_INDEXES_COUNT
-+      int                             c_indexes_count;
-+#endif
-+      kmem_cache_t                    *c_entry_cache;
-+      struct list_head                *c_block_hash;
-+      struct list_head                *c_indexes_hash[0];
-+};
-+
-+struct mb_cache_entry_index {
-+      struct list_head                o_list;
-+      unsigned int                    o_key;
-+};
-+
-+struct mb_cache_entry {
-+      struct list_head                e_lru_list;
-+      struct mb_cache                 *e_cache;
-+      atomic_t                        e_used;
-+      kdev_t                          e_dev;
-+      unsigned long                   e_block;
-+      struct list_head                e_block_list;
-+      struct mb_cache_entry_index     e_indexes[0];
-+};
-+
-+/* Functions on caches */
-+
-+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t,
-+                                int, int);
-+void mb_cache_shrink(struct mb_cache *, kdev_t);
-+void mb_cache_destroy(struct mb_cache *);
-+
-+/* Functions on cache entries */
-+
-+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *);
-+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long,
-+                        unsigned int[]);
-+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]);
-+void mb_cache_entry_release(struct mb_cache_entry *);
-+void mb_cache_entry_takeout(struct mb_cache_entry *);
-+void mb_cache_entry_free(struct mb_cache_entry *);
-+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *);
-+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t,
-+                                        unsigned long);
-+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int,
-+                                               kdev_t, unsigned int);
-+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
-+                                              kdev_t, unsigned int);
-+#endif
-Index: linux-DRV401/include/linux/xattr.h
-===================================================================
---- linux-DRV401.orig/include/linux/xattr.h    2004-10-12 08:56:38.404764448 -0700
-+++ linux-DRV401/include/linux/xattr.h 2004-10-15 11:03:52.000000000 -0700
-@@ -0,0 +1,15 @@
-+/*
-+  File: linux/xattr.h
-+
-+  Extended attributes handling.
-+
-+  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
-+  Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
-+*/
-+#ifndef _LINUX_XATTR_H
-+#define _LINUX_XATTR_H
-+
-+#define XATTR_CREATE  0x1     /* set the value, fail if attr already exists */
-+#define XATTR_REPLACE 0x2     /* set the value, fail if attr does not exist */
-+
-+#endif        /* _LINUX_XATTR_H */
-Index: linux-DRV401/include/linux/limits.h
-===================================================================
---- linux-DRV401.orig/include/linux/limits.h   2004-10-15 10:26:20.000000000 -0700
-+++ linux-DRV401/include/linux/limits.h        2004-10-15 11:03:52.000000000 -0700
-@@ -13,6 +13,9 @@
- #define NAME_MAX         255  /* # chars in a file name */
- #define PATH_MAX        4096  /* # chars in a path name including nul */
- #define PIPE_BUF        4096  /* # bytes in atomic write to a pipe */
-+#define XATTR_NAME_MAX   255  /* # chars in an extended attribute name */
-+#define XATTR_SIZE_MAX 65536  /* size of an extended attribute value (64k) */
-+#define XATTR_LIST_MAX 65536  /* size of extended attribute namelist (64k) */
- #define RTSIG_MAX       32
-Index: linux-DRV401/kernel/ksyms.c
-===================================================================
---- linux-DRV401.orig/kernel/ksyms.c   2004-10-15 10:39:15.000000000 -0700
-+++ linux-DRV401/kernel/ksyms.c        2004-10-15 11:03:52.000000000 -0700
-@@ -11,6 +11,7 @@
- #include <linux/config.h>
- #include <linux/slab.h>
-+#include <linux/cache_def.h>
- #include <linux/module.h>
- #include <linux/blkdev.h>
- #include <linux/cdrom.h>
-@@ -88,6 +89,7 @@
- EXPORT_SYMBOL(exit_files);
- EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
-+EXPORT_SYMBOL(copy_fs_struct);
- EXPORT_SYMBOL(unshare_files);
- /* internal kernel memory management */
-@@ -105,6 +107,8 @@
- EXPORT_SYMBOL(kmem_cache_shrink);
- EXPORT_SYMBOL(kmem_cache_alloc);
- EXPORT_SYMBOL(kmem_cache_free);
-+EXPORT_SYMBOL(register_cache);
-+EXPORT_SYMBOL(unregister_cache);
- EXPORT_SYMBOL(kmalloc);
- EXPORT_SYMBOL(kfree);
- EXPORT_SYMBOL(vfree);
-Index: linux-DRV401/mm/vmscan.c
-===================================================================
---- linux-DRV401.orig/mm/vmscan.c      2004-10-15 10:24:07.000000000 -0700
-+++ linux-DRV401/mm/vmscan.c   2004-10-15 11:08:53.000000000 -0700
-@@ -15,6 +15,7 @@
- #include <linux/kernel_stat.h>
- #include <linux/swap.h>
- #include <linux/swapctl.h>
-+#include <linux/cache_def.h>
- #include <linux/smp_lock.h>
- #include <linux/pagemap.h>
- #include <linux/init.h>
-@@ -31,6 +32,39 @@
-  */
- #define DEF_PRIORITY (6)
-+static DECLARE_MUTEX(other_caches_sem);
-+static LIST_HEAD(cache_definitions);
-+
-+void register_cache(struct cache_definition *cache)
-+{
-+      down(&other_caches_sem);
-+      list_add(&cache->link, &cache_definitions);
-+      up(&other_caches_sem);
-+}
-+
-+void unregister_cache(struct cache_definition *cache)
-+{
-+      down(&other_caches_sem);
-+      list_del(&cache->link);
-+      up(&other_caches_sem);
-+}
-+
-+static void shrink_other_caches(unsigned int priority, int gfp_mask)
-+{
-+      struct list_head *p;
-+
-+      if (down_trylock(&other_caches_sem))
-+              return;
-+
-+      list_for_each_prev(p, &cache_definitions) {
-+              struct cache_definition *cache =
-+                      list_entry(p, struct cache_definition, link);
-+
-+              cache->shrink(priority, gfp_mask);
-+      }
-+      up(&other_caches_sem);
-+}
-+
- /*
-  * The swap-out function returns 1 if it successfully
-  * scanned all the pages it was asked to (`count').
-@@ -584,6 +618,7 @@
-       shrink_dcache_memory(priority, gfp_mask);
-       shrink_icache_memory(priority, gfp_mask);
-+      shrink_other_caches(priority, gfp_mask);
- #ifdef CONFIG_QUOTA
-       shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
- #endif
diff --git a/lustre/kernel_patches/patches/linux-2.4.19-suse-xattr-0.8.54-hp.patch b/lustre/kernel_patches/patches/linux-2.4.19-suse-xattr-0.8.54-hp.patch
deleted file mode 100644 (file)
index 1becfbc..0000000
+++ /dev/null
@@ -1,346 +0,0 @@
- Documentation/Configure.help  |   66 ++
- arch/ia64/defconfig           |    7 
- fs/Config.in                  |   14 
- fs/Makefile                   |    3 
- fs/ext2/Makefile              |    4 
- fs/ext2/file.c                |    5 
- fs/ext2/ialloc.c              |    2 
- fs/ext2/inode.c               |   34 -
- fs/ext2/namei.c               |   14 
- fs/ext2/super.c               |   29 
- fs/ext2/symlink.c             |   14 
- fs/ext2/xattr.c               | 1212 +++++++++++++++++++++++++++++++++++++++++
- fs/ext2/xattr_user.c          |  103 +++
- fs/ext3/Makefile              |    9 
- fs/ext3/ext3-exports.c        |   13 
- fs/ext3/file.c                |    5 
- fs/ext3/ialloc.c              |    2 
- fs/ext3/inode.c               |   35 -
- fs/ext3/namei.c               |   21 
- fs/ext3/super.c               |   36 +
- fs/ext3/symlink.c             |   14 
- fs/ext3/xattr.c               | 1225 ++++++++++++++++++++++++++++++++++++++++++
- fs/ext3/xattr_user.c          |  111 +++
- fs/jfs/jfs_xattr.h            |    6 
- fs/jfs/xattr.c                |    6 
- fs/mbcache.c                  |  648 ++++++++++++++++++++++
- include/linux/cache_def.h     |   15 
- include/linux/errno.h         |    4 
- include/linux/ext2_fs.h       |   31 -
- include/linux/ext2_xattr.h    |  157 +++++
- include/linux/ext3_fs.h       |   31 -
- include/linux/ext3_jbd.h      |    8 
- include/linux/ext3_xattr.h    |  157 +++++
- include/linux/fs.h            |    2 
- include/linux/mbcache.h       |   69 ++
- kernel/ksyms.c                |    4 
- mm/vmscan.c                   |   35 +
- 62 files changed, 4343 insertions(+), 182 deletions(-)
-
-Index: linux-2.4.19.SuSE/Documentation/Configure.help
-===================================================================
---- linux-2.4.19.SuSE.orig/Documentation/Configure.help        2004-05-03 11:20:17.000000000 -0700
-+++ linux-2.4.19.SuSE/Documentation/Configure.help     2004-05-03 11:50:22.000000000 -0700
-@@ -15296,6 +15296,39 @@
-   If unsure, say N.
-+Ext2 extended attributes
-+CONFIG_EXT2_FS_XATTR
-+  Extended attributes are name:value pairs associated with inodes by
-+  the kernel or by users (see the attr(5) manual page, or visit
-+  <http://acl.bestbits.at/> for details).
-+
-+  If unsure, say N.
-+
-+Ext2 extended attribute block sharing
-+CONFIG_EXT2_FS_XATTR_SHARING
-+  This option enables code for sharing identical extended attribute
-+  blocks among multiple inodes.
-+
-+  Usually, say Y.
-+
-+Ext2 extended user attributes
-+CONFIG_EXT2_FS_XATTR_USER
-+  This option enables extended user attributes on ext2. Processes can
-+  associate extended user attributes with inodes to store additional
-+  information such as the character encoding of files, etc. (see the
-+  attr(5) manual page, or visit <http://acl.bestbits.at/> for details).
-+
-+  If unsure, say N.
-+
-+Ext2 trusted extended attributes
-+CONFIG_EXT2_FS_XATTR_TRUSTED
-+  This option enables extended attributes on ext2 that are accessible
-+  (and visible) only to users capable of CAP_SYS_ADMIN. Usually this
-+  is only the super user. Trusted extended attributes are meant for
-+  implementing system/security services.
-+
-+  If unsure, say N.
-+
- Ext3 journalling file system support (EXPERIMENTAL)
- CONFIG_EXT3_FS
-   This is the journalling version of the Second extended file system
-@@ -15354,6 +15387,39 @@
-   If unsure, say N.
-+Ext3 extended attributes
-+CONFIG_EXT3_FS_XATTR
-+  Extended attributes are name:value pairs associated with inodes by
-+  the kernel or by users (see the attr(5) manual page, or visit
-+  <http://acl.bestbits.at/> for details).
-+
-+  If unsure, say N.
-+
-+Ext3 extended attribute block sharing
-+CONFIG_EXT3_FS_XATTR_SHARING
-+  This option enables code for sharing identical extended attribute
-+  blocks among multiple inodes.
-+
-+  Usually, say Y.
-+
-+Ext3 extended user attributes
-+CONFIG_EXT3_FS_XATTR_USER
-+  This option enables extended user attributes on ext3. Processes can
-+  associate extended user attributes with inodes to store additional
-+  information such as the character encoding of files, etc. (see the
-+  attr(5) manual page, or visit <http://acl.bestbits.at/> for details).
-+
-+  If unsure, say N.
-+
-+Ext3 trusted extended attributes
-+CONFIG_EXT3_FS_XATTR_TRUSTED
-+  This option enables extended attributes on ext3 that are accessible
-+  (and visible) only to users capable of CAP_SYS_ADMIN. Usually this
-+  is only the super user. Trusted extended attributes are meant for
-+  implementing system/security services.
-+
-+  If unsure, say N.
-+
- Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
- CONFIG_JBD
-   This is a generic journalling layer for block devices.  It is
-Index: linux-2.4.19.SuSE/arch/ia64/defconfig
-===================================================================
---- linux-2.4.19.SuSE.orig/arch/ia64/defconfig 2004-05-03 11:19:10.000000000 -0700
-+++ linux-2.4.19.SuSE/arch/ia64/defconfig      2004-05-03 11:50:22.000000000 -0700
-@@ -1,6 +1,13 @@
- #
- # Automatically generated make config: don't edit
- #
-+CONFIG_EXT3_FS_XATTR=y
-+# CONFIG_EXT3_FS_XATTR_SHARING is not set
-+# CONFIG_EXT3_FS_XATTR_USER is not set
-+# CONFIG_EXT2_FS_XATTR is not set
-+# CONFIG_EXT2_FS_XATTR_SHARING is not set
-+# CONFIG_EXT2_FS_XATTR_USER is not set
-+# CONFIG_FS_MBCACHE is not set
- #
- # Code maturity level options
-Index: linux-2.4.19.SuSE/fs/Config.in
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/Config.in        2004-05-03 11:18:52.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/Config.in     2004-05-03 11:50:22.000000000 -0700
-@@ -203,6 +203,10 @@
- #tristate 'Meta block cache' CONFIG_FS_MBCACHE
- define_tristate CONFIG_FS_MBCACHE y
-+# Meta block cache for Extended Attributes (ext2/ext3)
-+#tristate 'Meta block cache' CONFIG_FS_MBCACHE
-+define_tristate CONFIG_FS_MBCACHE y
-+
- mainmenu_option next_comment
- comment 'Partition Types'
- source fs/partitions/Config.in
-Index: linux-2.4.19.SuSE/fs/Makefile
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/Makefile 2004-05-03 11:22:49.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/Makefile      2004-05-03 11:50:22.000000000 -0700
-@@ -104,6 +104,9 @@
- obj-$(CONFIG_FS_MBCACHE)      += mbcache.o
- obj-$(CONFIG_FS_POSIX_ACL)    += posix_acl.o xattr_acl.o
-+export-objs += mbcache.o
-+obj-$(CONFIG_FS_MBCACHE)      += mbcache.o
-+
- # persistent filesystems
- obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
-Index: linux-2.4.19.SuSE/fs/ext2/Makefile
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/Makefile    2004-05-03 11:18:46.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext2/Makefile 2004-05-03 11:50:22.000000000 -0700
-@@ -18,4 +18,8 @@
- obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
- obj-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
-+export-objs += xattr.o
-+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o
-+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
-+
- include $(TOPDIR)/Rules.make
-Index: linux-2.4.19.SuSE/fs/ext2/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/inode.c     2004-05-03 11:18:47.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext2/inode.c  2004-05-03 11:50:22.000000000 -0700
-@@ -52,6 +52,18 @@
- }
- /*
-+ * Test whether an inode is a fast symlink.
-+ */
-+static inline int ext2_inode_is_fast_symlink(struct inode *inode)
-+{
-+      int ea_blocks = inode->u.ext2_i.i_file_acl ?
-+              (inode->i_sb->s_blocksize >> 9) : 0;
-+
-+      return (S_ISLNK(inode->i_mode) &&
-+              inode->i_blocks - ea_blocks == 0);
-+}
-+
-+/*
-  * Called at each iput()
-  */
- void ext2_put_inode (struct inode * inode)
-@@ -806,6 +818,8 @@
-               return;
-       if (ext2_inode_is_fast_symlink(inode))
-               return;
-+      if (ext2_inode_is_fast_symlink(inode))
-+              return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
-Index: linux-2.4.19.SuSE/fs/ext2/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/super.c     2004-05-03 11:18:47.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext2/super.c  2004-05-03 11:50:22.000000000 -0700
-@@ -70,6 +70,7 @@
- {
-       va_list args;
-+      ext2_xattr_put_super(sb);
-       if (!(sb->s_flags & MS_RDONLY)) {
-               sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
-               sb->u.ext2_sb.s_es->s_state =
-Index: linux-2.4.19.SuSE/fs/ext3/inode.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/inode.c     2004-05-03 11:18:47.000000000 -0700
-+++ linux-2.4.19.SuSE/fs/ext3/inode.c  2004-05-03 11:50:22.000000000 -0700
-@@ -54,6 +54,18 @@
-               inode->i_blocks - ea_blocks == 0);
- }
-+/*
-+ * Test whether an inode is a fast symlink.
-+ */
-+static inline int ext3_inode_is_fast_symlink(struct inode *inode)
-+{
-+      int ea_blocks = inode->u.ext3_i.i_file_acl ?
-+              (inode->i_sb->s_blocksize >> 9) : 0;
-+
-+      return (S_ISLNK(inode->i_mode) &&
-+              inode->i_blocks - ea_blocks == 0);
-+}
-+
- /* The ext3 forget function must perform a revoke if we are freeing data
-  * which has been journaled.  Metadata (eg. indirect blocks) must be
-  * revoked in all cases. 
-@@ -1968,6 +1980,8 @@
-               return;
-       if (ext3_inode_is_fast_symlink(inode))
-               return;
-+      if (ext3_inode_is_fast_symlink(inode))
-+              return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-               return;
-Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ext3-exports.c      2004-02-18 07:26:44.000000000 -0800
-+++ linux-2.4.19.SuSE/fs/ext3/ext3-exports.c   2004-05-03 11:50:22.000000000 -0700
-@@ -0,0 +1,13 @@
-+#include <linux/config.h>
-+#include <linux/module.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
-+
-+EXPORT_SYMBOL(ext3_force_commit);
-+EXPORT_SYMBOL(ext3_bread);
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
-Index: linux-2.4.19.SuSE/include/linux/errno.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/errno.h       2004-05-03 11:20:21.000000000 -0700
-+++ linux-2.4.19.SuSE/include/linux/errno.h    2004-05-03 11:50:22.000000000 -0700
-@@ -30,4 +30,8 @@
- #endif
-+/* Defined for extended attributes */
-+#define ENOATTR ENODATA               /* No such attribute */
-+#define ENOTSUP EOPNOTSUPP    /* Operation not supported */
-+
- #endif
-Index: linux-2.4.19.SuSE/kernel/ksyms.c
-===================================================================
---- linux-2.4.19.SuSE.orig/kernel/ksyms.c      2004-05-03 11:22:48.000000000 -0700
-+++ linux-2.4.19.SuSE/kernel/ksyms.c   2004-05-03 11:50:22.000000000 -0700
-@@ -12,6 +12,7 @@
- #define __KERNEL_SYSCALLS__
- #include <linux/config.h>
- #include <linux/slab.h>
-+#include <linux/cache_def.h>
- #include <linux/module.h>
- #include <linux/blkdev.h>
- #include <linux/cdrom.h>
-Index: linux-2.4.19.SuSE/mm/vmscan.c
-===================================================================
---- linux-2.4.19.SuSE.orig/mm/vmscan.c 2004-05-03 11:18:53.000000000 -0700
-+++ linux-2.4.19.SuSE/mm/vmscan.c      2004-05-03 11:50:22.000000000 -0700
-@@ -32,6 +32,39 @@
-  */
- int vm_passes = 60;
-+static DECLARE_MUTEX(other_caches_sem);
-+static LIST_HEAD(cache_definitions);
-+
-+void register_cache(struct cache_definition *cache)
-+{
-+      down(&other_caches_sem);
-+      list_add(&cache->link, &cache_definitions);
-+      up(&other_caches_sem);
-+}
-+
-+void unregister_cache(struct cache_definition *cache)
-+{
-+      down(&other_caches_sem);
-+      list_del(&cache->link);
-+      up(&other_caches_sem);
-+}
-+
-+static void shrink_other_caches(unsigned int priority, int gfp_mask)
-+{
-+      struct list_head *p;
-+
-+      if (down_trylock(&other_caches_sem))
-+              return;
-+
-+      list_for_each_prev(p, &cache_definitions) {
-+              struct cache_definition *cache =
-+                      list_entry(p, struct cache_definition, link);
-+
-+              cache->shrink(priority, gfp_mask);
-+      }
-+      up(&other_caches_sem);
-+}
-+
- /*
-  * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan
-  * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll
diff --git a/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch b/lustre/kernel_patches/patches/linux-2.4.19-xattr-0.8.54-suse.patch
deleted file mode 100644 (file)
index 26d3af9..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
- ext2/super.c        |    3 +--
- ext3/ext3-exports.c |   13 +++++++++++++
- 2 files changed, 14 insertions(+), 2 deletions(-)
-
-Index: linux-2.4.19.SuSE/fs/ext2/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext2/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext2/super.c  Sun Nov 16 00:40:59 2003
-@@ -70,6 +70,7 @@
- {
-       va_list args;
-+      ext2_xattr_put_super(sb);
-       if (!(sb->s_flags & MS_RDONLY)) {
-               sb->u.ext2_sb.s_mount_state |= EXT2_ERROR_FS;
-               sb->u.ext2_sb.s_es->s_state =
-Index: linux-2.4.19.SuSE/fs/ext3/super.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/super.c     Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/ext3/super.c  Sun Nov 16 00:40:59 2003
-@@ -1822,8 +1828,6 @@
-       exit_ext3_xattr();
- }
--EXPORT_SYMBOL(ext3_force_commit);
--EXPORT_SYMBOL(ext3_bread);
- MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
- MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
-Index: linux-2.4.19.SuSE/fs/ext3/ext3-exports.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/ext3/ext3-exports.c      Sun Nov 16 00:40:58 2003
-+++ linux-2.4.19.SuSE/fs/ext3/ext3-exports.c   Sun Nov 16 00:40:59 2003
-@@ -0,0 +1,13 @@
-+#include <linux/config.h>
-+#include <linux/module.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/ext3_xattr.h>
-+
-+EXPORT_SYMBOL(ext3_force_commit);
-+EXPORT_SYMBOL(ext3_bread);
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
diff --git a/lustre/kernel_patches/patches/listman-2.4.19-bgl.patch b/lustre/kernel_patches/patches/listman-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 19ad959..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-Index: linux-2.4.18-chaos/include/linux/list.h
-===================================================================
---- linux-2.4.18-chaos.orig/include/linux/list.h       2003-11-23 00:07:05.000000000 +0300
-+++ linux-2.4.18-chaos/include/linux/list.h    2003-12-11 00:25:15.000000000 +0300
-@@ -173,6 +173,67 @@
-       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
-               pos = pos->prev, prefetch(pos->prev))
-               
-+/**
-+ * list_for_each_entry        -       iterate over list of given type
-+ * @pos:      the type * to use as a loop counter.
-+ * @head:     the head for your list.
-+ * @member:   the name of the list_struct within the struct.
-+ */
-+#define list_for_each_entry(pos, head, member)                                \
-+      for (pos = list_entry((head)->next, typeof(*pos), member),      \
-+                   prefetch(pos->member.next);                        \
-+           &pos->member != (head);                                    \
-+           pos = list_entry(pos->member.next, typeof(*pos), member),  \
-+                   prefetch(pos->member.next))
-+
-+#ifndef list_for_each_entry_safe
-+/**
-+ * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
-+ * @pos:        the type * to use as a loop counter.
-+ * @n:          another type * to use as temporary storage
-+ * @head:       the head for your list.
-+ * @member:     the name of the list_struct within the struct.
-+ */
-+#define list_for_each_entry_safe(pos, n, head, member)                        \
-+        for (pos = list_entry((head)->next, typeof(*pos), member),    \
-+              n = list_entry(pos->member.next, typeof(*pos), member); \
-+           &pos->member != (head);                                    \
-+           pos = n, n = list_entry(n->member.next, typeof(*n), member))
-+#endif
-+
-+/**
-+ * list_move - delete from one list and add as another's head
-+ * @list: the entry to move
-+ * @head: the head that will precede our entry
-+ */
-+static inline void list_move(struct list_head *list, struct list_head *head)
-+{
-+      __list_del(list->prev, list->next);
-+      list_add(list, head);
-+}
-+
-+/**
-+ * list_move_tail - delete from one list and add as another's tail
-+ * @list: the entry to move
-+ * @head: the head that will follow our entry
-+ */
-+static inline void list_move_tail(struct list_head *list,
-+                                struct list_head *head)
-+{
-+      __list_del(list->prev, list->next);
-+      list_add_tail(list, head);
-+}
-+
-+/* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
-+ * as 2.5 and let macros drop back.. */
-+#define hlist_entry                     list_entry
-+#define hlist_head                      list_head
-+#define hlist_node                      list_head
-+#define HLIST_HEAD                      LIST_HEAD
-+#define INIT_HLIST_HEAD                 INIT_LIST_HEAD
-+#define hlist_del_init                  list_del_init
-+#define hlist_add_head                  list_add
-+#define hlist_for_each_safe             list_for_each_safe
- #endif /* __KERNEL__ || _LVM_H_INCLUDE */
diff --git a/lustre/kernel_patches/patches/mcore-2.4.20-8.patch b/lustre/kernel_patches/patches/mcore-2.4.20-8.patch
deleted file mode 100644 (file)
index c8b80eb..0000000
+++ /dev/null
@@ -1,2738 +0,0 @@
-? linux/.config
-? linux/include/linux/autoconf.h
-? linux/include/linux/modules
-Index: linux/Makefile
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/Makefile,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/Makefile     12 Mar 2003 19:48:52 -0000      1.3.2.1
-+++ linux/Makefile     1 Apr 2003 12:17:40 -0000       1.3.2.1.2.1
-@@ -99,6 +99,10 @@
- CFLAGS += -fomit-frame-pointer
- endif
- AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
-+ifeq ($(CONFIG_MCL_COREDUMP),y)
-+      CFLAGS += -g
-+endif
-+
- #
- # ROOT_DEV specifies the default root-device when making the image.
-Index: linux/Documentation/Configure.help
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/Documentation/Configure.help,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/Documentation/Configure.help 12 Mar 2003 19:48:52 -0000      1.3.2.1
-+++ linux/Documentation/Configure.help 1 Apr 2003 12:17:40 -0000       1.3.2.1.2.1
-@@ -21660,6 +21660,35 @@
-   This option allows you to run the kernel with data cache disabled.
-   Say Y if you experience CPM lock-ups.
-+Boot kernel image support
-+CONFIG_BOOTIMG
-+  Add support for booting a new Linux kernel from a running Linux
-+  system. You need to download the bootimg(8) utility from
-+  ftp://icaftp.epfl.ch/pub/people/almesber/misc/bootimg-current.tar.gz
-+  in order to use this functionality.
-+
-+Protect SMP configuration tables
-+CONFIG_BOOTIMG_SMP
-+  On SMP systems, the BIOS stores tables with configuration data in
-+  memory and an SMP-enabled kernel reads these tables. However, a
-+  kernel without SMP support will overwrite such tables. If a kernel
-+  without SMP support used bootimg to boot an SMP-enabled kernel, the
-+  latter will probably crash when trying to read the SMP tables. The
-+  CONFIG_BOOTIMG_SMP option enables minimal support for scanning and
-+  protecting of SMP configuration tables also for kernels without SMP
-+  support.
-+
-+In-memory kernel core dump facility
-+CONFIG_MCL_COREDUMP
-+  In conjunction with bootimg, this allows you to get kernel core dumps
-+  of your system at panic() time.  The panic call is modified so that it
-+  calls the core dump facility and reboots the system.  On the way back 
-+  up, the kernel dump image is written out to disk by the accompanying 
-+  init script.  You can use the crash analysis tool to analyze the core 
-+  dump.  This tool can be found at :
-+
-+       http://www.missioncriticallinux.com/download
-+
- #
- # m68k-specific kernel options
- # Documented by Chris Lawrence <mailto:quango@themall.net> et al.
-Index: linux/arch/i386/config.in
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/config.in,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.2
-diff -u -r1.3.2.1 -r1.3.2.1.2.2
---- linux/arch/i386/config.in  12 Mar 2003 19:49:05 -0000      1.3.2.1
-+++ linux/arch/i386/config.in  1 Apr 2003 19:35:12 -0000       1.3.2.1.2.2
-@@ -502,6 +502,12 @@
-    bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
-    bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
-    bool '  Compile the kernel with frame pointers' CONFIG_FRAME_POINTER
-+   if [ "$CONFIG_FRAME_POINTER " != "n" ]; then
-+      bool '  Kernel Core Dump Facility' CONFIG_MCL_COREDUMP
-+      if [ "$CONFIG_MCL_COREDUMP" = "y" ]; then
-+         bool '  Reboot using bootimg' CONFIG_BOOTIMG
-+      fi
-+   fi
- fi
- endmenu
-Index: linux/arch/i386/vmlinux.lds
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/vmlinux.lds,v
-retrieving revision 1.1.1.1.4.1
-retrieving revision 1.1.1.1.4.1.2.1
-diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1
---- linux/arch/i386/vmlinux.lds        12 Mar 2003 19:49:05 -0000      1.1.1.1.4.1
-+++ linux/arch/i386/vmlinux.lds        1 Apr 2003 12:17:40 -0000       1.1.1.1.4.1.2.1
-@@ -19,6 +19,13 @@
-   .rodata : { *(.rodata) *(.rodata.*) }
-   .kstrtab : { *(.kstrtab) }
-+  . = ALIGN(16);              /* Relocatable bootimage code */
-+  __bootimg_start = .;
-+  .bootimg : {
-+      *(.bootimg)
-+      }
-+  __bootimg_end = .;
-+
-   . = ALIGN(16);              /* Exception table */
-   __start___ex_table = .;
-   __ex_table : { *(__ex_table) }
-Index: linux/arch/i386/boot/setup.S
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/boot/setup.S,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.1
-diff -u -r1.2.2.1 -r1.2.2.1.2.1
---- linux/arch/i386/boot/setup.S       12 Mar 2003 19:49:05 -0000      1.2.2.1
-+++ linux/arch/i386/boot/setup.S       1 Apr 2003 12:17:40 -0000       1.2.2.1.2.1
-@@ -105,16 +105,22 @@
- # flags, unused bits must be zero (RFU) bit within loadflags
- loadflags:
- LOADED_HIGH   = 1                     # If set, the kernel is loaded high
-+RELOADS_GDT   = 2                     # if set, kernel reloads GDT, such that
-+                                      # boot loader does not have to provide
-+                                      # GDT in a "safe" memory location
- CAN_USE_HEAP  = 0x80                  # If set, the loader also has set
-                                       # heap_end_ptr to tell how much
-                                       # space behind setup.S can be used for
-                                       # heap purposes.
-                                       # Only the loader knows what is free
--#ifndef __BIG_KERNEL__
--              .byte   0
--#else
--              .byte   LOADED_HIGH
-+_FLAGS = 0
-+#ifdef __BIG_KERNEL__
-+              _FLAGS = _FLAGS | LOADED_HIGH
- #endif
-+#ifdef CONFIG_BOOTIMG
-+              _FLAGS = _FLAGS | RELOADS_GDT
-+#endif
-+              .byte _FLAGS
- setup_move_size: .word  0x8000                # size to move, when setup is not
-                                       # loaded at 0x90000. We will move setup 
-Index: linux/arch/i386/kernel/Makefile
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/Makefile,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.1
-diff -u -r1.2.2.1 -r1.2.2.1.2.1
---- linux/arch/i386/kernel/Makefile    12 Mar 2003 19:49:05 -0000      1.2.2.1
-+++ linux/arch/i386/kernel/Makefile    1 Apr 2003 12:17:40 -0000       1.2.2.1.2.1
-@@ -49,6 +49,7 @@
- obj-$(CONFIG_X86_LONGRUN)     += longrun.o
- obj-$(CONFIG_ELAN_CPUFREQ)    += elanfreq.o
- obj-$(CONFIG_PROFILING)               += profile.o
-+obj-$(CONFIG_MCL_COREDUMP)    += crash.o
- include $(TOPDIR)/Rules.make
-Index: linux/arch/i386/kernel/crash.c
-===================================================================
-RCS file: linux/arch/i386/kernel/crash.c
-diff -N linux/arch/i386/kernel/crash.c
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/arch/i386/kernel/crash.c     1 Apr 2003 12:17:40 -0000       1.1.6.1
-@@ -0,0 +1,82 @@
-+/*
-+ *  linux/arch/i386/crash.c
-+ *
-+ *  Architecture dependant code for MCL in-memory core dump.
-+ */
-+#include <linux/sched.h>
-+#include <linux/types.h>
-+#include <linux/smp.h>
-+#include <linux/crash.h>
-+#include <linux/reboot.h>
-+#include <linux/bootimg.h>
-+
-+inline void crash_save_regs(void) {
-+      static unsigned long regs[8];
-+
-+      __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs[0]));
-+      __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs[1]));
-+      __asm__ __volatile__("movl %%edx,%0" : "=m"(regs[2]));
-+      __asm__ __volatile__("movl %%esi,%0" : "=m"(regs[3]));
-+      __asm__ __volatile__("movl %%edi,%0" : "=m"(regs[4]));
-+      __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs[5]));
-+      __asm__ __volatile__("movl %%eax,%0" : "=m"(regs[6]));
-+      __asm__ __volatile__("movl %%esp,%0" : "=m"(regs[7]));
-+
-+      panic_regs = regs;
-+}
-+
-+/*
-+ *  Save the current stack pointer and EIP.
-+ */
-+void crash_save_current_state(struct task_struct *tp)
-+{
-+      /*
-+       *  Here we save ebp instead of esp just in case the compiler
-+       *  decides to put an extra push in before we execute this
-+       *  instruction (thus invalidating our frame pointer).
-+       */
-+      asm volatile("movl %%ebp,%0":"=m" (*(u_long *)&tp->thread.esp));
-+      tp->thread.eip = (u_long)crash_save_current_state;
-+      panic_ksp[smp_processor_id()] = tp->thread.esp;
-+      mb();
-+
-+      save_core();
-+
-+      crash_halt_or_reboot(1);
-+}
-+
-+/*
-+ *  If we are not the panicking thread, we simply halt.  Otherwise,
-+ *  we take care of calling the reboot code.
-+ */
-+void crash_halt_or_reboot(int boot_cpu)
-+{
-+#ifdef CONFIG_SMP
-+      if (!boot_cpu) {
-+              stop_this_cpu(NULL);
-+              /* NOTREACHED */
-+      }
-+#endif
-+      machine_restart(NULL);
-+}
-+
-+void crash_cleanup_smp_state(void)
-+{
-+      /*
-+       *  Here we duplicate smp_send_stop.  Crash_halt_or_reboot() calls
-+       *  stop_this_cpu.  We now know that we are the only one running, 
-+       *  so we finish off the smp_send_stop function.
-+       */
-+      __cli();
-+#ifdef CONFIG_SMP
-+      disable_local_APIC();
-+#endif
-+}
-+
-+/*
-+ *  Core dump IPI
-+ */
-+void smp_crash_funnel_cpu(void)
-+{
-+      crash_save_current_state(current);
-+}
-Index: linux/arch/i386/kernel/nmi.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/nmi.c,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.1
-diff -u -r1.2.2.1 -r1.2.2.1.2.1
---- linux/arch/i386/kernel/nmi.c       12 Mar 2003 19:49:06 -0000      1.2.2.1
-+++ linux/arch/i386/kernel/nmi.c       1 Apr 2003 12:17:40 -0000       1.2.2.1.2.1
-@@ -374,11 +374,18 @@
-                       bust_spinlocks(1);
-                       printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip);
-                       show_registers(regs);
-+#ifdef CONFIG_MCL_COREDUMP
-+                      spin_unlock(&nmi_print_lock);
-+                      bust_spinlocks(0);
-+                      panic("die");
-+                      /* NOTREACHED */
-+#else
-                       printk("console shuts up ...\n");
-                       console_silent();
-                       spin_unlock(&nmi_print_lock);
-                       bust_spinlocks(0);
-                       do_exit(SIGSEGV);
-+#endif
-               }
-       } else {
-               last_irq_sums[cpu] = sum;
-Index: linux/arch/i386/kernel/process.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/process.c,v
-retrieving revision 1.2.2.2
-retrieving revision 1.2.2.2.2.1
-diff -u -r1.2.2.2 -r1.2.2.2.2.1
---- linux/arch/i386/kernel/process.c   1 Apr 2003 02:11:17 -0000       1.2.2.2
-+++ linux/arch/i386/kernel/process.c   1 Apr 2003 12:17:40 -0000       1.2.2.2.2.1
-@@ -50,6 +50,9 @@
- #ifdef CONFIG_MATH_EMULATION
- #include <asm/math_emu.h>
- #endif
-+#ifdef CONFIG_BOOTIMG
-+#include <linux/bootimg.h>
-+#endif
- #include <linux/irq.h>
-@@ -377,7 +380,21 @@
- void machine_restart(char * __unused)
- {
-+#ifdef CONFIG_MCL_COREDUMP
-+      extern char *panicmsg;
-+      /*
-+       *  Only call bootimg if we have a valid descriptor and
-+       *  we are in a panic() context.
-+       */
-+      if (panicmsg)
-+#endif
-+#ifdef CONFIG_BOOTIMG
-+              if (bootimg_dsc.page_dir)
-+                      boot_image();
-+#endif
-+
- #if CONFIG_SMP
-+{
-       int cpuid;
-       
-       cpuid = GET_APIC_ID(apic_read(APIC_ID));
-@@ -413,6 +430,7 @@
-       if (!netdump_func)
-               smp_send_stop();
-       disable_IO_APIC();
-+}
- #endif
-       if(!reboot_thru_bios) {
-Index: linux/arch/i386/kernel/setup.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/setup.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.2
-diff -u -r1.3.2.1 -r1.3.2.1.2.2
---- linux/arch/i386/kernel/setup.c     12 Mar 2003 19:49:06 -0000      1.3.2.1
-+++ linux/arch/i386/kernel/setup.c     1 Apr 2003 17:55:35 -0000       1.3.2.1.2.2
-@@ -116,6 +116,9 @@
- #include <asm/mpspec.h>
- #include <asm/mmu_context.h>
- #include <asm/edd.h>
-+#ifdef CONFIG_MCL_COREDUMP
-+#include <linux/crash.h>
-+#endif
- /*
-  * Machine setup..
-  */
-@@ -973,6 +976,7 @@
- static unsigned long __init setup_memory(void)
- {
-       unsigned long bootmap_size, start_pfn, max_low_pfn;
-+      unsigned long bootmap_pages = 0UL, crash_pages = 0UL;
-       /*
-        * partially used pages are not usable - thus
-@@ -992,6 +996,21 @@
-       printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
-               pages_to_mb(highend_pfn - highstart_pfn));
- #endif
-+
-+#ifdef CONFIG_MCL_COREDUMP
-+      bootmap_pages = bootmem_bootmap_pages(max_low_pfn);
-+      crash_pages = crash_pages_needed();
-+
-+      printk("start_pfn: %d, bootmap_pages: %d\n", start_pfn, bootmap_pages);
-+
-+      crash_init((u_long)phys_to_virt(PFN_PHYS(start_pfn)),
-+                 (u_long)phys_to_virt(PFN_PHYS(LOW_OFFSET + start_pfn)),
-+                 (u_long)phys_to_virt(PFN_PHYS(LOW_OFFSET + start_pfn +
-+                                               crash_pages)));
-+
-+      printk("new start_pfn: %08lx\n", PFN_PHYS(start_pfn));
-+      printk("crash map starts at %lx\n",(start_pfn+bootmap_pages)*PAGE_SIZE);
-+#endif
-       printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
-                       pages_to_mb(max_low_pfn));
-       /*
-@@ -1007,8 +1026,8 @@
-        * the (very unlikely) case of us accidentally initializing the
-        * bootmem allocator with an invalid RAM area.
-        */
--      reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
--                       bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
-+      reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + bootmap_size + 
-+                    ((1+crash_pages)*PAGE_SIZE) + PAGE_SIZE-1) - (HIGH_MEMORY));
-       /*
-        * reserve physical page 0 - it's a special BIOS page on many boxes,
-@@ -1016,6 +1035,16 @@
-        */
-       reserve_bootmem(0, PAGE_SIZE);
-+#ifdef CONFIG_BOOTIMG
-+      /*
-+       * bootimg(8) reads the old parameter block. Note that the copy in
-+       * empty_zero_page will vanish when mem_init runs. (Should we
-+       * memcpy(phys_to_virt(0x90000), PARAM, PAGE_SIZE);
-+       * now ?)
-+       */
-+      reserve_bootmem(0x90000, PAGE_SIZE);
-+#endif
-+
- #ifdef CONFIG_SMP
-       /*
-        * But first pinch a few for the stack/trampoline stuff
-@@ -1032,6 +1061,7 @@
-       find_smp_config();
- #endif
- #ifdef CONFIG_BLK_DEV_INITRD
-+      printk("caution: initrd may overwrite dump\n"); /* phro */
-       if (LOADER_TYPE && INITRD_START) {
-               if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-                       reserve_bootmem(INITRD_START, INITRD_SIZE);
-@@ -1172,6 +1202,12 @@
-       smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
- #endif
-       paging_init();
-+#ifdef CONFIG_MCL_COREDUMP
-+      /*
-+       * Reserve crash pages
-+       */
-+      crash_mark_dump_reserved();
-+#endif
- #ifdef CONFIG_X86_LOCAL_APIC
-       /*
-        * get boot-time SMP configuration:
-Index: linux/arch/i386/kernel/smp.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/smp.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/arch/i386/kernel/smp.c       12 Mar 2003 19:49:06 -0000      1.3.2.1
-+++ linux/arch/i386/kernel/smp.c       1 Apr 2003 12:17:40 -0000       1.3.2.1.2.1
-@@ -23,6 +23,9 @@
- #include <asm/pgalloc.h>
- #include <asm/smpboot.h>
-+#ifdef CONFIG_MCL_COREDUMP
-+#include <asm/crash.h>
-+#endif
- /*
-  *    Some notes on x86 processor bugs affecting SMP operation:
-  *
-@@ -579,7 +582,7 @@
-       return 0;
- }
--static void stop_this_cpu (void * dummy)
-+void stop_this_cpu (void * dummy)
- {
-       /*
-        * Remove this CPU:
-Index: linux/arch/i386/kernel/traps.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/traps.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/arch/i386/kernel/traps.c     12 Mar 2003 19:49:06 -0000      1.3.2.1
-+++ linux/arch/i386/kernel/traps.c     1 Apr 2003 12:17:40 -0000       1.3.2.1.2.1
-@@ -52,6 +52,10 @@
- #include <linux/irq.h>
- #include <linux/module.h>
-+#ifdef CONFIG_MCL_COREDUMP
-+#include <linux/crash.h>
-+#endif
-+
- asmlinkage int system_call(void);
- asmlinkage void lcall7(void);
- asmlinkage void lcall27(void);
-@@ -309,7 +313,11 @@
-               netdump_func(regs);
-       bust_spinlocks(0);
-       spin_unlock_irq(&die_lock);
--      do_exit(SIGSEGV);
-+#ifdef CONFIG_MCL_COREDUMP 
-+      if(panic_on_oops)
-+              panic("die");
-+#endif
-+      do_exit(SIGSEGV);/* NOTREACHED */
- }
- static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
-Index: linux/drivers/char/misc.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/drivers/char/misc.c,v
-retrieving revision 1.2
-retrieving revision 1.2.4.1
-diff -u -r1.2 -r1.2.4.1
---- linux/drivers/char/misc.c  25 Sep 2002 17:11:05 -0000      1.2
-+++ linux/drivers/char/misc.c  1 Apr 2003 12:17:41 -0000       1.2.4.1
-@@ -78,6 +78,8 @@
- extern int i8k_init(void);
- extern int lcd_init(void);
-+extern int crash_init_chrdev(void);
-+
- static int misc_read_proc(char *buf, char **start, off_t offset,
-                         int len, int *eof, void *private)
- {
-@@ -255,6 +257,9 @@
- int __init misc_init(void)
- {
-       create_proc_read_entry("misc", 0, 0, misc_read_proc, NULL);
-+#ifdef CONFIG_MCL_COREDUMP
-+      crash_init_chrdev();
-+#endif
- #ifdef CONFIG_MVME16x
-       rtc_MK48T08_init();
- #endif
-Index: linux/drivers/char/sysrq.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/drivers/char/sysrq.c,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.2
-diff -u -r1.2.2.1 -r1.2.2.1.2.2
---- linux/drivers/char/sysrq.c 12 Mar 2003 19:49:47 -0000      1.2.2.1
-+++ linux/drivers/char/sysrq.c 1 Apr 2003 17:55:35 -0000       1.2.2.1.2.2
-@@ -97,7 +97,18 @@
-       action_msg:     "Resetting",
- };
--
-+#ifdef CONFIG_MCL_COREDUMP
-+/* kernel core dump sysrq */
-+static void sysrq_handle_coredump(int key, struct pt_regs *pt_regs,
-+              struct kbd_struct *kbd, struct tty_struct *ttty) {
-+      panic("sysrq");
-+}
-+static struct sysrq_key_op sysrq_coredump_op = {
-+      handler:        sysrq_handle_coredump,
-+      help_msg:       "Crash",
-+      action_msg:     "Dumping core",
-+};
-+#endif
- /* SYNC SYSRQ HANDLERS BLOCK */
-@@ -334,7 +345,11 @@
-                it is handled specially on the spark
-                and will never arive */
- /* b */       &sysrq_reboot_op,
-+#ifdef CONFIG_MCL_COREDUMP
-+/* c */       &sysrq_coredump_op,
-+#else
- /* c */       NULL,
-+#endif
- /* d */       NULL,
- /* e */       &sysrq_term_op,
- /* f */       NULL,
-Index: linux/include/asm-i386/bootimg.h
-===================================================================
-RCS file: linux/include/asm-i386/bootimg.h
-diff -N linux/include/asm-i386/bootimg.h
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/include/asm-i386/bootimg.h   1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,141 @@
-+/* asm-i386/bootimg.h - Boot image, i386-specific code */
-+
-+/* Written 2000 by Werner Almesberger */
-+
-+/*
-+ * When porting bootimg(2) to a new architcture, you need to adapt the
-+ * functions and definitions in this file.
-+ */
-+
-+
-+#ifndef _ASM_I386_BOOTIMG_H
-+#define _ASM_I386_BOOTIMG_H
-+
-+#include <linux/config.h>
-+#include <asm/system.h>
-+
-+#ifdef CONFIG_SMP
-+#include <linux/smp.h>
-+#include <linux/irq.h>
-+#endif
-+
-+
-+/*
-+ * The memory page with the code currently executing has been copied from
-+ * old_page to new_page. Jump there.
-+ *
-+ * Note: flush_icache_range has already been called on the new page.
-+ */
-+
-+static inline void jump_relocated(unsigned long old_page,unsigned long new_page)
-+{
-+      int tmp;
-+
-+      __asm__ __volatile__(
-+      "stc\n\t"
-+      "call 1f\n"
-+      "1:\tjnc 2f\n\t"
-+      "popl %0\n\t"
-+      "addl %1,%0\n\t"
-+      "addl %1,%%esp\n\t"
-+      "clc\n\t"
-+      "jmp *%0\n"
-+      "2:"
-+      : "=&r" (tmp) : "r" (new_page-old_page));
-+}
-+
-+
-+/*
-+ * Stop paging, such that
-+ *  - page tables can be overwritten
-+ *  - all physical memory can be accessed
-+ *  - all physical memory is identity-mapped
-+ *
-+ * (Other rules are possible, but need to be encoded in bootimg(8).)
-+ */
-+
-+static inline void stop_paging(void)
-+{
-+      unsigned long msw;
-+
-+      __asm__ __volatile__(
-+      "movl %%cr0,%0\n\t"
-+      "andl $0x7fffffff,%0\n\t"
-+      "movl %0,%%cr0\n\t"
-+      "jmp 1f\n\t"    /* i486 and such */
-+      "1:"
-+
-+/* Clear the PAE bit in register %cr4 if we were in PAE mode.  The initial
-+ * page table set up by the new kernel's bootstrap code is non-PAE regardless
-+ * of whether the new kernel is a PAE kernel.  By clearing the PAE bit here,
-+ * we make sure the bootstrap code doesn't accidentally enable PAE mode when
-+ * it turns on address translation.
-+ */
-+#ifdef CONFIG_X86_PAE
-+      "movl %%cr4,%0\n\t"
-+      "andl $0xffffffdf,%0\n\t"
-+      "movl %0,%%cr4\n\t"
-+#endif
-+
-+      : "=&r" (msw) : : "memory");
-+}
-+
-+
-+/*
-+ * Stop any remaining concurrency in the system. If become_only_thread fails
-+ * but the system is still usable, become_only_thread should return an error
-+ * code. If no recovery is possible, it may as well panic.
-+ */
-+
-+static inline int become_only_thread(void)
-+{
-+#ifdef CONFIG_SMP
-+      smp_send_stop();        /* SMP builds only; presumably halts the other CPUs -- defined elsewhere, confirm */
-+      disable_IO_APIC();      /* SMP builds only; defined elsewhere */
-+#endif
-+      cli();                  /* runs on every configuration, after the SMP-only steps */
-+      return 0;               /* this i386 version has no failure path */
-+}
-+
-+
-+/*
-+ * A conservative estimate of the number of bytes relocate_and_jump allocated
-+ * on the stack. This is only used for sanity checking before running code,
-+ * because we can't recover from failure in relocate_and_jump.
-+ */
-+
-+#define RESERVE_MIN_RELOC_STACK       256
-+
-+
-+/*
-+ * Change the stack pointer such that stack is at the end of the specified
-+ * page. No data on the old stack will be accessed anymore, so no copying is
-+ * required.
-+ */
-+
-+static inline void stack_on_page(void *page)
-+{
-+      __asm__ __volatile__(
-+      "push %%ds\n\t"                 /* copy the data segment selector ... */
-+      "pop %%ss\n\t"                  /* ... into the stack segment register */
-+      "movl %0,%%esp\n\t"             /* stack pointer = start of page */
-+      "addl $0x1000,%%esp\n\t"        /* + 0x1000 bytes: the end of a 4 KB page (size is hard-coded) */
-+      : : "r" (page));
-+}
-+
-+/*
-+ * Set up things such that the kernel will be comfortable (e.g. some
-+ * architectures expect the boot loader to set registers in certain ways),
-+ * and then jump to the kernel's entry address.
-+ */
-+
-+static inline void jump_to_kernel(void (*kernel_entry)(void))
-+{
-+      __asm__ __volatile__(
-+      "mov $0x90000,%%esi\n\t"        /* presumably the boot-parameter address the entry code expects in %esi -- confirm */
-+      : : );  /* NOTE(review): no operand or clobber ties %esi to the call below; verify the compiler cannot reuse it in between */
-+
-+      kernel_entry();
-+}
-+
-+#endif
-Index: linux/include/asm-i386/crash.h
-===================================================================
-RCS file: linux/include/asm-i386/crash.h
-diff -N linux/include/asm-i386/crash.h
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/include/asm-i386/crash.h     1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,15 @@
-+#ifndef __ASM_CRASH_H
-+#define __ASM_CRASH_H
-+
-+#define UPPER_MEM_BACKUP 0
-+#define LOWER_MEM_FORWARD 0
-+#define LOW_OFFSET 100
-+
-+/*
-+ *  These two functions are inlined on alpha.  That's why they appear
-+ *  in the arch dependent include file.
-+ */
-+void crash_save_current_state(struct task_struct *);
-+void crash_halt_or_reboot(int);
-+
-+#endif
-Index: linux/include/linux/bootimg.h
-===================================================================
-RCS file: linux/include/linux/bootimg.h
-diff -N linux/include/linux/bootimg.h
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/include/linux/bootimg.h      1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,84 @@
-+/* linux/bootimg.h - Boot image, general definitions */
-+
-+/* Written 2000 by Werner Almesberger */
-+
-+
-+#ifndef _LINUX_BOOTIMG_H
-+#define _LINUX_BOOTIMG_H
-+
-+
-+/*
-+ * Constraints on image_map:
-+ *  - each image_map[n] is the virtual address of a page-sized memory region
-+ *    readable by the user
-+ *  - currently, image_map[n] is not required to be page-aligned, but this may
-+ *    change in the future if we want to map pages directly to lower memory
-+ *    pressure (NB: mapping works for ELF and plain binary images, but usually
-+ *    not for (b)zImages, because the prepended boot and setup sectors
-+ *    mis-align them)
-+ *
-+ * Constraints on load_map:
-+ *  - each load_map[] is the physical address of a page in RAM
-+ */
-+
-+struct boot_image {
-+      void **image_map;       /* pointers to image pages in user memory */
-+      int pages;              /* length in pages; sys_bootimg() returns -EFBIG at PAGES_PER_TABLE*PAGES_PER_TABLE/2 or more */
-+      unsigned long *load_map;/* list of destination pages (physical addr) */
-+      unsigned long start;    /* jump to this physical address */
-+      int flags;              /* for future use, must be zero for now (sys_bootimg() returns -EINVAL otherwise) */
-+};
-+
-+
-+#ifdef __KERNEL__
-+
-+#define __bootimg __attribute__ ((__section__ (".bootimg")))
-+
-+
-+struct bootimg_dsc {
-+      unsigned long self;             /* code page            ALL ADDRESSES */
-+      unsigned long scratch;          /* scratch page         ARE PHYSICAL !*/
-+      unsigned long **page_dir;       /* src & dst page tables              */
-+      void (*jump_to)(void);          /* start address                      */
-+      int pages;                      /* number of pages (copied from boot_image.pages) */
-+      unsigned long csum;             /* Kernel Image checksum, as computed by bootimg_checksum() */
-+};
-+
-+/*
-+ * page_dir contains pointers to pages containing pointers to pages. We call
-+ * page_dir a "directory" and the page page_dir[n] points to a "table". The
-+ * first PAGES_PER_TABLE/2 entries of page_dir are for source pages, and other
-+ * half are for destination pages.
-+ */
-+
-+/*
-+ * Note that the definitions used here do not necessarily correspond to the
-+ * architecture-specific PTRS_PER_PTE, __pte_offset, etc.
-+ */
-+ 
-+#define PAGES_PER_TABLE       (PAGE_SIZE/sizeof(void *))
-+#define FROM_TABLE(i) ((i)/PAGES_PER_TABLE)
-+#define TO_TABLE(i)   ((i)/PAGES_PER_TABLE+PAGES_PER_TABLE/2)
-+#define PAGE_NR(i)    ((i) % PAGES_PER_TABLE)
-+
-+
-+extern char __bootimg_start,__bootimg_end;    /* linker segment boundaries */
-+extern unsigned long *unity_page; /* unity-mapped page for i386 */
-+
-+/*
-+ * relocate_and_jump runs in its own page with its own stack. This makes it
-+ * difficult to pass parameters. The solution chosen here is to use the global
-+ * variable bootimg_dsc, which is copied into an "auto" variable by
-+ * relocate_and_jump before any copying or relocation takes place.
-+ */
-+
-+extern struct bootimg_dsc bootimg_dsc;
-+
-+typedef void (*relocate_and_jump_t)(void);
-+
-+void relocate_and_jump(void);
-+int  boot_image(void);
-+
-+#endif /* __KERNEL__ */
-+
-+#endif
-Index: linux/include/linux/crash.h
-===================================================================
-RCS file: linux/include/linux/crash.h
-diff -N linux/include/linux/crash.h
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/include/linux/crash.h        1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,119 @@
-+#ifndef __LINUX_CRASH_H
-+#define __LINUX_CRASH_H
-+
-+/* defines for interfacing with user-space (ioctls, etc) */
-+struct ioctl_getdump {
-+      unsigned long kva;
-+      unsigned long buf;
-+};
-+
-+#define CRASH_IOC_MAGIC 'C'
-+
-+#define CRASH_IOCFREEDUMP _IO(CRASH_IOC_MAGIC, 0)
-+#define CRASH_IOCGETDUMP _IOWR(CRASH_IOC_MAGIC, 1, struct ioctl_getdump)
-+#define CRASH_IOCBOOTIMG _IOWR(CRASH_IOC_MAGIC, 2, struct boot_image)
-+#define CRASH_IOCVERSION _IO(CRASH_IOC_MAGIC, 3)
-+
-+/* kernel-only part of crash.h */
-+#ifdef __KERNEL__
-+#include <asm/crash.h>
-+
-+#define CRASH_K_MINOR (1)
-+#define CRASH_K_MAJOR (0)
-+
-+/*
-+ * Crash prototypes.
-+ */
-+void save_core(void);
-+void crash_mark_dump_reserved(void);
-+void crash_init(u_long bootmap_va, u_long crash_va, u_long end_alloc_va);
-+u_long crash_pages_needed(void);
-+void smp_crash_funnel_cpu(void);
-+void crash_cleanup_smp_state(void);
-+
-+/*
-+ *  Arch dependent crash.c funcs
-+ */
-+void crash_save_current_state(struct task_struct *);
-+void crash_halt_or_reboot(int);
-+inline void crash_save_regs(void);
-+
-+/*
-+ * Crash globals
-+ */
-+extern u_long crash_dump_header;
-+extern volatile u_long panic_ksp[];
-+extern volatile int crash_release;
-+extern int panic_on_oops;
-+extern char *panicmsg;
-+extern int panic_processor;
-+extern int crash_perform_sync;
-+extern unsigned long *panic_regs;
-+
-+/*
-+ * symbols not exported by linux header files
-+ */
-+extern void stop_this_cpu(void *);
-+
-+/*  struct crash_map_hdr located at byte offset 0 */
-+/* on-disk formats */
-+
-+#define trunc_page(x)   ((void *)(((unsigned long)(x)) & ~((unsigned long)(PAGE_SIZE - 1))))
-+#define round_page(x)   trunc_page(((unsigned long)(x)) + ((unsigned long)(PAGE_SIZE - 1)))
-+
-+#define CRASH_MAGIC 0x9a8bccdd
-+#define CRASH_SOURCE_PAGES 128
-+#define CRASH_SUB_MAP_BYTES ((u_long)round_page((CRASH_SOURCE_PAGES+1)*sizeof(u_long)))
-+#define CRASH_SUB_MAP_PAGES (CRASH_SUB_MAP_BYTES / PAGE_SIZE)
-+#define CRASH_UNCOMPR_BUF_PAGES (CRASH_SOURCE_PAGES + CRASH_SUB_MAP_PAGES)
-+#define CRASH_COMPR_BUF_PAGES (CRASH_UNCOMPR_BUF_PAGES + (CRASH_UNCOMPR_BUF_PAGES/4))
-+#define CRASH_COMPESS_PRIME_PAGES (2*CRASH_COMPR_BUF_PAGES)
-+#define CRASH_ZALLOC_PAGES (16*5*2)   /* 2 to handle crash in crash; parenthesized so the value survives any surrounding operator */
-+#define CRASH_LOW_WATER_PAGES 100
-+
-+#define CRASH_CPU_TIMEOUT 5000        /* 5 sec wait for other cpus to stop */
-+
-+#define CRASH_MARK_RESERVED(addr) (set_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags))
-+#define CRASH_CLEAR_RESERVED(addr) (clear_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags))
-+#define CRASH_MARK_BOOT_RESERVED(addr) reserve_bootmem(virt_to_phys((void *)addr), PAGE_SIZE);
-+
-+typedef int boolean_t;
-+
-+#define TRUE 1
-+#define FALSE 0
-+
-+/* mem structure */
-+struct mem_crash_map_hdr {
-+      long magic[4];          /* identify crash dump */
-+      u_long map;             /* location of map */
-+      u_long map_pages;       /* presumably the number of map pages -- confirm */
-+      u_long data_pages;      /* presumably the number of saved data pages -- confirm */
-+      u_long compr_units;     /* presumably a count of compression units -- confirm */
-+      u_long boot_reserved_start;     /* NOTE(review): units and inclusiveness of this range are not visible here */
-+      u_long boot_reserved_end;
-+};
-+struct mem_crash_map_entry {
-+      u_long src_va;          /* source start of larger non-contig 
-+                               * block.  a src_va of -1 means that 
-+                               * the dest_page_va is the location of 
-+                               * the next map page */
-+      u_long dest_page_va;    /* dest of this sub block */
-+      u_long check_sum;       /* check_sum for dest data */
-+};
-+
-+/* file structure */
-+struct crash_map_hdr {
-+      long magic[4];          /* identify crash dump */
-+      int blk_size;           /* block size for this device */
-+      int map_block;          /* location of map */
-+      int map_blocks;         /* number of blocks for map */
-+};
-+struct crash_map_entry {
-+      u_long start_va;        /* virtual address */
-+      char *exp_data;         /* expanded data in memory */
-+      int start_blk;          /* device location */
-+      int num_blks;           /* presumably the length in blocks starting at start_blk -- confirm */
-+};
-+
-+#endif /* __KERNEL__ */
-+#endif /* __LINUX_CRASH_H */
-Index: linux/include/linux/mm.h
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/include/linux/mm.h,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.2
-diff -u -r1.2.2.1 -r1.2.2.1.2.2
---- linux/include/linux/mm.h   12 Mar 2003 19:51:27 -0000      1.2.2.1
-+++ linux/include/linux/mm.h   1 Apr 2003 17:55:35 -0000       1.2.2.1.2.2
-@@ -331,6 +331,11 @@
- #define PG_lru                        18
- #define PG_active_cache               19
- #define PG_fs_1                       20      /* Filesystem specific */
-+#ifdef CONFIG_MCL_COREDUMP
-+#define PG_free                       21
-+#define PG_shm                        22
-+#define PG_anon                       23
-+#endif
- /* Make it prettier to test the above... */
- #define UnlockPage(page)      unlock_page(page)
-@@ -452,6 +457,11 @@
- #define PageSetSlab(page)     set_bit(PG_slab, &(page)->flags)
- #define PageClearSlab(page)   clear_bit(PG_slab, &(page)->flags)
- #define PageReserved(page)    test_bit(PG_reserved, &(page)->flags)
-+#ifdef CONFIG_MCL_COREDUMP
-+#define PageFree(page)          (test_bit(PG_free, &(page)->flags))
-+#define PageAnon(page)          (test_bit(PG_anon, &(page)->flags))
-+#define PageShm(page)           (test_bit(PG_shm, &(page)->flags))
-+#endif
- #define PageActiveAnon(page)          test_bit(PG_active_anon, &(page)->flags)
- #define SetPageActiveAnon(page)       set_bit(PG_active_anon, &(page)->flags)
-Index: linux/include/linux/reboot.h
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/include/linux/reboot.h,v
-retrieving revision 1.1.1.1
-retrieving revision 1.1.1.1.10.2
-diff -u -r1.1.1.1 -r1.1.1.1.10.2
---- linux/include/linux/reboot.h       7 May 2002 21:53:47 -0000       1.1.1.1
-+++ linux/include/linux/reboot.h       1 Apr 2003 17:55:35 -0000       1.1.1.1.10.2
-@@ -20,6 +20,7 @@
-  * CAD_OFF     Ctrl-Alt-Del sequence sends SIGINT to init task.
-  * POWER_OFF   Stop OS and remove all power from system, if possible.
-  * RESTART2    Restart system using given command string.
-+ * COREDUMP    We're taking a core dump, secondary cpus already stopped.
-  */
- #define       LINUX_REBOOT_CMD_RESTART        0x01234567
-@@ -28,7 +29,9 @@
- #define       LINUX_REBOOT_CMD_CAD_OFF        0x00000000
- #define       LINUX_REBOOT_CMD_POWER_OFF      0x4321FEDC
- #define       LINUX_REBOOT_CMD_RESTART2       0xA1B2C3D4
--
-+#ifdef CONFIG_MCL_COREDUMP
-+#define LINUX_REBOOT_CMD_COREDUMP     0x9A8BCCDD
-+#endif
- #ifdef __KERNEL__
-Index: linux/include/linux/sysctl.h
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/include/linux/sysctl.h,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/include/linux/sysctl.h       12 Mar 2003 19:51:30 -0000      1.3.2.1
-+++ linux/include/linux/sysctl.h       1 Apr 2003 12:17:41 -0000       1.3.2.1.2.1
-@@ -126,6 +126,7 @@
-       KERN_CADPID=54,         /* int: PID of the process to notify on CAD */
-       KERN_CORE_PATTERN=56,   /* string: pattern for core-files */
-       KERN_PID_MAX=55,        /* int: max PID value of processes */
-+      KERN_PANIC_ON_OOPS      /* int: panic on oops enabled */
- };
-Index: linux/init/main.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/init/main.c,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.1
-diff -u -r1.2.2.1 -r1.2.2.1.2.1
---- linux/init/main.c  12 Mar 2003 19:51:35 -0000      1.2.2.1
-+++ linux/init/main.c  1 Apr 2003 12:17:41 -0000       1.2.2.1.2.1
-@@ -70,6 +70,10 @@
- #include <asm/smp.h>
- #endif
-+#ifdef CONFIG_BOOTIMG
-+#include <linux/bootimg.h>
-+#endif
-+
- /*
-  * Versions of gcc older than that listed below may actually compile
-  * and link okay, but the end product can have subtle run time bugs.
-@@ -352,10 +356,14 @@
- {
-       char * command_line;
-       extern char saved_command_line[];
-+#if defined(CONFIG_BOOTIMG) && defined(CONFIG_X86_LOCAL_APIC)
-+      unsigned long value;
-+#endif
- /*
-  * Interrupts are still disabled. Do necessary setups, then
-  * enable them
-  */
-+      printk("start_kernel\n");
-       lock_kernel();
-       printk(linux_banner);
-       setup_arch(&command_line);
-@@ -373,12 +381,26 @@
-        * this. But we do want output early, in case something goes wrong.
-        */
-       console_init();
-+
-+#ifdef CONFIG_BOOTIMG
-+      unity_page = alloc_bootmem_pages(PAGE_SIZE);
-+      printk("unity_page addr: %p\n",unity_page);
-+#endif
- #ifdef CONFIG_MODULES
-       init_modules();
- #endif
-       profile_init();
-       kmem_cache_init();
-       sti();
-+#if defined(CONFIG_BOOTIMG) && defined(CONFIG_X86_LOCAL_APIC)
-+      /* If we don't make sure the APIC is enabled, AND the LVT0
-+       * register is programmed properly, we won't get timer interrupts
-+       */
-+      setup_local_APIC();
-+      
-+      value = apic_read(APIC_LVT0);
-+      apic_write_around(APIC_LVT0, value & ~APIC_LVT_MASKED);
-+#endif
-       calibrate_delay();
- #ifdef CONFIG_BLK_DEV_INITRD
-       if (initrd_start && !initrd_below_start_ok &&
-Index: linux/kernel/Makefile
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/kernel/Makefile,v
-retrieving revision 1.1.1.1.4.1
-retrieving revision 1.1.1.1.4.1.2.1
-diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1
---- linux/kernel/Makefile      12 Mar 2003 19:51:36 -0000      1.1.1.1.4.1
-+++ linux/kernel/Makefile      1 Apr 2003 12:17:41 -0000       1.1.1.1.4.1.2.1
-@@ -22,7 +22,8 @@
- obj-$(CONFIG_PM) += pm.o
- obj-$(CONFIG_KALLSYMS) += kallsyms.o
- obj-$(CONFIG_CPU_FREQ) += cpufreq.o
--
-+obj-$(CONFIG_BOOTIMG) += bootimg.o bootimg_pic.o
-+obj-$(CONFIG_MCL_COREDUMP) += crash.o
- ifneq ($(CONFIG_IA64),y)
- # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
-Index: linux/kernel/bootimg.c
-===================================================================
-RCS file: linux/kernel/bootimg.c
-diff -N linux/kernel/bootimg.c
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/kernel/bootimg.c     1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,301 @@
-+/* bootimg.c - Boot another (kernel) image */
-+
-+/* Written 2000 by Werner Almesberger */
-+
-+
-+#include <linux/config.h>
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/capability.h>
-+#include <linux/bootimg.h>
-+#include <asm/bootimg.h>
-+#include <asm/uaccess.h>
-+#include <asm/io.h>
-+#include <asm/pgtable.h>
-+#include <linux/delay.h>
-+
-+#if 0
-+#define DPRINTK_CONT(format,args...) printk(format,##args)
-+#else
-+#define DPRINTK_CONT(format,args...)
-+#endif
-+#define DPRINTK(format,args...) DPRINTK_CONT(KERN_DEBUG format,##args)
-+
-+unsigned long **bootimg_page_dir;
-+
-+struct bootimg_dsc bootimg_dsc; /* communication with PIC */
-+unsigned long *unity_page; /* unity-mapped page for i386 */
-+
-+static unsigned long bootimg_checksum(unsigned long **page_dir, int num_pages)
-+{
-+      unsigned long sum = 0;
-+      int pg;
-+
-+      /* XOR PAGES_PER_TABLE words of each source page plus its destination entry. */
-+      for (pg = 0; pg < num_pages; pg++) {
-+              unsigned long *words = __va((unsigned long *)
-+                          page_dir[FROM_TABLE(pg)][PAGE_NR(pg)]);
-+              int w = 0;
-+
-+              while (w < PAGES_PER_TABLE)
-+                      sum ^= words[w++];
-+
-+              sum ^= page_dir[TO_TABLE(pg)][PAGE_NR(pg)];
-+      }
-+
-+      return sum;
-+}
-+
-+#ifdef CONFIG_X86_PAE
-+
-+static unsigned long get_identity_mapped_page(void)
-+{
-+      pgd_t *pgd;
-+      pmd_t *pmd;
-+      unsigned long phys_addr, page_base;
-+
-+      /* Set up a 2 Mb identity-mapped page. */
-+
-+      phys_addr = virt_to_phys(unity_page);
-+      pgd = pgd_offset(current->active_mm, phys_addr);        /* looked up with phys_addr used as the virtual address */
-+      pmd = pmd_offset(pgd, phys_addr);
-+
-+      /* We hardcode this rather than using PMD_MASK just in case the PAE
-+       * mode setup ever changes so that 2 Mb pages are no longer used.
-+       */
-+      page_base = phys_addr & ~((1 << 21) - 1);       /* round down to a 2 Mb boundary */
-+
-+      set_pmd(pmd, __pmd(page_base | _PAGE_PSE | _KERNPG_TABLE));
-+      __flush_tlb_one(phys_addr);     /* flush the TLB entry for that one address */
-+
-+      return (unsigned long) unity_page;      /* the kernel virtual address; boot_image() applies virt_to_phys() itself */
-+}
-+
-+#else
-+
-+static unsigned long get_identity_mapped_page(void)
-+{
-+      set_pgd(pgd_offset(current->active_mm,virt_to_phys(unity_page)),        /* non-PAE: same idea, done at page-directory level */
-+              __pgd((_KERNPG_TABLE + _PAGE_PSE + (virt_to_phys(unity_page)&PGDIR_MASK))));
-+      __flush_tlb_one(virt_to_phys(unity_page));      /* flush the TLB entry for that one address */
-+      return (unsigned long)unity_page;       /* the kernel virtual address, as in the PAE variant */
-+}
-+
-+#endif
-+
-+#if 0 /* Perhaps we'll need this in the future? */
-+static void unmap_identity_mapped_page(void)
-+{
-+      set_pgd(pgd_offset(current->active_mm,virt_to_phys(unity_page)),__pgd(0));
-+      __flush_tlb();
-+}
-+#endif
-+
-+static int fill_page_dir(unsigned long **page_dir,struct boot_image *image)
-+{
-+      int i, count=0;
-+      /* Build the source/destination tables, then copy the image in from user space. */
-+      memset(page_dir,0,PAGE_SIZE);
-+      for (i = 0; i < image->pages; i += PAGES_PER_TABLE) {   /* one pass per source/destination table pair */
-+              unsigned long **table;
-+              int bytes_left;
-+
-+              table = page_dir+FROM_TABLE(i);         /* directory slot of the source table */
-+              *table = (unsigned long *) get_free_page(GFP_KERNEL);
-+              if (!*table) return -ENOMEM;            /* nothing is freed here; fill_bootimg_dsc() calls free_page_dir() on error */
-+
-+              memset(*table,0,PAGE_SIZE);
-+              DPRINTK("page %d: from table %p @ %p\n",i,*table,table);
-+              table = page_dir+TO_TABLE(i);           /* directory slot of the matching destination table */
-+              *table = (unsigned long *) get_free_page(GFP_KERNEL);
-+              if (!*table) return -ENOMEM;
-+
-+              bytes_left = (image->pages-i)*sizeof(unsigned long);
-+              if (copy_from_user(*table,image->load_map+i,    /* at most one page worth of load_map entries */
-+                  bytes_left > PAGE_SIZE ? PAGE_SIZE : bytes_left))
-+                      return -EFAULT;
-+              DPRINTK("page %d: to table %p @ %p\n",i,*table,table);
-+              count+=2; /* 2 pages per loop */
-+      }
-+
-+      for (i = 0; i < image->pages; i++) {
-+              unsigned long page = get_free_page(GFP_KERNEL);
-+              void *src;
-+
-+              if (!page) return -ENOMEM;
-+              count++;
-+
-+              page_dir[FROM_TABLE(i)][PAGE_NR(i)] =
-+                  virt_to_phys((void *) page);        /* recorded before the copy, so a failure below leaves it reachable for freeing */
-+              if (get_user(src,image->image_map+i) ||         /* src = user pointer to image page i */
-+                  copy_from_user((void *) page,src,PAGE_SIZE))
-+                      return -EFAULT;
-+
-+              DPRINTK("page %d: %p->%p->%p @ %p\n",i,src,(void *) page,
-+                  (void *) page_dir[FROM_TABLE(i)][PAGE_NR(i)],
-+                  &page_dir[FROM_TABLE(i)][PAGE_NR(i)]);
-+      }
-+
-+      DPRINTK("fill_page_dir: %d pages allocated\n", count);
-+
-+      return 0;
-+}
-+
-+
-+static void free_page_dir(unsigned long **page_dir)
-+{
-+      int i,j,count=0;
-+      /* Free the image pages listed in the source tables, then every table page; page_dir itself is left to the caller. */
-+      for (i = 0; i < PAGES_PER_TABLE/2; i++)        /* source half of the directory */
-+              if (page_dir[i])
-+                      for (j = 0; j < PAGES_PER_TABLE; j++)
-+                              if (page_dir[i][j]) {   /* entries hold physical addresses */
-+                                      free_page((unsigned long)
-+                                          phys_to_virt(page_dir[i][j]));
-+                                      count++;
-+                              }
-+      for (i = 0; i < PAGES_PER_TABLE; i++)  /* source and destination tables alike */
-+              if (page_dir[i]) {
-+                      free_page((unsigned long) page_dir[i]); /* the table page itself, not its first entry */
-+                      count++;
-+              }
-+      DPRINTK("free_page_dir: %d pages freed\n", count);
-+}
-+
-+
-+static void convert_table_refs_to_phys(unsigned long **page_dir)
-+{
-+      int slot;       /* walks every directory entry, source and destination */
-+
-+      DPRINTK("PAGES_PER_TABLE: %d\n",PAGES_PER_TABLE);
-+      for (slot = 0; slot < PAGES_PER_TABLE; slot++) {
-+              if (!page_dir[slot])
-+                      continue;       /* unused slot: nothing to translate */
-+              DPRINTK("table %i: mapped %p -> ",slot,page_dir[slot]);
-+              page_dir[slot] = (unsigned long *) virt_to_phys(page_dir[slot]);
-+              DPRINTK_CONT("%p\n",page_dir[slot]);
-+      }
-+}
-+
-+
-+
-+static int fill_bootimg_dsc(struct boot_image *image)
-+{
-+      unsigned long scratch;
-+      int error = -ENOMEM;
-+      /* Rebuild bootimg_page_dir from *image and publish it through bootimg_dsc; returns 0 or a negative errno. */
-+      if(bootimg_page_dir) {
-+              /* free previously allocated memory */
-+              free_page_dir(bootimg_page_dir);
-+              free_page((unsigned long) bootimg_page_dir);
-+              DPRINTK("free_page (bootimg_page_dir)\n");
-+      }
-+
-+      bootimg_page_dir = (unsigned long **) get_free_page(GFP_KERNEL);
-+      if (!bootimg_page_dir) goto out0;
-+      DPRINTK("get_free_page (bootimg_page_dir)\n");
-+
-+      error = fill_page_dir(bootimg_page_dir,image);
-+      if (error) goto out1;
-+
-+      if(!bootimg_dsc.scratch) {
-+              scratch = get_free_page(GFP_KERNEL);
-+              DPRINTK("get_free_page (scratch)\n");
-+      } else
-+              scratch = 1; /* already allocated */
-+
-+      if (!scratch) { error = -ENOMEM; goto out1; }   /* error is 0 after fill_page_dir(); do not report success */
-+      /*
-+       * Not all architectures need the code to be identity-mapped, but it
-+       * can't hurt ...
-+       */
-+      DPRINTK("bootimg_page_dir: mapped %p -> ",bootimg_page_dir);
-+      bootimg_dsc.page_dir = (unsigned long **) virt_to_phys(bootimg_page_dir);
-+      DPRINTK_CONT("%p\n",bootimg_dsc.page_dir);
-+      if(!bootimg_dsc.scratch)        /* the scratch page is kept across calls once allocated */
-+              bootimg_dsc.scratch = virt_to_phys((void *) scratch);
-+      bootimg_dsc.jump_to = (void (*)(void)) image->start;
-+      bootimg_dsc.pages = image->pages;
-+      bootimg_dsc.csum = bootimg_checksum(bootimg_page_dir, image->pages);
-+
-+      return 0;
-+
-+out1:
-+      free_page_dir(bootimg_page_dir);
-+      free_page((unsigned long) bootimg_page_dir);
-+      DPRINTK("free_page (bootimg_page_dir)\n");
-+      bootimg_page_dir = 0;
-+out0:
-+      return error;
-+}
-+
-+extern char *panicmsg;
-+int boot_image()
-+{
-+      relocate_and_jump_t code;
-+      unsigned long code_page;
-+      int error = -ENOMEM;
-+      /* Copy the .bootimg code into the identity-mapped page and run it; no path below returns to the caller. */
-+      if (bootimg_checksum(__va(bootimg_dsc.page_dir),bootimg_dsc.pages) 
-+              != bootimg_dsc.csum)
-+              printk("Checksum of kernel image failed.  Rebooting via BIOS\n");
-+      /* NOTE(review): a mismatch only prints the message above; execution continues with the image as-is. */
-+      code_page = get_identity_mapped_page();
-+      if (!code_page) goto out3;
-+      code = (relocate_and_jump_t) virt_to_phys((void *) code_page);  /* used as a pointer through the identity mapping */
-+      memcpy(code,&__bootimg_start,&__bootimg_end-&__bootimg_start);
-+      flush_icache_range(&__bootimg_start, &__bootimg_end-&__bootimg_start); /* NOTE(review): 2nd argument is a length here but an end address in relocate_and_jump() -- confirm */
-+
-+      bootimg_dsc.self = (unsigned long) code;
-+      printk(KERN_INFO "Running boot code at 0x%p\n",code);
-+      
-+      /*
-+       * The point of no return. Not even printk may work after a successful
-+       * return from become_only_thread.
-+       */
-+
-+      if (!panicmsg) {        /* no panic message recorded: take the full become_only_thread() path */
-+                      error = become_only_thread();
-+                      if (error) goto out3;
-+      } else {                /* panic message set: smp_send_stop() is not called on this path */
-+#ifdef CONFIG_SMP
-+                      disable_IO_APIC();
-+#endif
-+                      __cli();
-+      }
-+
-+      convert_table_refs_to_phys((unsigned long **)__va(bootimg_dsc.page_dir));
-+      stack_on_page(code);    /* stack pointer now sits at the end of the code page */
-+
-+      code();
-+
-+      panic("PIC code exec failed");
-+out3:
-+      printk("boot_image() failed!\n");
-+      for(;;); 
-+}
-+
-+/* changed from asmlinkage because we're called via an IOCTL on /dev/crash now */
-+int sys_bootimg(struct boot_image *user_dsc)
-+{
-+      struct boot_image dsc;
-+      /* Sanity-check the embedded boot code and the user's descriptor, then stage the image via fill_bootimg_dsc(). */
-+      if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_MODULE)) return -EPERM;        /* both capabilities are required */
-+      if (&__bootimg_end-&__bootimg_start > PAGE_SIZE-RESERVE_MIN_RELOC_STACK)       /* code must leave room for the stack in its page */
-+         {
-+              printk(KERN_ERR "boot_image: PIC too large (%d bytes)\n",
-+                  &__bootimg_end-&__bootimg_start);
-+              return -EIO;
-+      }
-+      if ((void *) relocate_and_jump != (void *) &__bootimg_start) { /* boot_image() calls the first byte of the copied section */
-+              printk(KERN_ERR "boot_image: relocate_and_jump is mis-placed"
-+                  "(0x%p != 0x%p)\n",relocate_and_jump,&__bootimg_start);
-+              return -EIO;
-+      }
-+      
-+      if (copy_from_user(&dsc,user_dsc,sizeof(dsc))) return -EFAULT;
-+      if (dsc.pages >= PAGES_PER_TABLE*PAGES_PER_TABLE/2) return -EFBIG;     /* more pages than half the directory's tables can describe */
-+      if (dsc.flags) return -EINVAL; /* for future use */
-+      return fill_bootimg_dsc(&dsc);
-+}
-Index: linux/kernel/bootimg_pic.c
-===================================================================
-RCS file: linux/kernel/bootimg_pic.c
-diff -N linux/kernel/bootimg_pic.c
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/kernel/bootimg_pic.c 1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,91 @@
-+/* bootimg_pic.c - Boot image, position-independent code */
-+
-+/* Written 2000 by Werner Almesberger */
-+
-+/*
-+ * Strongly inspired by FiPaBoL designed mainly by Otfried Cheong and Roger
-+ * Gammans, and written by the latter.
-+ */
-+
-+/*
-+ * This code is position-independent and must fit in a single page !
-+ * Furthermore, everything (text+data+stack) has to go into the
-+ * .bootimg segment.
-+ */
-+
-+
-+#include <linux/config.h>
-+#include <linux/kernel.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/bootimg.h>
-+#include <asm/bootimg.h>
-+
-+#include <asm/io.h>
-+
-+#define copy_and_swap(from,to) /* copy page from -> to, then exchange the two lvalues; needs a `tmp` in the caller's scope */ \
-+    ( { my_copy_page(from,to); \
-+    tmp = from; \
-+    from = to; \
-+    to = tmp; } )
-+
-+
-+static inline void my_copy_page(unsigned long from,unsigned long to)
-+{
-+      unsigned long *src = (unsigned long *) from, *dst = (unsigned long *) to;      /* typed cursors instead of cast-as-lvalue */
-+      unsigned long *end = (unsigned long *) (from+PAGE_SIZE);        /* one past the last word of the source page */
-+
-+      do *dst++ = *src++; while (src != end);         /* copy exactly PAGE_SIZE bytes, one long at a time */
-+}
-+
-+
-+void __bootimg relocate_and_jump(void)
-+{
-+      struct bootimg_dsc dsc = bootimg_dsc;   /* private copy taken before any page is moved */
-+      int i;
-+
-+      stop_paging();
-+      for (i = 0; i < dsc.pages; i++) {
-+              unsigned long from,to,tmp;      /* tmp is the temporary used inside copy_and_swap() */
-+
-+              from = dsc.page_dir[FROM_TABLE(i)][PAGE_NR(i)];
-+              to = dsc.page_dir[TO_TABLE(i)][PAGE_NR(i)];
-+              if (from == to) continue;       /* already in place */
-+              if (to == dsc.self) {           /* destination is the page this code runs from */
-+                      copy_and_swap(dsc.self,dsc.scratch);    /* afterwards dsc.self is the copy, dsc.scratch the old page */
-+                      /* WARNING: flush_icache_range MUST BE INLINED !!! */
-+                      flush_icache_range(dsc.self,dsc.self+PAGE_SIZE-1);
-+                      jump_relocated(dsc.scratch,dsc.self);   /* continue executing from the copy */
-+              }
-+              else if (to == (unsigned long) dsc.page_dir)    /* destination is the directory page */
-+                      copy_and_swap((unsigned long) dsc.page_dir,dsc.scratch);        /* NOTE(review): the macro assigns through this cast (cast-as-lvalue) */
-+              else {
-+                      /*
-+                       * O((n^2-n)/2), sigh ...
-+                       */
-+                      unsigned long **table;
-+                      int j;
-+
-+                      for (j = i+1; j < dsc.pages; j++) {     /* does a not-yet-copied page or its table live at `to`? */
-+                              table = dsc.page_dir+FROM_TABLE(j);
-+                              if (((unsigned long) *table) == to) {   /* a source table sits there */
-+                                      copy_and_swap(*table,dsc.scratch);
-+                                      break;
-+                              }
-+                              if ((*table)[PAGE_NR(j)] == to) {       /* source page j sits there */
-+                                      copy_and_swap((*table)[PAGE_NR(j)],
-+                                          dsc.scratch);
-+                                      break;
-+                              }
-+                              table = dsc.page_dir+TO_TABLE(j);
-+                              if (((unsigned long) *table) == to) {   /* a destination table sits there */
-+                                      copy_and_swap(*table,dsc.scratch);
-+                                      break;
-+                              }
-+                      }
-+              }
-+              my_copy_page(from,to);          /* place image page i at its destination */
-+              dsc.scratch = from;             /* the vacated source page becomes the next scratch page */
-+      }
-+      jump_to_kernel(dsc.jump_to);
-+}
-Index: linux/kernel/crash.c
-===================================================================
-RCS file: linux/kernel/crash.c
-diff -N linux/kernel/crash.c
---- /dev/null  1 Jan 1970 00:00:00 -0000
-+++ linux/kernel/crash.c       1 Apr 2003 12:17:41 -0000       1.1.6.1
-@@ -0,0 +1,886 @@
-+#include <linux/locks.h>
-+#include <linux/slab.h>
-+#include <linux/crash.h>
-+#include <linux/vmalloc.h>
-+#include <linux/mm.h>
-+#include <linux/fs.h>
-+#include <linux/ext2_fs.h>
-+#include <asm/param.h>
-+#include <asm/uaccess.h>
-+#include <linux/zlib.h>
-+#include <linux/reboot.h>
-+#include <linux/delay.h>
-+#include <asm/io.h>
-+#include <linux/miscdevice.h>
-+#include <linux/bootmem.h>
-+
-+#ifdef CONFIG_BOOTIMG
-+#include <linux/bootimg.h>
-+#endif
-+
-+static void crash_print_data_around(u_long p);
-+static void crash_free_page(u_long addr);
-+static int crash_chksum_page(u_long pg_addr, u_long * sum_addr);
-+static void *czalloc(void *arg, unsigned int items, unsigned int size);
-+static void czfree(void *arg, void *ptr);
-+static u_long crash_alloc_dest_page(void);
-+static void crash_free_dest_page(u_long dest);
-+static void init_dest_page_alloc(void);
-+static int crash_audit_maps(void);
-+static u_long crash_get_source_page(void);
-+static u_long crash_update_map(u_long map, u_long src_base, u_long dest, u_long * pages);
-+static int crash_reset_stream(z_stream * stream);
-+static boolean_t crash_is_kseg(u_long addr);
-+static u_long *crash_link(u_long p);
-+static int crash_chksum(u_long limit, u_long * sum_addr);
-+static int crash_audit_map_page(u_long map);
-+static void crash_wait_cpus(void);
-+static int crash_is_dir_page(struct page *page);
-+
-+/* for the /dev/crash interface */
-+int crash_init_chrdev(void);
-+static int crashdev_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
-+
-+#define CRASH_DEBUG 1
-+
-+#ifdef CONFIG_BOOTIMG
-+extern int sys_bootimg(struct boot_image *);
-+#endif
-+
-+/* Staging buffers; their addresses are assigned in crash_init(). */
-+static u_long crash_compr_buf;
-+static u_long crash_uncompr_buf;
-+/* Address of the dump header page; stays 0 until crash_init() sets it. */
-+static u_long crash_dump_header = 0;
-+/* Head of the free list used by crash_alloc_dest_page()/crash_free_dest_page(). */
-+static u_long crash_dest_free_list = 0;
-+/* Non-zero enables the extra printk output guarded by "if (crash_debug)". */
-+static u_long crash_debug = 0;
-+
-+/* Next page frame number crash_get_source_page() will look at. */
-+static u_long crash_cur_pfn;
-+
-+/* Page counters maintained while a dump is being built. */
-+static u_long src_pages_skipped = 0;
-+static u_long src_pages_saved = 0;
-+static u_long dest_pages_free = 0;
-+
-+/* this information is saved from within panic() */
-+char *panicmsg = (char *)0;
-+int panic_processor = 0;
-+int crash_perform_sync = 0;
-+
-+u_int console_crash = 0;      /* should be moved to alpha branch */
-+
-+// typedef struct task_struct *task_t;
-+
-+/*
-+ *  Threads active at time of panic:
-+ */
-+volatile task_t *panic_threads[NR_CPUS];
-+volatile unsigned long panic_ksp[NR_CPUS];
-+unsigned long *panic_regs = NULL;
-+
-+int panic_on_oops;            /* for /proc/sys/kernel/panic_on_oops */
-+
-+extern unsigned long max_low_pfn;
-+
-+/* Start of the zlib workspace; handed to the z_stream as ->workspace. */
-+u_long crash_zalloc_start; // , crash_zalloc_end, crash_zalloc_cur;
-+
-+/* 
-+ * Crash Kernel API functions below
-+ * crash_pages_needed, computes pages needed for header and compression temp
-+ * crash_init, partitions out the allocated pages, sets defaults and 
-+ *             initializes the character device.
-+ * crash_mark_dump_reserved, marks pages reserved from a previous dump.
-+ * save_core, called at panic time to save a dump to memory.
-+ */
-+u_long crash_pages_needed(void)
-+{
-+      /*
-+       * Header page first, then the three regions crash_init() carves
-+       * out behind it: zlib workspace, uncompressed and compressed buffers.
-+       */
-+      u_long npages = 1;
-+
-+      npages += CRASH_ZALLOC_PAGES;
-+      npages += CRASH_UNCOMPR_BUF_PAGES;
-+      npages += CRASH_COMPR_BUF_PAGES;
-+      return npages;
-+}
-+
-+/*
-+ * Partition the crash region starting at crash_va and set defaults.
-+ *
-+ * Layout, in order: one header page, CRASH_ZALLOC_PAGES of zlib workspace,
-+ * CRASH_UNCOMPR_BUF_PAGES of uncompressed staging buffer and
-+ * CRASH_COMPR_BUF_PAGES of compressed staging buffer.
-+ *
-+ * The header page is inspected before it is written: if it already carries
-+ * CRASH_MAGIC it is kept (only a warning is printed when its recorded
-+ * boot-reserved range differs from the arguments); otherwise it is zeroed.
-+ * In both cases the boot-reserved range is then overwritten with
-+ * bootmap_va/end_alloc_va.
-+ *
-+ * NOTE(review): the file-level API comment says crash_init also initializes
-+ * the character device, but no call to crash_init_chrdev() appears here.
-+ */
-+void crash_init(u_long bootmap_va, u_long crash_va, u_long end_alloc_va)
-+{
-+      struct mem_crash_map_hdr *header;
-+      int i;
-+
-+      /* the default behavior is to NOT panic on a kernel OOPS */
-+      panic_on_oops = 0;
-+
-+      printk("crash_init (crash_va: %08lx)\n", crash_va);
-+      for (i = 0; i < NR_CPUS; i++)
-+              panic_threads[i] = 0;
-+      /* carve the region up; crash_va walks forward past each piece */
-+      crash_dump_header = crash_va;
-+      crash_va += PAGE_SIZE;
-+      crash_zalloc_start = crash_va;
-+      crash_va += CRASH_ZALLOC_PAGES * PAGE_SIZE;
-+      crash_uncompr_buf = crash_va;
-+      crash_va += CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE;
-+      crash_compr_buf = crash_va;
-+      crash_va += CRASH_COMPR_BUF_PAGES * PAGE_SIZE;
-+#if 0
-+      if (crash_va != end_alloc_va)
-+              panic("crash_init inconsistency-1\n");
-+#endif
-+
-+      header = (struct mem_crash_map_hdr *)crash_dump_header;
-+#ifdef CRASH_DEBUG
-+      printk("crash_dump_header %p {\n", header);
-+      printk("    magic[0]            = %lx\n", header->magic[0]);
-+      printk("    map                 = %lx\n", header->map);
-+      printk("    map_pages           = %lx\n", header->map_pages);
-+      printk("    data_pages          = %lx\n", header->data_pages);
-+      printk("    compr_units         = %lx\n", header->compr_units);
-+      printk("    boot_reserved_start = %lx\n", header->boot_reserved_start);
-+      printk("    boot_reserved_end   = %lx\n", header->boot_reserved_end);
-+#endif
-+
-+      if (header->magic[0] == CRASH_MAGIC) {
-+              printk("crash found\n");
-+              if ((header->boot_reserved_start != bootmap_va) ||
-+                  (header->boot_reserved_end != end_alloc_va)) {
-+                      /* crash audit will catch the corruption */
-+                      printk("crash_init inconsistency, dump may be corrupted\n");
-+              }
-+      } else {
-+printk("memset...");
-+              memset(header, 0, sizeof(*header));
-+printk("done\n");
-+      }
-+
-+      header->boot_reserved_start = bootmap_va;
-+      header->boot_reserved_end = end_alloc_va;
-+
-+}
-+
-+/*
-+ * Mark every page of a dump found in memory as boot-reserved.
-+ *
-+ * Does nothing unless the header carries CRASH_MAGIC.  The map chain is
-+ * audited first; if the audit fails the magic is cleared and nothing is
-+ * reserved.  Otherwise each map page and each destination page recorded in
-+ * it is passed to CRASH_MARK_BOOT_RESERVED().
-+ */
-+void crash_mark_dump_reserved(void)
-+{
-+      struct mem_crash_map_hdr *header;
-+      struct mem_crash_map_entry *m;
-+
-+      /* crash_init() has not run: there is no header to look at */
-+      if (!crash_dump_header)
-+              return;
-+
-+      header = (struct mem_crash_map_hdr *)crash_dump_header;
-+      if (header->magic[0] != CRASH_MAGIC)
-+              return;
-+#ifdef CRASH_DEBUG
-+      printk("\n\n\ncrash_mark_dump_reserved\n\n");
-+      printk("crash_dump_header %p {\n", header);
-+      printk("    magic[0]            = %lx\n", header->magic[0]);
-+      printk("    map                 = %lx\n", header->map);
-+      printk("    map_pages           = %lx\n", header->map_pages);
-+      printk("    data_pages          = %lx\n", header->data_pages);
-+      printk("    compr_units         = %lx\n", header->compr_units);
-+      printk("    boot_reserved_start = %lx\n", header->boot_reserved_start);
-+      printk("    boot_reserved_end   = %lx\n", header->boot_reserved_end);
-+#endif
-+
-+      if (crash_audit_maps()) {
-+              header->magic[0] = 0;
-+              return;
-+      }
-+
-+      m = (struct mem_crash_map_entry *)header->map;
-+#ifdef CRASH_DEBUG
-+      /*
-+       * Only dereference the map after the audit has accepted it; the
-+       * header->map value is untrusted until then.
-+       */
-+      printk("mem_crash_map_entry %p {\n", m);
-+      printk("    src_va              = %lx\n", m->src_va);
-+      printk("    dest_page_va        = %lx\n", m->dest_page_va);
-+      printk("    check_sum           = %lx\n", m->check_sum);
-+#endif
-+
-+ again:
-+      /* reserve the map page itself, then every data page it lists */
-+      CRASH_MARK_BOOT_RESERVED(m);
-+      for (; m->src_va; m++) {
-+              if (m->src_va == -1) {
-+                      /* link entry: continue on the next map page */
-+                      m = (struct mem_crash_map_entry *)m->dest_page_va;
-+                      goto again;
-+              }
-+              CRASH_MARK_BOOT_RESERVED(m->dest_page_va);
-+      }
-+}
-+
-+/*
-+ * Build a compressed dump of low memory inside free/reusable pages.
-+ *
-+ * Any CPU other than 0 records its current task in panic_threads[] and calls
-+ * crash_halt_or_reboot(0); CPU 0 records its own task and waits for the
-+ * others in crash_wait_cpus().
-+ *
-+ * The dump is produced in units: up to CRASH_SOURCE_PAGES source pages are
-+ * copied into the uncompressed buffer behind a sub-map of their addresses,
-+ * the buffer is deflated with Z_FINISH, and the compressed output is copied
-+ * page by page into destination pages, each recorded through
-+ * crash_update_map().  When the sources are exhausted the map is terminated,
-+ * CRASH_MAGIC is written to the header and the maps are audited; a failed
-+ * audit clears the magic again.
-+ *
-+ * Every early return leaves header->magic[0] at 0.
-+ *
-+ * NOTE(review): if crash_halt_or_reboot(0) can return, a non-zero CPU falls
-+ * through into the dump code below - confirm it never returns.
-+ */
-+void save_core(void)
-+{
-+      int i, j, k;
-+      z_stream stream;
-+      int err;
-+      struct task_struct *tp;
-+      struct mem_crash_map_hdr *header;
-+      u_long *sub_map;
-+      u_long map;
-+      u_long src, dest, unc, cp, src_base, comp_pages;
-+
-+      k = 0;
-+      dest = 0;
-+      __cli();
-+      tp = current;
-+      mb();
-+      if (smp_processor_id() != 0) {  /* boot_cpu_id is always 0, i think */
-+              panic_threads[smp_processor_id()] = tp;
-+              crash_halt_or_reboot(0);
-+      } else {
-+              if (console_crash)
-+                      panic_threads[smp_processor_id()] = &init_task_union.task;
-+              else
-+                      panic_threads[smp_processor_id()] = tp;
-+
-+              crash_wait_cpus();
-+      }
-+
-+      printk("save_core: started on CPU%d\n", smp_processor_id());
-+      if (!crash_dump_header) {
-+              printk("save_core: not initialized\n");
-+              return;
-+      }
-+
-+      /* invalidate any previous dump before building the new one */
-+      header = (struct mem_crash_map_hdr *)crash_dump_header;
-+      header->magic[0] = 0;
-+      header->map_pages = 0;
-+      header->data_pages = 0;
-+      header->compr_units = 0;
-+      header->map = 0;
-+
-+      stream.workspace=(void*)crash_zalloc_start;
-+      //      stream.zalloc = czalloc;
-+      //      stream.zfree = czfree;
-+      //      stream.opaque = (voidpf) 0;
-+      stream.next_out = (Bytef *) crash_compr_buf;
-+      stream.avail_out = (uInt) (CRASH_COMPR_BUF_PAGES * PAGE_SIZE);
-+      stream.next_in = (Bytef *) crash_uncompr_buf;
-+      stream.avail_in = (uInt) (CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE);
-+      err = zlib_deflateInit(&stream, Z_BEST_SPEED);
-+      if (err != Z_OK) {
-+              printk("save_core: bad return %d from deflateInit\n", err);
-+              return;
-+      }
-+
-+      init_dest_page_alloc();
-+      /* map == 0 asks crash_update_map() for the first map page */
-+      header->map = map = crash_update_map(0, 0, 0, &header->map_pages);
-+      if (!map) {
-+              printk("save_core: no dest pages\n");
-+              return;
-+      }
-+      crash_cur_pfn = 0;
-+      src_base = 0;
-+      src = 0;
-+      for (;;) {
-+              /* sub-map of source addresses sits in front of the page data */
-+              sub_map = (u_long *) crash_uncompr_buf;
-+              unc = crash_uncompr_buf + CRASH_SUB_MAP_PAGES * PAGE_SIZE;
-+              for (i = 0; i < CRASH_SOURCE_PAGES; i++) {
-+                      src = crash_get_source_page();
-+                      if (!src)
-+                              break;
-+                      if (!i)
-+                              src_base = src;
-+                      if (!crash_is_kseg(unc) || !crash_is_kseg(src)) {
-+                              printk("unc = 0x%lx, src = 0x%lx, i = %d\n", unc, src, i);
-+                              i = src = 0;
-+                              break;
-+                      }
-+                      memcpy((void *)unc, (void *)src, PAGE_SIZE);
-+                      unc += PAGE_SIZE;
-+                      *sub_map++ = src;
-+              }
-+              *sub_map = 0;
-+              /* nothing was gathered for this unit: the dump is complete */
-+              if (!i && !src)
-+                      break;
-+              err = zlib_deflate(&stream, Z_FINISH);
-+              if (!(err == Z_STREAM_END)) {
-+                      zlib_deflateEnd(&stream);
-+                      printk("save_core: bad return %d from deflate, src_base = 0x%lx\n", err,
-+                             src_base);
-+                      return;
-+              }
-+              comp_pages = (u_long) round_page(stream.total_out) / PAGE_SIZE;
-+              if (crash_debug)
-+                      printk("src_base = 0x%lx compressed data in 0x%lx pages\n", src_base,
-+                             comp_pages);
-+
-+              cp = crash_compr_buf;
-+              j = 0;
-+              if (crash_debug)
-+                      printk("\nsrc = %lx\n", src_base);
-+              else {
-+                      printk(".");
-+                      if (!(k++ % 64))
-+                              printk("\n");
-+              }
-+              for (i = 0; i < comp_pages; i++) {
-+                      dest = crash_alloc_dest_page();
-+                      if (crash_debug) {
-+                              printk("%lx ", dest);
-+                              if (!(j++ % 8))
-+                                      printk("\n");
-+                      }
-+                      /* NOTE(review): counted before the !dest check below */
-+                      header->data_pages++;
-+                      if (!dest) {
-+                              printk("save_core: no dest pages\n");
-+                              return;
-+                      }
-+                      if (!crash_is_kseg(dest) || !crash_is_kseg(cp)) {
-+                              printk("dest = 0x%lx, cp = 0x%lx, i = %d, comp_pages = 0x%lx\n",
-+                                     dest, cp, i, comp_pages);
-+                              src = 0;
-+                              break;
-+                      }
-+                      memcpy((void *)dest, (void *)cp, PAGE_SIZE);
-+                      cp += PAGE_SIZE;
-+                      map = crash_update_map(map, src_base, dest, &header->map_pages); /* links a new map page, if necessary */
-+                      if (!map) {
-+                              printk("save_core: no map\n");
-+                              return;
-+                      }
-+              }
-+              header->compr_units++;
-+              /* src == 0 here means the source pages ran out (or a check failed) */
-+              if (!src)
-+                      break;
-+              if (crash_reset_stream(&stream))
-+                      return;
-+      }
-+
-+      /* src_base == 0 writes the end-of-list entry and its checksum */
-+      map = crash_update_map(map, 0, 0, &header->map_pages);
-+      header->magic[0] = CRASH_MAGIC;
-+
-+      if (crash_audit_maps()) {
-+              header->magic[0] = 0;
-+              return;
-+      }
-+
-+      printk("\nsave_core: src pages skipped = 0x%lx src pages saved = 0x%lx\n",
-+             src_pages_skipped, src_pages_saved);
-+      printk("save_core: data_pages = 0x%lx map_pages = 0x%lx\n", header->data_pages,
-+             header->map_pages);
-+      printk("save_core: completed, crash_dump_header = 0x%lx\n", crash_dump_header);
-+}
-+
-+/* helper functions private to this file */
-+/*
-+ * Re-aim the deflate stream at the staging buffers and reset it so the
-+ * next compression unit starts from a clean state.
-+ * Returns 0 on success, 1 when zlib_deflateReset() does not return Z_OK.
-+ */
-+static int crash_reset_stream(z_stream * stream)
-+{
-+      int rc;
-+
-+      stream->workspace = (void *)crash_zalloc_start;
-+
-+      /* input: the whole uncompressed staging buffer */
-+      stream->next_in = (Bytef *) crash_uncompr_buf;
-+      stream->avail_in = (uInt) (CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE);
-+
-+      /* output: the whole compressed staging buffer */
-+      stream->next_out = (Bytef *) crash_compr_buf;
-+      stream->avail_out = (uInt) (CRASH_COMPR_BUF_PAGES * PAGE_SIZE);
-+
-+      rc = zlib_deflateReset(stream);
-+      if (rc == Z_OK)
-+              return 0;
-+
-+      printk("crash_reset_stream: bad return %d from deflateReset\n", rc);
-+      return 1;
-+}
-+
-+/*
-+ * Pop one page off the destination free list.
-+ * Returns the page address, or 0 (after logging) when the list is empty.
-+ */
-+static u_long crash_alloc_dest_page(void)
-+{
-+      u_long page = crash_dest_free_list;
-+
-+      if (!page) {
-+              printk("crash_alloc_dest_page: free list empty\n");
-+              return 0;
-+      }
-+
-+      /* the first word of a free page holds the next list element */
-+      crash_dest_free_list = *(u_long *) page;
-+      dest_pages_free--;
-+      return page;
-+}
-+
-+/*
-+ * Push the page containing dest onto the destination free list.
-+ * An address of 0 is rejected with a message.
-+ */
-+static void crash_free_dest_page(u_long dest)
-+{
-+      u_long *page;
-+
-+      if (dest == 0) {
-+              printk("crash_free_dest_page: freeing addr 0\n");
-+              return;
-+      }
-+
-+      /* link through the first word of the (page-aligned) page */
-+      page = (u_long *) trunc_page(dest);
-+      *page = crash_dest_free_list;
-+      crash_dest_free_list = (u_long) page;
-+      dest_pages_free++;
-+}
-+
-+/*
-+ *  Stolen from setup.c
-+ *
-+ *  Shifts a page frame number left by PAGE_SHIFT, giving the byte address
-+ *  of the start of that frame.
-+ */
-+#define PFN_PHYS(x)   ((x) << PAGE_SHIFT)
-+
-+/*
-+ * Populate the destination free list.
-+ *
-+ * Scans page frames from (1 << 24 bytes) + LOWER_MEM_FORWARD frames up to
-+ * max_low_pfn - UPPER_MEM_BACKUP.  A frame is added when it lies outside the
-+ * boot-reserved range, is neither locked nor reserved, and is free,
-+ * anonymous, shm or has buffers attached.
-+ */
-+static void init_dest_page_alloc(void)
-+{
-+      struct mem_crash_map_hdr *hdr = (struct mem_crash_map_hdr *)crash_dump_header;
-+      struct page *pg;
-+      u_long addr;
-+      long pfn;
-+
-+      for (pfn = ((1 << 24) >> PAGE_SHIFT) + LOWER_MEM_FORWARD;
-+           pfn < (max_low_pfn - UPPER_MEM_BACKUP); pfn++) {
-+              addr = (u_long) phys_to_virt(PFN_PHYS(pfn));
-+
-+              /* never hand out pages inside the boot-reserved window */
-+              if (!((addr < hdr->boot_reserved_start) || (addr >= hdr->boot_reserved_end)))
-+                      continue;
-+
-+              pg = mem_map + pfn;
-+              if (!PageLocked(pg) && !PageReserved(pg)) {
-+                      if (PageFree(pg) || PageAnon(pg) || PageShm(pg) || pg->buffers)
-+                              crash_free_dest_page(addr);
-+              }
-+      }
-+
-+      if (crash_debug)
-+              printk("init_dest_page_alloc: dest_pages_free = 0x%lx\n", dest_pages_free);
-+}
-+
-+/*
-+ * Returns 1 when the page's mapping host is a directory inode on a
-+ * filesystem whose superblock magic is EXT2_SUPER_MAGIC, 0 otherwise.
-+ */
-+static int crash_is_dir_page(struct page *page)
-+{
-+      struct inode *host;
-+
-+      if (!page->mapping || !page->mapping->host)
-+              return 0;
-+
-+      host = (struct inode *)page->mapping->host;
-+      if (host->i_sb->s_magic != EXT2_SUPER_MAGIC)
-+              return 0;
-+
-+      return S_ISDIR(host->i_mode) ? 1 : 0;
-+}
-+
-+/*
-+ * Return the virtual address of the next low-memory page worth dumping and
-+ * advance crash_cur_pfn past it; returns 0 once max_low_pfn is reached.
-+ * Pages that are free, anonymous, shm or carry buffers are skipped.
-+ */
-+static u_long crash_get_source_page(void)
-+{
-+      struct page *pg;
-+      u_long addr;
-+
-+      for (; crash_cur_pfn < max_low_pfn; crash_cur_pfn++) {
-+              pg = mem_map + crash_cur_pfn;
-+              if (!PageFree(pg) && !PageAnon(pg) && !PageShm(pg) && !pg->buffers)
-+                      break;          /* found a page to save */
-+              src_pages_skipped++;
-+      }
-+
-+      if (crash_cur_pfn == max_low_pfn)
-+              return 0;
-+
-+      addr = (u_long) phys_to_virt(PFN_PHYS(crash_cur_pfn));
-+      crash_cur_pfn++;
-+      src_pages_saved++;
-+      return addr;
-+}
-+
-+/*
-+ * Append one entry to the dump map and return the address of the next slot.
-+ *
-+ * map == 0:      start a map: count a page in *pages and return a freshly
-+ *                allocated destination page (0 if none is available).
-+ * otherwise:     store (src_base, dest) at map; when dest is non-zero its
-+ *                page checksum is stored in the entry's check_sum.
-+ * src_base == 0: this was the end-of-list entry; the checksum of the map
-+ *                page up to the following slot is stored in that slot's
-+ *                src_va.
-+ * page nearly full (fewer than three entries left before the page end):
-+ *                the next slot becomes a link entry (src_va == -1,
-+ *                dest_page_va == new map page), the slot after it receives
-+ *                the page checksum, the finished page is audited, *pages is
-+ *                incremented and the new page's address is returned.
-+ *
-+ * Returns 0 when a checksum or the audit fails.
-+ * NOTE(review): the result of crash_alloc_dest_page() for the link is not
-+ * checked here; a 0 is returned to the caller as "no map" only after the
-+ * link entry has been written and *pages incremented.
-+ */
-+static u_long crash_update_map(u_long map, u_long src_base, u_long dest, u_long * pages)
-+{
-+      struct mem_crash_map_entry *m;
-+
-+
-+      if (!map) {
-+              (*pages)++;
-+              return crash_alloc_dest_page();
-+      }
-+      m = (struct mem_crash_map_entry *)map;
-+      m->src_va = src_base;
-+      m->dest_page_va = dest;
-+      if (dest)
-+              if (crash_chksum_page(dest, &m->check_sum))
-+                      return 0;
-+
-+      /* step to the slot after the entry just written */
-+      map += sizeof(struct mem_crash_map_entry);
-+
-+      m = (struct mem_crash_map_entry *)map;
-+      if (!src_base) {        /* end of list */
-+              if (crash_chksum((u_long) m, &m->src_va))
-+                      return 0;
-+      } else if ((map + 3 * sizeof(struct mem_crash_map_entry)) > (u_long) round_page(map)) {
-+              /* not enough room left: chain to a new map page */
-+              m->src_va = -1;
-+              map = m->dest_page_va = crash_alloc_dest_page();
-+              if (crash_debug)
-+                      printk("\nm = 0x%lx m->src_va = 0x%lx m->dest_page_va = 0x%lx\n",
-+                             (u_long) trunc_page(m), m->src_va, m->dest_page_va);
-+              m++;
-+              if (crash_chksum((u_long) m, &m->src_va))
-+                      return 0;
-+              if (crash_debug)
-+                      printk("m = 0x%lx chksum =  m->src_va = 0x%lx\n", (u_long) trunc_page(m),
-+                             m->src_va);
-+              if (crash_audit_map_page((u_long) m))
-+                      return 0;
-+              (*pages)++;
-+      }
-+      return map;
-+}
-+
-+/*
-+ * Sum the u_long words from the start of limit's page up to (not including)
-+ * limit and store the result in *sum_addr.
-+ * Returns 0 on success, 1 when limit fails the crash_is_kseg() check.
-+ */
-+static int crash_chksum(u_long limit, u_long * sum_addr)
-+{
-+      u_long *word;
-+      u_long total = 0;
-+
-+      if (!crash_is_kseg(limit)) {
-+              printk("bad addr = 0x%lx to crash_chksum\n", limit);
-+              return 1;
-+      }
-+
-+      word = (u_long *) trunc_page(limit);
-+      while ((u_long) word < limit) {
-+              total += *word;
-+              word++;
-+      }
-+
-+      *sum_addr = total;
-+      return 0;
-+}
-+
-+/*
-+ * Sum every u_long word of the page containing pg_addr into *sum_addr.
-+ * Returns 0 on success, 1 when pg_addr fails the crash_is_kseg() check.
-+ */
-+static int crash_chksum_page(u_long pg_addr, u_long * sum_addr)
-+{
-+      u_long *word, *stop;
-+      u_long total = 0;
-+
-+      if (!crash_is_kseg(pg_addr)) {
-+              printk("bad addr = 0x%lx to crash_chksum_page\n", pg_addr);
-+              return 1;
-+      }
-+
-+      word = (u_long *) trunc_page(pg_addr);
-+      stop = (u_long *) ((u_long) word + PAGE_SIZE);
-+      while (word < stop)
-+              total += *word++;
-+
-+      *sum_addr = total;
-+      return 0;
-+}
-+
-+/*
-+ * Walk the chain of map pages starting at header->map and audit each one.
-+ * Returns 0 when every page passes, 1 when the header lacks CRASH_MAGIC,
-+ * a link fails the crash_is_kseg() check, or a page fails its audit.
-+ */
-+static int crash_audit_maps(void)
-+{
-+      struct mem_crash_map_hdr *hdr = (struct mem_crash_map_hdr *)crash_dump_header;
-+      u_long *link;
-+      u_long page;
-+      u_long npages = 0;
-+
-+      if (hdr->magic[0] != CRASH_MAGIC)
-+              return 1;
-+
-+      link = &hdr->map;
-+      page = hdr->map;
-+
-+      for (;;) {
-+              if (!crash_is_kseg(page)) {
-+                      printk("crash_audit_maps: bad link 0x%lx at 0x%lx\n", page,
-+                             (u_long) link);
-+                      return 1;
-+              }
-+              if (crash_audit_map_page(page)) {
-+                      printk("audit failed while on map page %ld\n", npages);
-+                      return 1;
-+              }
-+
-+              /* follow the link entry, if this page has one */
-+              link = crash_link(page);
-+              if (!link)
-+                      return 0;
-+              page = *link;
-+              npages++;
-+      }
-+}
-+
-+/*
-+ * Verify one map page.
-+ *
-+ * Every data entry must carry the checksum of the destination page it
-+ * points to.  The page ends with a link (src_va == -1) or end-of-list
-+ * (src_va == 0) entry; the src_va of the slot following it must equal the
-+ * checksum of the page up to that slot.
-+ *
-+ * The walk is confined to the page: a page without a terminator is reported
-+ * as corrupt instead of being read past its end.
-+ *
-+ * Returns 0 when the page is consistent, 1 otherwise.
-+ */
-+static int crash_audit_map_page(u_long map)
-+{
-+      struct mem_crash_map_entry *m;
-+      u_long page_end;
-+      u_long sum;
-+
-+      if (!map || !crash_is_kseg(map)) {
-+              printk("crash_audit_map_page: bad map = 0x%lx\n", map);
-+              return 1;
-+      }
-+      map = (u_long) trunc_page((u_long) map);
-+      page_end = map + PAGE_SIZE;
-+      m = (struct mem_crash_map_entry *)map;
-+      for (;;) {
-+              /*
-+               * Both this entry and the checksum slot behind a terminator
-+               * must fit in the page; crash_update_map() always leaves
-+               * that much room, so running out means corruption.
-+               */
-+              if ((u_long) (m + 2) > page_end) {
-+                      printk("crash_audit_map_page: no terminator in map page 0x%lx\n", map);
-+                      return 1;
-+              }
-+              if ((m->src_va == -1) || (m->src_va == 0)) {
-+                      /* terminator: the next slot holds the page checksum */
-+                      m++;
-+                      if (crash_chksum((u_long) m, &sum))
-+                              return 1;
-+                      if (m->src_va != sum) {
-+                              printk("crash_audit_map_page: checksum failure1\n");
-+                              printk("m = 0x%lx, sum = 0x%lx, m->src_va = 0x%lx\n",
-+                                     (u_long) m, (u_long) sum, (u_long) m->src_va);
-+                              crash_print_data_around((u_long) & m->src_va);
-+                              return 1;
-+                      }
-+                      return 0;
-+              }
-+
-+              /* data entry: recompute the destination page's checksum */
-+              if (crash_chksum_page((u_long) m->dest_page_va, &sum)
-+                  || (m->check_sum != sum)) {
-+                      printk("crash_audit_map_page: checksum failure2\n");
-+                      printk
-+                              ("dest_page_va = 0x%lx, &dest_page_va = 0x%lx, sum = 0x%lx, m->check_sum = 0x%lx\n",
-+                               (u_long) m->dest_page_va, (u_long) (&m->check_sum),
-+                               (u_long) sum, (u_long) m->check_sum);
-+                      crash_print_data_around((u_long) & m->check_sum);
-+                      return 1;
-+              }
-+              m++;
-+      }
-+}
-+
-+/*
-+ * Print the 40 u_long words surrounding p (20 before it, 20 from it on),
-+ * one per line.  Prints a complaint instead when p fails crash_is_kseg().
-+ */
-+static void crash_print_data_around(u_long p)
-+{
-+      u_long *word, *stop;
-+
-+      if (!crash_is_kseg(p)) {
-+              printk("crash_print_data_around: p = 0x%lx not kseg\n", p);
-+              return;
-+      }
-+
-+      word = (u_long *) p - 20;
-+      stop = word + 40;
-+      while (word < stop) {
-+              printk("%lx\n", *word);
-+              word++;
-+      }
-+}
-+
-+#ifdef CRASH_DEBUG
-+/*
-+ * Debug aid: print every entry of the map page containing map, up to and
-+ * including its link/end-of-list entry, then the slot that follows and the
-+ * checksum recomputed for the page.
-+ */
-+static void crash_print_map_page(u_long map)
-+{
-+      struct mem_crash_map_entry *ent;
-+      u_long cksum;
-+      int printed;
-+
-+      ent = (struct mem_crash_map_entry *) trunc_page((u_long) map);
-+      for (printed = 0; ; printed++, ent++) {
-+              printk("%lx %lx %lx ", ent->src_va, ent->dest_page_va, ent->check_sum);
-+              if ((printed % 4) == 0)
-+                      printk("\n");
-+              if ((ent->src_va == -1) || (ent->src_va == 0))
-+                      break;
-+      }
-+
-+      /* the slot behind the terminator carries the page checksum */
-+      ent++;
-+      printk("%lx %lx ", ent->src_va, ent->dest_page_va);
-+      if (!crash_chksum((u_long) ent, &cksum))
-+              printk("\nchksum = 0x%lx\n", cksum);
-+}
-+#endif /* CRASH_DEBUG */
-+
-+/*
-+ * Wait until every other CPU has recorded itself in panic_threads[], then
-+ * call crash_cleanup_smp_state().  The millisecond budget is shared across
-+ * all CPUs (the counter is never reset); once it exceeds CRASH_CPU_TIMEOUT
-+ * crash_halt_or_reboot(0) is called.
-+ */
-+static void crash_wait_cpus(void)
-+{
-+      int cpu;
-+      int waited_ms = 0;
-+
-+      for (cpu = 0; cpu < smp_num_cpus; cpu++) {
-+              if (cpu == smp_processor_id())
-+                      continue;
-+
-+              while (!panic_threads[cpu]) {
-+                      waited_ms++;
-+                      mdelay(1);
-+                      if (waited_ms <= CRASH_CPU_TIMEOUT)
-+                              continue;
-+
-+                      /* if other cpus are still running
-+                       * we have to halt, otherwise we could
-+                       * risk using buffer cache pages which
-+                       * could subsequently get flushed to disk.
-+                       */
-+                      printk("Unable to halt other CPUs, halting system.\n");
-+                      crash_halt_or_reboot(0);
-+              }
-+      }
-+
-+      crash_cleanup_smp_state();
-+}
-+
-+
-+/*
-+ * Disabled zlib allocator hooks.  They are compiled out: the z_stream users
-+ * in this file set ->workspace directly and leave the zalloc/zfree
-+ * assignments commented out, and crash_zalloc_cur/crash_zalloc_end referenced
-+ * below are themselves commented out of the crash_zalloc_start declaration.
-+ */
-+#if 0
-+static void *czalloc(void *arg, unsigned int items, unsigned int size)
-+{
-+      u_long nbytes;
-+      u_long addr;
-+
-+      nbytes = (u_long) (items * size);
-+      nbytes = (u_long) round_page(nbytes);
-+      if ((crash_zalloc_cur + nbytes) > crash_zalloc_end)
-+              return 0;
-+      addr = crash_zalloc_cur;
-+      crash_zalloc_cur += nbytes;
-+      return ((void *)addr);
-+}
-+
-+static void czfree(void *arg, void *ptr)
-+{
-+      printk("zfree: ptr = 0x%lx\n", (u_long) ptr);
-+}
-+#endif
-+
-+/*
-+ * TRUE when the physical address behind addr lies below
-+ * PFN_PHYS(max_low_pfn), FALSE otherwise.
-+ */
-+static boolean_t crash_is_kseg(u_long addr)
-+{
-+      u_long pa = virt_to_phys((void *)addr);
-+
-+      return (pa < PFN_PHYS(max_low_pfn)) ? TRUE : FALSE;
-+}
-+
-+/*
-+ * Find the link entry of the map page containing p.
-+ * Returns a pointer to the link's dest_page_va (the next map page), or 0
-+ * when the page ends with an end-of-list entry instead.
-+ */
-+static u_long *crash_link(u_long p)
-+{
-+      struct mem_crash_map_entry *ent;
-+
-+      ent = (struct mem_crash_map_entry *) trunc_page(p);
-+      while (ent->src_va) {
-+              if (ent->src_va == -1)
-+                      return &ent->dest_page_va;
-+              ent++;
-+      }
-+
-+      return 0;
-+}
-+
-+/*
-+ * Call this after data written to disk.
-+ *
-+ * Releases every page of the dump back to the page allocator: each
-+ * destination page listed in the map and each map page, including the last
-+ * one (the page holding the end-of-list entry).  The header magic is cleared
-+ * afterwards.
-+ *
-+ * Returns 0 on success, 1 (with the magic cleared) when the map audit fails.
-+ */
-+static int crash_free_crashmem(void)
-+{
-+      struct mem_crash_map_hdr *header;
-+      struct mem_crash_map_entry *m, *last_m;
-+
-+      if (crash_debug)
-+              printk("crash_free_crashmem: \n");
-+
-+      header = (struct mem_crash_map_hdr *)crash_dump_header;
-+      if (crash_audit_maps()) {
-+              header->magic[0] = 0;
-+              return 1;
-+      }
-+      m = (struct mem_crash_map_entry *)header->map;
-+      while (m->src_va) {
-+              if (m->src_va == -1) {
-+                      /* read the link before giving up the page it lives in */
-+                      last_m = m;
-+                      m = (struct mem_crash_map_entry *)m->dest_page_va;
-+                      crash_free_page((unsigned long)last_m);
-+                      continue;
-+              }
-+              crash_free_page(m->dest_page_va);
-+              m++;
-+      }
-+      /*
-+       * m now sits on the end-of-list entry; the map page around it has no
-+       * link pointing away from it and must be released here.
-+       */
-+      crash_free_page((unsigned long)m);
-+
-+      if (crash_debug)
-+              printk("crash_free_crashmem: 0x%lx freed\n",
-+                     (header->data_pages + header->map_pages) * PAGE_SIZE);
-+      header->magic[0] = 0;
-+      return 0;
-+}
-+
-+/*
-+ * Hand the page containing addr back to the page allocator: clear its
-+ * reserved bit, set its count to 1 and free it.
-+ */
-+static void crash_free_page(u_long addr)
-+{
-+      struct page *pg = virt_to_page(addr);
-+
-+      ClearPageReserved(pg);
-+      set_page_count(pg, 1);
-+      __free_page(pg);
-+}
-+
-+/*
-+ * Copy one piece of the in-memory dump to user space.
-+ *
-+ * kva == 0: audit the maps, then copy the dump header to buf.
-+ * kva != 0: copy the whole page containing kva to buf.  kva must pass
-+ *           crash_is_kseg() and its page must be marked reserved.
-+ *
-+ * Returns 0 on success and 1 on any failure (no dump present, audit
-+ * failure, bad or non-reserved address, copy_to_user() failure).
-+ */
-+static int get_dump_helper(u_long kva, u_long buf)
-+{
-+      struct page *page;
-+      struct mem_crash_map_hdr *header;
-+
-+      header = (struct mem_crash_map_hdr *)crash_dump_header;
-+      if (header->magic[0] != CRASH_MAGIC)
-+              return 1;
-+
-+      if (!kva) {
-+              if (crash_audit_maps()) {
-+                      printk("get_dump_helper: audit failure\n");
-+                      header->magic[0] = 0;
-+                      return 1;
-+              }
-+              page = virt_to_page((u_long) crash_dump_header);
-+              if (!PageReserved(page)) {
-+                      printk("not reserved: crash_dump_header = 0x%lx\n", crash_dump_header);
-+                      return 1;
-+              }
-+              if (copy_to_user((char *)buf, (char *)crash_dump_header,
-+                               sizeof(struct mem_crash_map_hdr))) {
-+                      printk("get_dump_helper: copy_to_user failed1\n");
-+                      return 1;
-+              }
-+      } else {
-+              /*
-+               * kva is supplied by the ioctl caller: make sure it maps to
-+               * low memory before it is turned into a struct page pointer.
-+               */
-+              if (!crash_is_kseg(kva)) {
-+                      printk("get_dump_helper: bad kva = 0x%lx\n", kva);
-+                      return 1;
-+              }
-+              page = virt_to_page(kva);
-+              if (!PageReserved(page)) {
-+                      printk("not reserved: kva = 0x%lx\n", kva);
-+                      return 1;
-+              }
-+              if (copy_to_user((char *)buf, (char *)trunc_page(kva), PAGE_SIZE)) {
-+                      printk("get_dump_helper: copy_to_user failed2\n");
-+                      return 1;
-+              }
-+      }
-+      return 0;
-+}
-+
-+/*
-+ * Release the dump's pages via crash_free_crashmem(), but only when the
-+ * header currently carries CRASH_MAGIC.
-+ */
-+static void free_dump_helper(void)
-+{
-+      struct mem_crash_map_hdr *hdr = (struct mem_crash_map_hdr *)crash_dump_header;
-+
-+      if (hdr->magic[0] == CRASH_MAGIC) {
-+              if (crash_debug)
-+                      printk("free_dump_helper\n");
-+              crash_free_crashmem();
-+      }
-+}
-+
-+/* open() handler for the crash device: no per-open state, always succeeds. */
-+static int crashdev_open(struct inode *inode, struct file *file)
-+{
-+      return 0;
-+}
-+
-+/*
-+ * Character device implementation: the device supports open and ioctl
-+ * only, and is registered as a misc device named "crash" with minor 190.
-+ */
-+static struct file_operations crashdev_fops = {
-+      .ioctl = crashdev_ioctl,
-+      .open = crashdev_open,
-+};
-+
-+static struct miscdevice crash_miscdev = {
-+      190,
-+      "crash",
-+      &crashdev_fops
-+};
-+
-+/*
-+ * Register the crash misc device.
-+ * Returns misc_register()'s result; a negative value is also logged.
-+ */
-+int crash_init_chrdev(void)
-+{
-+      int rc = misc_register(&crash_miscdev);
-+
-+      if (rc >= 0)
-+              return rc;
-+
-+      printk(KERN_WARNING "crash: can't register crash device (c 10 190)\n");
-+      return rc;
-+}
-+
-+/* call the original syscalls, just to get things going */
-+static int crashdev_ioctl(struct inode *inode, struct file *file,
-+                        unsigned int cmd, unsigned long arg)
-+{
-+      int retval = 0;
-+
-+      switch (cmd) {
-+      case CRASH_IOCFREEDUMP:
-+              free_dump_helper();
-+              break;
-+
-+      case CRASH_IOCGETDUMP:
-+              if (crash_debug) {
-+                      printk("crashdev_ioctl: get dump\n");
-+                      printk("vals: %08lx %08lx\n",
-+                             ((struct ioctl_getdump *)arg)->kva,
-+                             ((struct ioctl_getdump *)arg)->buf);
-+              }
-+
-+              retval = get_dump_helper((u_long) ((struct ioctl_getdump *)arg)->kva,
-+                                       (u_long) ((struct ioctl_getdump *)arg)->buf);
-+              break;
-+
-+#ifdef CONFIG_BOOTIMG
-+      case CRASH_IOCBOOTIMG:
-+              if (crash_debug)
-+                      printk("crashdev_ioctl: bootimg\n");
-+
-+              retval = sys_bootimg((struct boot_image *)arg);
-+              break;
-+#endif
-+
-+      case CRASH_IOCVERSION:
-+              if (crash_debug)
-+                      printk("crashdev_ioctl: version\n");
-+              retval = CRASH_K_MINOR | (CRASH_K_MAJOR << 16);
-+              break;
-+
-+      default:
-+              return -EINVAL;
-+      }
-+
-+      return retval;
-+}
-Index: linux/kernel/module.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/kernel/module.c,v
-retrieving revision 1.1.1.1.4.1
-retrieving revision 1.1.1.1.4.1.2.1
-diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1
---- linux/kernel/module.c      12 Mar 2003 19:51:36 -0000      1.1.1.1.4.1
-+++ linux/kernel/module.c      1 Apr 2003 12:17:41 -0000       1.1.1.1.4.1.2.1
-@@ -311,7 +311,14 @@
-               error = -EEXIST;
-               goto err1;
-       }
-+#if defined(CONFIG_MCL_COREDUMP)
-+      /* Call vmalloc_32 instead of module_map (vmalloc for i386)
-+       * to avoid being mapped in highmem where mcore can't see us.
-+       */
-+      if ((mod = (struct module *)vmalloc_32(size)) == NULL) {
-+#else
-       if ((mod = (struct module *)module_map(size)) == NULL) {
-+#endif
-               error = -ENOMEM;
-               goto err1;
-       }
-Index: linux/kernel/panic.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/kernel/panic.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/kernel/panic.c       12 Mar 2003 19:51:36 -0000      1.3.2.1
-+++ linux/kernel/panic.c       1 Apr 2003 12:17:41 -0000       1.3.2.1.2.1
-@@ -19,6 +19,10 @@
- #include <linux/vt_kern.h>
- #include <linux/pc_keyb.h>
-+#ifdef CONFIG_MCL_COREDUMP
-+#include <linux/crash.h>
-+#endif
-+
- asmlinkage void sys_sync(void);       /* it's really int */
- int panic_timeout;
-@@ -197,20 +201,43 @@
-         unsigned long caller = (unsigned long) __builtin_return_address(0);
- #endif
-+#ifdef CONFIG_MCL_COREDUMP
-+      crash_save_regs();
-+#endif
-+
-       bust_spinlocks(1);
-       va_start(args, fmt);
-       vsprintf(buf, fmt, args);
-       va_end(args);
-       printk(KERN_EMERG "Kernel panic: %s\n",buf);
-+
-+#ifdef CONFIG_MCL_COREDUMP
-+      if (!panicmsg) {
-+              panicmsg = buf;
-+              panic_processor = smp_processor_id();
-+              mb();
-+      }
-+#endif
-+
-       if (netdump_func)
-               BUG();
-       if (in_interrupt())
-               printk(KERN_EMERG "In interrupt handler - not syncing\n");
-       else if (!current->pid)
-               printk(KERN_EMERG "In idle task - not syncing\n");
-+#ifdef CONFIG_MCL_COREDUMP
-+      else if (crash_perform_sync)
-+#else
-       else
-+#endif
-               sys_sync();
-+
-       bust_spinlocks(0);
-+
-+#ifdef CONFIG_MCL_COREDUMP
-+      smp_call_function((void *)smp_crash_funnel_cpu,0,0,0);
-+      crash_save_current_state(current);
-+#endif
- #ifdef CONFIG_SMP
-       smp_send_stop();
-Index: linux/kernel/sysctl.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/kernel/sysctl.c,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.1
-diff -u -r1.2.2.1 -r1.2.2.1.2.1
---- linux/kernel/sysctl.c      12 Mar 2003 19:51:36 -0000      1.2.2.1
-+++ linux/kernel/sysctl.c      1 Apr 2003 12:17:41 -0000       1.2.2.1.2.1
-@@ -37,6 +37,10 @@
- #include <linux/nfs_fs.h>
- #endif
-+#ifdef CONFIG_MCL_COREDUMP
-+#include <linux/crash.h>
-+#endif
-+
- #if defined(CONFIG_SYSCTL)
- /* External variables not in a header file. */
-@@ -247,6 +251,10 @@
-       {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int),
-        0644, NULL, &proc_dointvec},
- #endif         
-+#ifdef CONFIG_MCL_COREDUMP
-+      {KERN_PANIC_ON_OOPS, "panic_on_oops", &panic_on_oops, sizeof(int),
-+       0644, NULL, &proc_dointvec},
-+#endif
-       {KERN_CADPID, "cad_pid", &cad_pid, sizeof (int),
-        0600, NULL, &proc_dointvec},
-       {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int),
-Index: linux/lib/Config.in
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/lib/Config.in,v
-retrieving revision 1.2
-retrieving revision 1.2.4.1
-diff -u -r1.2 -r1.2.4.1
---- linux/lib/Config.in        14 Feb 2003 22:59:23 -0000      1.2
-+++ linux/lib/Config.in        1 Apr 2003 12:17:41 -0000       1.2.4.1
-@@ -23,12 +23,14 @@
-   fi
- fi
--if [ "$CONFIG_PPP_DEFLATE" = "y" -o \
-+if [ "$CONFIG_MCL_COREDUMP" = "y" -o \
-+     "$CONFIG_PPP_DEFLATE" = "y" -o \
-      "$CONFIG_JFFS2_FS" = "y" ]; then
-    define_tristate CONFIG_ZLIB_DEFLATE y
- else
-   if [ "$CONFIG_PPP_DEFLATE" = "m" -o \
--       "$CONFIG_JFFS2_FS" = "m" ]; then
-+       "$CONFIG_JFFS2_FS" = "m" -o \
-+       "$CONFIG_MCL_COREDUMP" = "m" ]; then
-      define_tristate CONFIG_ZLIB_DEFLATE m
-   else
-      tristate 'zlib compression support' CONFIG_ZLIB_DEFLATE
-Index: linux/mm/memory.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/mm/memory.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/mm/memory.c  12 Mar 2003 19:51:37 -0000      1.3.2.1
-+++ linux/mm/memory.c  1 Apr 2003 12:17:41 -0000       1.3.2.1.2.1
-@@ -1381,6 +1381,10 @@
-       }
-       lock_page(page);
-+#ifdef CONFIG_MCL_COREDUMP
-+      set_bit(PG_anon, &page->flags);
-+#endif
-+
-       /*
-        * Back out if somebody else faulted in this pte while we
-        * released the page table lock.
-@@ -1470,6 +1474,9 @@
-               mm->rss++;
-               flush_page_to_ram(page);
-               entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-+#ifdef CONFIG_MCL_COREDUMP
-+              set_bit(PG_anon, &page->flags);
-+#endif
-               lru_cache_add(page);
-       }
-Index: linux/mm/page_alloc.c
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/mm/page_alloc.c,v
-retrieving revision 1.3.2.1
-retrieving revision 1.3.2.1.2.1
-diff -u -r1.3.2.1 -r1.3.2.1.2.1
---- linux/mm/page_alloc.c      12 Mar 2003 19:51:37 -0000      1.3.2.1
-+++ linux/mm/page_alloc.c      1 Apr 2003 12:17:41 -0000       1.3.2.1.2.1
-@@ -95,6 +95,10 @@
-       struct page *base;
-       per_cpu_t *per_cpu;
-       zone_t *zone;
-+#ifdef CONFIG_MCL_COREDUMP
-+      struct page *pagemap;
-+      int count = 1<<order;
-+#endif
-       /*
-        * Yes, think what happens when other parts of the kernel take 
-@@ -163,6 +167,15 @@
-       spin_lock(&zone->lock);
-+#ifdef CONFIG_MCL_COREDUMP
-+      pagemap = page;
-+      do {
-+              pagemap->flags |= (1<<PG_free);
-+              pagemap->flags &= ~((1<<PG_anon)|(1<<PG_shm));
-+              pagemap++;
-+      } while(--count);
-+#endif
-+
-       zone->free_pages -= mask;
-       while (mask + (1 << (MAX_ORDER-1))) {
-@@ -268,6 +281,16 @@
-                       zone->free_pages -= 1UL << order;
-                       page = expand(zone, page, index, order, curr_order, area);
-+#ifdef CONFIG_MCL_COREDUMP
-+                      {
-+                              struct page *pagemap = page;
-+                              int             count = 1<<order;
-+                              do {
-+                                      pagemap->flags &= ~(1<<PG_free);
-+                                      pagemap++;
-+                              } while (--count);
-+                      }
-+#endif
-                       spin_unlock_irqrestore(&zone->lock, flags);
-                       set_page_count(page, 1);
-Index: linux/arch/i386//boot/compressed/head.S
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/boot/compressed/head.S,v
-retrieving revision 1.1.1.1
-retrieving revision 1.1.1.1.12.6
-diff -u -r1.1.1.1 -r1.1.1.1.12.6
---- linux/arch/i386//boot/compressed/head.S    7 May 2002 21:53:54 -0000       1.1.1.1
-+++ linux/arch/i386//boot/compressed/head.S    5 Apr 2003 05:51:27 -0000       1.1.1.1.12.6
-@@ -23,6 +23,7 @@
-  */
- .text
-+#include <linux/config.h>
- #include <linux/linkage.h>
- #include <asm/segment.h>
-@@ -31,6 +32,55 @@
- startup_32:
-       cld
-       cli
-+
-+#ifdef CONFIG_BOOTIMG
-+/*
-+ * GDT is invalid if we're booted by bootimg, so reload it now
-+ */
-+      lgdt    %cs:gdt_descr
-+      ljmp    $(__KERNEL_CS),$1f
-+
-+gdt_table_limit = gdt_table_end - gdt_table - 1
-+gdt_descr:
-+      .word   gdt_table_limit
-+      .long   gdt_table
-+
-+gdt_table: /* stolen from arch/i386/kernel/head.S */
-+      .quad 0x0000000000000000        /* NULL descriptor */
-+      .quad 0x0000000000000000        /* 0x0b reserved */
-+      .quad 0x0000000000000000        /* 0x13 reserved */
-+      .quad 0x0000000000000000        /* 0x1b reserved */
-+      .quad 0x00cffa000000ffff        /* 0x23 user 4GB code at 0x00000000 */
-+      .quad 0x00cff2000000ffff        /* 0x2b user 4GB data at 0x00000000 */
-+      .quad 0x0000000000000000        /* 0x33 TLS entry 1 */
-+      .quad 0x0000000000000000        /* 0x3b TLS entry 2 */
-+      .quad 0x0000000000000000        /* 0x43 TLS entry 3 */
-+      .quad 0x0000000000000000        /* 0x4b reserved */
-+      .quad 0x0000000000000000        /* 0x53 reserved */
-+      .quad 0x0000000000000000        /* 0x5b reserved */
-+
-+      .quad 0x00cf9a000000ffff        /* 0x60 kernel 4GB code at 0x00000000 */
-+      .quad 0x00cf92000000ffff        /* 0x68 kernel 4GB data at 0x00000000 */
-+      .quad 0x0000000000000000        /* 0x70 TSS descriptor */
-+      .quad 0x0000000000000000        /* 0x78 LDT descriptor */
-+
-+      /* Segments used for calling PnP BIOS */
-+      .quad 0x00c09a0000000000        /* 0x80 32-bit code */
-+      .quad 0x00809a0000000000        /* 0x88 16-bit code */
-+      .quad 0x0080920000000000        /* 0x90 16-bit data */
-+      .quad 0x0080920000000000        /* 0x98 16-bit data */
-+      .quad 0x0080920000000000        /* 0xa0 16-bit data */
-+      /*
-+       * The APM segments have byte granularity and their bases
-+       * and limits are set at run time.
-+       */
-+      .quad 0x00409a0000000000        /* 0xa8 APM CS    code */
-+      .quad 0x00009a0000000000        /* 0xb0 APM CS 16 code (16 bit) */
-+      .quad 0x0040920000000000        /* 0xb8 APM DS    data */
-+gdt_table_end:
-+
-+1:
-+#endif
-       movl $(__KERNEL_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-@@ -92,7 +142,6 @@
-       cld
-       rep
-       movsl
--
-       popl %esi       # discard the address
-       popl %ebx       # real mode pointer
-       popl %esi       # low_buffer_start
-@@ -124,5 +173,10 @@
-       movsl
-       movl %ebx,%esi  # Restore setup pointer
-       xorl %ebx,%ebx
-+#ifdef CONFIG_BOOTIMG
-+        movl $0x100000,%eax
-+        jmpl *%eax
-+#else
-       ljmp $(__KERNEL_CS), $0x100000
-+#endif
- move_routine_end:
-Index: linux/arch/i386//kernel/head.S
-===================================================================
-RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/head.S,v
-retrieving revision 1.2.2.1
-retrieving revision 1.2.2.1.2.5
-diff -u -r1.2.2.1 -r1.2.2.1.2.5
---- linux/arch/i386//kernel/head.S     12 Mar 2003 19:49:06 -0000      1.2.2.1
-+++ linux/arch/i386//kernel/head.S     5 Apr 2003 05:51:27 -0000       1.2.2.1.2.5
-@@ -42,6 +42,21 @@
-  * On entry, %esi points to the real-mode code as a 32-bit pointer.
-  */
- startup_32:
-+#ifdef CONFIG_BOOTIMG
-+/*
-+ * GDT is invalid if we're booted by bootimg, so reload it now
-+ */
-+      lgdt %cs:_gdt_descr-__PAGE_OFFSET
-+      ljmp $(__KERNEL_CS),$1f-__PAGE_OFFSET
-+
-+gdt_limit = SYMBOL_NAME(cpu_gdt_table_end) - SYMBOL_NAME(cpu_gdt_table) - 1
-+
-+_gdt_descr:
-+      .word gdt_limit
-+      .long SYMBOL_NAME(cpu_gdt_table)-__PAGE_OFFSET
-+
-+1:
-+#endif
- /*
-  * Set segments to known values
-  */
-@@ -452,6 +467,7 @@
-       .quad 0x00409a0000000000        /* 0xa8 APM CS    code */
-       .quad 0x00009a0000000000        /* 0xb0 APM CS 16 code (16 bit) */
-       .quad 0x0040920000000000        /* 0xb8 APM DS    data */
-+ENTRY(cpu_gdt_table_end)
- #if CONFIG_SMP
-       .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
diff --git a/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch b/lustre/kernel_patches/patches/mkdep-revert-rh-2.4.patch
deleted file mode 100644 (file)
index 5cc34b8..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-Index: linux-2.4.20-30.9/scripts/mkdep.c
-===================================================================
---- linux-2.4.20-30.9.orig/scripts/mkdep.c     2004-02-19 19:40:51.000000000 -0500
-+++ linux-2.4.20-30.9/scripts/mkdep.c  2004-04-28 17:24:54.000000000 -0400
-@@ -48,8 +48,6 @@
- char __depname[512] = "\n\t@touch ";
- #define depname (__depname+9)
- int hasdep;
--char cwd[PATH_MAX];
--int lcwd;
- struct path_struct {
-       int len;
-@@ -204,22 +202,8 @@
-               memcpy(path->buffer+path->len, name, len);
-               path->buffer[path->len+len] = '\0';
-               if (access(path->buffer, F_OK) == 0) {
--                      int l = lcwd + strlen(path->buffer);
--                      char name2[l+2], *p;
--                      if (path->buffer[0] == '/') {
--                              memcpy(name2, path->buffer, l+1);
--                      }
--                      else {
--                              memcpy(name2, cwd, lcwd);
--                              name2[lcwd] = '/';
--                              memcpy(name2+lcwd+1, path->buffer, path->len+len+1);
--                      }
--                      while ((p = strstr(name2, "/../"))) {
--                              *p = '\0';
--                              strcpy(strrchr(name2, '/'), p+3);
--                      }
-                       do_depname();
--                      printf(" \\\n   %s", name2);
-+                      printf(" \\\n   %s", path->buffer);
-                       return;
-               }
-       }
-@@ -601,12 +585,6 @@
-               return 1;
-       }
--      if (!getcwd(cwd, sizeof(cwd))) {
--              fprintf(stderr, "mkdep: getcwd() failed %m\n");
--              return 1;
--      }
--      lcwd = strlen(cwd);
--
-       add_path(".");          /* for #include "..." */
-       while (++argv, --argc > 0) {
index 9c66a2a..4707842 100644 (file)
@@ -1,8 +1,49 @@
-Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/nfs/dir.c     2005-02-25 13:43:42.454529040 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/nfs/dir.c  2005-02-25 13:46:04.832884240 +0200
-@@ -791,7 +791,7 @@
+diff -urp a/fs/cifs/dir.c b/fs/cifs/dir.c
+--- a/fs/cifs/dir.c    2006-03-10 18:50:15.000000000 -0800
++++ b/fs/cifs/dir.c    2006-03-10 18:50:44.000000000 -0800
+@@ -146,23 +146,23 @@ cifs_create(struct inode *inode, struct 
+       }
+       if(nd) {
+-              if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
++              if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
+                       desiredAccess = GENERIC_READ;
+-              else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) {
++              else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) {
+                       desiredAccess = GENERIC_WRITE;
+                       write_only = TRUE;
+-              } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
++              } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) {
+                       /* GENERIC_ALL is too much permission to request */
+                       /* can cause unnecessary access denied on create */
+                       /* desiredAccess = GENERIC_ALL; */
+                       desiredAccess = GENERIC_READ | GENERIC_WRITE;
+               }
+-              if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
++              if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+                       disposition = FILE_CREATE;
+-              else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
++              else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+                       disposition = FILE_OVERWRITE_IF;
+-              else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
++              else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
+                       disposition = FILE_OPEN_IF;
+               else {
+                       cFYI(1,("Create flag not set in create function"));
+diff -urp a/fs/nfs/dir.c b/fs/nfs/dir.c
+--- a/fs/nfs/dir.c     2006-03-10 19:07:50.000000000 -0800
++++ b/fs/nfs/dir.c     2006-03-10 17:27:15.000000000 -0800
+@@ -752,7 +752,7 @@ int nfs_is_exclusive_create(struct inode
+               return 0;
+       if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+               return 0;
+-      return (nd->intent.open.flags & O_EXCL) != 0;
++      return (nd->intent.it_flags & O_EXCL) != 0;
+ }
+ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+@@ -827,7 +827,7 @@ static int is_atomic_open(struct inode *
        if (nd->flags & LOOKUP_DIRECTORY)
                return 0;
        /* Are we trying to write to a read only partition? */
@@ -11,7 +52,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
                return 0;
        return 1;
  }
-@@ -812,7 +812,7 @@
+@@ -848,7 +848,7 @@ static struct dentry *nfs_atomic_lookup(
        dentry->d_op = NFS_PROTO(dir)->dentry_ops;
  
        /* Let vfs_create() deal with O_EXCL */
@@ -20,16 +61,16 @@ Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
                goto no_entry;
  
        /* Open the file on the server */
-@@ -820,7 +820,7 @@
-       /* Revalidate parent directory attribute cache */
-       nfs_revalidate_inode(NFS_SERVER(dir), dir);
+@@ -860,7 +860,7 @@ static struct dentry *nfs_atomic_lookup(
+               goto out;
+       }
  
 -      if (nd->intent.open.flags & O_CREAT) {
 +      if (nd->intent.it_flags & O_CREAT) {
                nfs_begin_data_update(dir);
                inode = nfs4_atomic_open(dir, dentry, nd);
                nfs_end_data_update(dir);
-@@ -836,7 +836,7 @@
+@@ -876,7 +876,7 @@ static struct dentry *nfs_atomic_lookup(
                                break;
                        /* This turned out not to be a regular file */
                        case -ELOOP:
@@ -38,7 +79,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
                                        goto no_open;
                        /* case -EISDIR: */
                        /* case -EINVAL: */
-@@ -875,7 +875,7 @@
+@@ -915,7 +915,7 @@ static int nfs_open_revalidate(struct de
        /* NFS only supports OPEN on regular files */
        if (!S_ISREG(inode->i_mode))
                goto no_open;
@@ -47,11 +88,19 @@ Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
        /* We cannot do exclusive creation on a positive dentry */
        if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
                goto no_open;
-Index: linux-2.6.9-5.0.3.EL/fs/nfs/nfs4proc.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/nfs/nfs4proc.c        2004-10-19 00:54:40.000000000 +0300
-+++ linux-2.6.9-5.0.3.EL/fs/nfs/nfs4proc.c     2005-02-25 13:44:27.537675360 +0200
-@@ -775,17 +775,17 @@
+@@ -1080,7 +1080,7 @@ static int nfs_create(struct inode *dir,
+       attr.ia_valid = ATTR_MODE;
+       if (nd && (nd->flags & LOOKUP_CREATE))
+-              open_flags = nd->intent.open.flags;
++              open_flags = nd->intent.it_flags;
+       /*
+        * The 0 argument passed into the create function should one day
+diff -urp a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
+--- a/fs/nfs/nfs4proc.c        2006-03-10 17:19:45.000000000 -0800
++++ b/fs/nfs/nfs4proc.c        2006-03-10 17:19:58.000000000 -0800
+@@ -776,17 +776,17 @@ nfs4_atomic_open(struct inode *dir, stru
        struct nfs4_state *state;
  
        if (nd->flags & LOOKUP_CREATE) {
@@ -72,46 +121,3 @@ Index: linux-2.6.9-5.0.3.EL/fs/nfs/nfs4proc.c
        put_rpccred(cred);
        if (IS_ERR(state))
                return (struct inode *)state;
-Index: linux-2.6.9-5.0.3.EL/fs/cifs/dir.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/cifs/dir.c    2004-10-19 00:54:37.000000000 +0300
-+++ linux-2.6.9-5.0.3.EL/fs/cifs/dir.c 2005-02-25 13:44:27.539675056 +0200
-@@ -199,23 +199,23 @@
-       }
-       if(nd) {
--              if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
-+              if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
-                       desiredAccess = GENERIC_READ;
--              else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) {
-+              else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) {
-                       desiredAccess = GENERIC_WRITE;
-                       write_only = TRUE;
--              } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
-+              } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) {
-                       /* GENERIC_ALL is too much permission to request */
-                       /* can cause unnecessary access denied on create */
-                       /* desiredAccess = GENERIC_ALL; */
-                       desiredAccess = GENERIC_READ | GENERIC_WRITE;
-               }
--              if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-+              if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-                       disposition = FILE_CREATE;
--              else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
-+              else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
-                       disposition = FILE_OVERWRITE_IF;
--              else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
-+              else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
-                       disposition = FILE_OPEN_IF;
-               else {
-                       cFYI(1,("Create flag not set in create function"));
-@@ -400,7 +400,7 @@
-             parent_dir_inode, direntry->d_name.name, direntry));
-       if(nd) {  /* BB removeme */
--              cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags));
-+              cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags));
-       } /* BB removeme BB */
-       /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
diff --git a/lustre/kernel_patches/patches/nfs_export_kernel-2.4.19-bgl.patch b/lustre/kernel_patches/patches/nfs_export_kernel-2.4.19-bgl.patch
deleted file mode 100644 (file)
index aa6276f..0000000
+++ /dev/null
@@ -1,742 +0,0 @@
- fs/Makefile        |    3 
- fs/file_table.c    |   11 ++
- fs/inode.c         |   23 ++++-
- fs/namei.c         |   12 ++
- fs/nfsd/export.c   |    5 +
- fs/nfsd/nfsfh.c    |   65 +++++++++++++-
- fs/nfsd/vfs.c      |  240 ++++++++++++++++++++++++++++++++++++++++++++++++-----
- include/linux/fs.h |   10 ++
- kernel/ksyms.c     |    2 
- 9 files changed, 337 insertions(+), 34 deletions(-)
-
-Index: linux-bgl/fs/nfsd/vfs.c
-===================================================================
---- linux-bgl.orig/fs/nfsd/vfs.c       2003-07-02 08:44:33.000000000 -0700
-+++ linux-bgl/fs/nfsd/vfs.c    2004-12-28 17:13:59.940919832 -0800
-@@ -77,6 +77,129 @@
- static struct raparms *               raparml;
- static struct raparms *               raparm_cache;
-+static int link_raw(struct dentry *dold, struct dentry *ddir,
-+                    struct dentry *dnew)
-+{
-+      int err;
-+
-+      struct nameidata old_nd = { .dentry = dold };
-+      struct nameidata nd = { .dentry = ddir, .last = dnew->d_name };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->link_raw(&old_nd, &nd);
-+      igrab(dold->d_inode);
-+      d_instantiate(dnew, dold->d_inode);
-+      if(dold->d_inode->i_op && dold->d_inode->i_op->revalidate_it)
-+              dold->d_inode->i_op->revalidate_it(dnew, NULL);
-+
-+      return err;
-+}
-+
-+static int unlink_raw(struct dentry *dentry, char *fname, int flen,
-+                      struct dentry *rdentry)
-+{
-+      int err;
-+        struct qstr last = { .name = fname, .len = flen };
-+      struct nameidata nd = { .dentry = dentry, .last = last };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->unlink_raw(&nd);
-+      if (!err)
-+              d_delete(rdentry);
-+
-+      return err;
-+}
-+
-+static int rmdir_raw(struct dentry *dentry, char *fname, int flen,
-+                     struct dentry *rdentry)
-+{
-+      int err;
-+        struct qstr last = { .name = fname, .len = flen };
-+      struct nameidata nd = { .dentry = dentry, .last = last };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->rmdir_raw(&nd);
-+      if(!err) {
-+              rdentry->d_inode->i_flags |= S_DEAD;
-+              d_delete(rdentry);
-+      }
-+
-+      return err;
-+}
-+
-+static int symlink_raw(struct dentry *dentry,  char *fname, int flen,
-+                       char *path)
-+{
-+      int err;
-+        struct qstr last = { .name = fname, .len = flen };
-+      struct nameidata nd = { .dentry = dentry, .last = last };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->symlink_raw(&nd, path);
-+
-+      return err;
-+}
-+
-+static int mkdir_raw(struct dentry *dentry, char *fname, int flen, int mode)
-+{
-+      int err;
-+        struct qstr last = { .name = fname, .len = flen };
-+      struct nameidata nd = { .dentry = dentry, .last = last };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->mkdir_raw(&nd, mode);
-+
-+      return err;
-+}
-+
-+static int mknod_raw(struct dentry *dentry, char *fname, int flen, int mode,
-+                     dev_t dev)
-+{
-+      int err;
-+        struct qstr last = { .name = fname, .len = flen };
-+      struct nameidata nd = { .dentry = dentry, .last = last };
-+      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+      err = op->mknod_raw(&nd, mode, dev);
-+
-+      return err;
-+}     
-+
-+static int rename_raw(struct dentry *fdentry, struct dentry *tdentry,
-+                      struct dentry *odentry, struct dentry *ndentry)
-+{
-+      int err;
-+
-+      struct nameidata old_nd = { .dentry = fdentry, .last = odentry->d_name};
-+      struct nameidata new_nd = { .dentry = tdentry, .last = ndentry->d_name};
-+      struct inode_operations *op = old_nd.dentry->d_inode->i_op;
-+      err = op->rename_raw(&old_nd, &new_nd);
-+      d_move(odentry, ndentry);
-+
-+      return err;
-+}
-+
-+static int setattr_raw(struct inode *inode, struct iattr *iap)
-+{
-+      int err;
-+
-+      iap->ia_valid |= ATTR_RAW;
-+      err = inode->i_op->setattr_raw(inode, iap);
-+
-+      return err;
-+}
-+
-+int revalidate_it(struct dentry *dentry, struct lookup_intent *it)
-+{
-+      int err = 0;
-+
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
-+              if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
-+                      !d_invalidate(dentry)) {
-+                      dput(dentry);
-+                      err = -EINVAL;
-+                      dentry = NULL;
-+                      return err;
-+              }
-+      }
-+
-+      return err;
-+}
-+
- /*
-  * Look up one component of a pathname.
-  * N.B. After this call _both_ fhp and resfh need an fh_put
-@@ -304,7 +426,10 @@
-       }
-       err = nfserr_notsync;
-       if (!check_guard || guardtime == inode->i_ctime) {
--              err = notify_change(dentry, iap);
-+              if ( dentry->d_inode->i_op && dentry->d_inode->i_op->setattr_raw)
-+                      err = setattr_raw(dentry->d_inode, iap);
-+              else
-+                      err = notify_change(dentry, iap);
-               err = nfserrno(err);
-       }
-       if (size_change) {
-@@ -431,6 +556,7 @@
- {
-       struct dentry   *dentry;
-       struct inode    *inode;
-+      struct lookup_intent it;
-       int             err;
-       /* If we get here, then the client has already done an "open", and (hopefully)
-@@ -477,6 +603,14 @@
-               filp->f_mode  = FMODE_READ;
-       }
-+      intent_init(&it, IT_OPEN, (filp->f_flags & ~O_ACCMODE) | filp->f_mode);
-+
-+      err = revalidate_it(dentry, &it);
-+      if (err)
-+              goto out_nfserr;
-+      
-+      filp->f_it = &it;
-+      
-       err = 0;
-       if (filp->f_op && filp->f_op->open) {
-               err = filp->f_op->open(inode, filp);
-@@ -491,7 +625,11 @@
-                       atomic_dec(&filp->f_count);
-               }
-       }
-+
- out_nfserr:
-+      if (it.it_op_release)
-+              intent_release(&it);
-+
-       if (err)
-               err = nfserrno(err);
- out:
-@@ -822,7 +960,7 @@
- {
-       struct dentry   *dentry, *dchild;
-       struct inode    *dirp;
--      int             err;
-+      int             err, error = -EOPNOTSUPP;
-       err = nfserr_perm;
-       if (!flen)
-@@ -838,20 +976,44 @@
-       dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
-+      switch (type) {
-+                      case S_IFDIR:
-+                              if (dirp->i_op->mkdir_raw)
-+                          error = mkdir_raw(dentry, fname, flen, iap->ia_mode);
-+                              break;
-+                      case S_IFCHR:
-+                      case S_IFBLK:
-+                      case S_IFIFO:
-+                      case S_IFSOCK:
-+                      case S_IFREG:
-+                          if (dirp->i_op->mknod_raw) {
-+                                      if (type == S_IFREG)
-+                                              rdev = 0;
-+                                      error = mknod_raw(dentry, fname, flen, iap->ia_mode, rdev);
-+                              }
-+                              break;
-+                              default:
-+                      printk("nfsd: bad file type %o in nfsd_create\n", type);
-+      }
-+
-       err = nfserr_notdir;
--      if(!dirp->i_op || !dirp->i_op->lookup)
-+      if(!dirp->i_op || !(dirp->i_op->lookup || dirp->i_op->lookup_it))
-               goto out;
-       /*
-        * Check whether the response file handle has been verified yet.
-        * If it has, the parent directory should already be locked.
-        */
--      if (!resfhp->fh_dentry) {
--              /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
--              fh_lock(fhp);
-+      if (!resfhp->fh_dentry || dirp->i_op->lookup_it) {
-+              /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create
-+                 and nfsd_proc_create in case of lustre
-+              */
-+              if (!resfhp->fh_dentry)
-+                      fh_lock(fhp);
-               dchild = lookup_one_len(fname, dentry, flen);
-               err = PTR_ERR(dchild);
-               if (IS_ERR(dchild))
-                       goto out_nfserr;
-+              resfhp->fh_dentry = NULL;
-               err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
-               if (err)
-                       goto out;
-@@ -872,10 +1034,12 @@
-        * Make sure the child dentry is still negative ...
-        */
-       err = nfserr_exist;
--      if (dchild->d_inode) {
--              dprintk("nfsd_create: dentry %s/%s not negative!\n",
--                      dentry->d_name.name, dchild->d_name.name);
--              goto out; 
-+      if ( error == -EOPNOTSUPP) {
-+              if (dchild->d_inode) {
-+                      dprintk("nfsd_create: dentry %s/%s not negative!\n",
-+                              dentry->d_name.name, dchild->d_name.name);
-+                      goto out; 
-+              }
-       }
-       if (!(iap->ia_valid & ATTR_MODE))
-@@ -888,16 +1052,19 @@
-       err = nfserr_perm;
-       switch (type) {
-       case S_IFREG:
--              err = vfs_create(dirp, dchild, iap->ia_mode);
-+              if (error == -EOPNOTSUPP)
-+                      err = vfs_create(dirp, dchild, iap->ia_mode);
-               break;
-       case S_IFDIR:
--              err = vfs_mkdir(dirp, dchild, iap->ia_mode);
-+              if (error == -EOPNOTSUPP)
-+                      err = vfs_mkdir(dirp, dchild, iap->ia_mode);
-               break;
-       case S_IFCHR:
-       case S_IFBLK:
-       case S_IFIFO:
-       case S_IFSOCK:
--              err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
-+              if (error == -EOPNOTSUPP)       
-+                      err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
-               break;
-       default:
-               printk("nfsd: bad file type %o in nfsd_create\n", type);
-@@ -966,7 +1133,13 @@
-       /* Get all the sanity checks out of the way before
-        * we lock the parent. */
-       err = nfserr_notdir;
--      if(!dirp->i_op || !dirp->i_op->lookup)
-+      if (dirp->i_op->mknod_raw) {
-+              err = mknod_raw(dentry, fname, flen, iap->ia_mode, 0);
-+              if (err && err != -EOPNOTSUPP)
-+                      goto out;
-+      }
-+
-+      if(!dirp->i_op ||  !(dirp->i_op->lookup || dirp->i_op->lookup_it))
-               goto out;
-       fh_lock(fhp);
-@@ -1017,6 +1190,8 @@
-               case NFS3_CREATE_GUARDED:
-                       err = nfserr_exist;
-               }
-+              if(dirp->i_op->mknod_raw)
-+                      err = 0;
-               goto out;
-       }
-@@ -1123,7 +1298,7 @@
-                               struct iattr *iap)
- {
-       struct dentry   *dentry, *dnew;
--      int             err, cerr;
-+      int             err, cerr, error = -EOPNOTSUPP;
-       err = nfserr_noent;
-       if (!flen || !plen)
-@@ -1137,12 +1312,18 @@
-               goto out;
-       fh_lock(fhp);
-       dentry = fhp->fh_dentry;
-+      
-+      if (dentry->d_inode->i_op->symlink_raw)
-+              error = symlink_raw(dentry, fname, flen, path);
-+
-       dnew = lookup_one_len(fname, dentry, flen);
-       err = PTR_ERR(dnew);
-       if (IS_ERR(dnew))
-               goto out_nfserr;
--      err = vfs_symlink(dentry->d_inode, dnew, path);
-+      err = error;
-+      if (err == -EOPNOTSUPP || !dentry->d_inode->i_op->symlink_raw)
-+              err = vfs_symlink(dentry->d_inode, dnew, path);
-       if (!err) {
-               if (EX_ISSYNC(fhp->fh_export))
-                       nfsd_sync_dir(dentry);
-@@ -1152,7 +1333,10 @@
-                               iap->ia_valid |= ATTR_CTIME;
-                               iap->ia_mode = (iap->ia_mode&S_IALLUGO)
-                                       | S_IFLNK;
--                              err = notify_change(dnew, iap);
-+                              if (dnew->d_inode->i_op && dnew->d_inode->i_op->setattr_raw)
-+                                      err = setattr_raw(dnew->d_inode, iap);
-+                              else
-+                                      err = notify_change(dnew, iap);
-                               if (!err && EX_ISSYNC(fhp->fh_export))
-                                       write_inode_now(dentry->d_inode, 1);
-                      }
-@@ -1210,7 +1394,10 @@
-       dold = tfhp->fh_dentry;
-       dest = dold->d_inode;
--      err = vfs_link(dold, dirp, dnew);
-+      if (dirp->i_op->link_raw)
-+              err = link_raw(dold, ddir, dnew);
-+      else
-+              err = vfs_link(dold, dirp, dnew);
-       if (!err) {
-               if (EX_ISSYNC(ffhp->fh_export)) {
-                       nfsd_sync_dir(ddir);
-@@ -1295,7 +1482,10 @@
-                       err = nfserr_perm;
-       } else
- #endif
--      err = vfs_rename(fdir, odentry, tdir, ndentry);
-+      if(fdir->i_op->rename_raw)
-+              err = rename_raw(fdentry, tdentry, odentry, ndentry);
-+      else
-+              err = vfs_rename(fdir, odentry, tdir, ndentry);
-       if (!err && EX_ISSYNC(tfhp->fh_export)) {
-               nfsd_sync_dir(tdentry);
-               nfsd_sync_dir(fdentry);
-@@ -1316,7 +1506,7 @@
-       fill_post_wcc(tfhp);
-       double_up(&tdir->i_sem, &fdir->i_sem);
-       ffhp->fh_locked = tfhp->fh_locked = 0;
--      
-+
- out:
-       return err;
- }
-@@ -1362,9 +1552,15 @@
-                       err = nfserr_perm;
-               } else
- #endif
--              err = vfs_unlink(dirp, rdentry);
-+              if (dirp->i_op->unlink_raw)
-+                      err = unlink_raw(dentry, fname, flen, rdentry);
-+              else
-+                      err = vfs_unlink(dirp, rdentry);
-       } else { /* It's RMDIR */
--              err = vfs_rmdir(dirp, rdentry);
-+              if (dirp->i_op->rmdir_raw)
-+                      err = rmdir_raw(dentry, fname, flen, rdentry);
-+              else
-+                      err = vfs_rmdir(dirp, rdentry);
-       }
-       dput(rdentry);
-Index: linux-bgl/fs/nfsd/nfsfh.c
-===================================================================
---- linux-bgl.orig/fs/nfsd/nfsfh.c     2003-07-02 08:44:08.000000000 -0700
-+++ linux-bgl/fs/nfsd/nfsfh.c  2004-12-28 17:13:59.942919514 -0800
-@@ -36,6 +36,15 @@
-       int sequence;           /* sequence counter */
- };
-+static struct dentry *lookup_it(struct inode *inode, struct dentry * dentry)
-+{
-+      if (inode->i_op->lookup_it)
-+          return inode->i_op->lookup_it(inode, dentry, NULL, 0);
-+      else
-+          return inode->i_op->lookup(inode, dentry);
-+              
-+}
-+
- /*
-  * A rather strange filldir function to capture
-  * the name matching the specified inode number.
-@@ -75,6 +84,8 @@
-       int error;
-       struct file file;
-       struct nfsd_getdents_callback buffer;
-+      struct lookup_intent it;
-+      struct file *filp = NULL;
-       error = -ENOTDIR;
-       if (!dir || !S_ISDIR(dir->i_mode))
-@@ -85,9 +96,37 @@
-       /*
-        * Open the directory ...
-        */
--      error = init_private_file(&file, dentry, FMODE_READ);
--      if (error)
-+      if (dentry->d_op && dentry->d_op->d_revalidate_it) {
-+              if ((dentry->d_flags & DCACHE_NFSD_DISCONNECTED) &&
-+                  (dentry->d_parent == dentry) ) {
-+                      it.it_op_release = NULL;
-+                      /* 
-+                       * XXX Temporary Hack: Simulating init_private_file without
-+                       * f_op->open for disconnected dentry Since we don't have actual
-+                       * dentry->d_name to revalidate in revalidate_it()
-+                       */
-+                      filp = &file;
-+                      memset(filp, 0, sizeof(*filp));
-+                      filp->f_mode   = FMODE_READ;
-+                      atomic_set(&filp->f_count, 1);
-+                      filp->f_dentry = dentry;
-+                      filp->f_uid = current->fsuid;
-+                      filp->f_gid = current->fsgid;
-+                      filp->f_op = dentry->d_inode->i_fop;
-+                      error = 0;
-+              } else {
-+                      intent_init(&it, IT_OPEN, 0);
-+                      error = revalidate_it(dentry, &it);
-+                      if (error)
-+                              goto out;
-+                      error = init_private_file_it(&file, dentry, FMODE_READ, &it);
-+              }
-+      } else {
-+              error = init_private_file_it(&file, dentry, FMODE_READ, NULL);
-+      }
-+      if (error) 
-               goto out;
-+
-       error = -EINVAL;
-       if (!file.f_op->readdir)
-               goto out_close;
-@@ -113,9 +152,13 @@
-       }
- out_close:
--      if (file.f_op->release)
-+      if (file.f_op->release && !filp)
-               file.f_op->release(dir, &file);
- out:
-+      if (dentry->d_op &&
-+          dentry->d_op->d_revalidate_it &&
-+          it.it_op_release && !filp)
-+              intent_release(&it);
-       return error;
- }
-@@ -273,7 +316,7 @@
-       /* I'm going to assume that if the returned dentry is different, then
-        * it is well connected.  But nobody returns different dentrys do they?
-        */
--      pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry);
-+      pdentry = lookup_it(child->d_inode, tdentry);
-       d_drop(tdentry); /* we never want ".." hashed */
-       if (!pdentry && tdentry->d_inode == NULL) {
-               /* File system cannot find ".." ... sad but possible */
-@@ -304,6 +347,8 @@
-                               igrab(tdentry->d_inode);
-                               pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
-                       }
-+                      if (child->d_op && child->d_op->d_revalidate_it)
-+                              pdentry->d_op = child->d_op;
-               }
-               if (pdentry == NULL)
-                       pdentry = ERR_PTR(-ENOMEM);
-@@ -461,6 +506,8 @@
-               struct dentry *pdentry;
-               struct inode *parent;
-+              if (result->d_op && result->d_op->d_revalidate_it)
-+                      dentry->d_op = result->d_op;
-               pdentry = nfsd_findparent(dentry);
-               err = PTR_ERR(pdentry);
-               if (IS_ERR(pdentry))
-@@ -648,6 +695,11 @@
-       inode = dentry->d_inode;
-+      /* cache coherency for non-device filesystems */
-+      if (inode->i_op && inode->i_op->revalidate_it) {
-+          inode->i_op->revalidate_it(dentry, NULL);
-+      }
-+
-       /* Type check. The correct error return for type mismatches
-        * does not seem to be generally agreed upon. SunOS seems to
-        * use EISDIR if file isn't S_IFREG; a comment in the NFSv3
-@@ -878,8 +930,9 @@
-               dentry->d_parent->d_name.name, dentry->d_name.name);
-       goto out;
- out_uptodate:
--      printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
--              dentry->d_parent->d_name.name, dentry->d_name.name);
-+      if(!dentry->d_parent->d_inode->i_op->mkdir_raw)
-+              printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
-+                      dentry->d_parent->d_name.name, dentry->d_name.name);
-       goto out;
- }
-Index: linux-bgl/fs/Makefile
-===================================================================
---- linux-bgl.orig/fs/Makefile 2004-12-28 17:13:56.898868625 -0800
-+++ linux-bgl/fs/Makefile      2004-12-28 17:13:59.943919356 -0800
-@@ -7,7 +7,8 @@
- O_TARGET := fs.o
--export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
-+export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o namei.o \
-+              file_table.o
- mod-subdirs :=        nls
- obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
-Index: linux-bgl/fs/namei.c
-===================================================================
---- linux-bgl.orig/fs/namei.c  2004-12-28 17:13:56.265835195 -0800
-+++ linux-bgl/fs/namei.c       2004-12-28 17:13:59.947918720 -0800
-@@ -22,6 +22,7 @@
- #include <linux/dnotify.h>
- #include <linux/smp_lock.h>
- #include <linux/personality.h>
-+#include <linux/module.h>
- #include <asm/namei.h>
- #include <asm/uaccess.h>
-@@ -100,6 +101,7 @@
-               it->it_op_release(it);
- }
-+EXPORT_SYMBOL(intent_release);
- /* In order to reduce some races, while at the same time doing additional
-  * checking and hopefully speeding things up, we copy filenames to the
-@@ -889,7 +891,8 @@
- /* SMP-safe */
--struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
-+struct dentry * lookup_one_len_it(const char * name, struct dentry * base,
-+                                  int len, struct lookup_intent *it)
- {
-       unsigned long hash;
-       struct qstr this;
-@@ -909,11 +912,16 @@
-       }
-       this.hash = end_name_hash(hash);
--      return lookup_hash_it(&this, base, NULL);
-+      return lookup_hash_it(&this, base, it);
- access:
-       return ERR_PTR(-EACCES);
- }
-+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
-+{
-+      return lookup_one_len_it(name, base, len, NULL);
-+}
-+
- /*
-  *    namei()
-  *
-Index: linux-bgl/fs/file_table.c
-===================================================================
---- linux-bgl.orig/fs/file_table.c     2003-07-02 08:44:42.000000000 -0700
-+++ linux-bgl/fs/file_table.c  2004-12-28 17:13:59.948918562 -0800
-@@ -82,7 +82,8 @@
-  * and call the open function (if any).  The caller must verify that
-  * inode->i_fop is not NULL.
-  */
--int init_private_file(struct file *filp, struct dentry *dentry, int mode)
-+int init_private_file_it(struct file *filp, struct dentry *dentry, int mode,
-+                         struct lookup_intent *it)
- {
-       memset(filp, 0, sizeof(*filp));
-       filp->f_mode   = mode;
-@@ -90,12 +91,20 @@
-       filp->f_dentry = dentry;
-       filp->f_uid    = current->fsuid;
-       filp->f_gid    = current->fsgid;
-+      if (it)
-+              filp->f_it = it;
-       filp->f_op     = dentry->d_inode->i_fop;
-       if (filp->f_op->open)
-               return filp->f_op->open(dentry->d_inode, filp);
-       else
-               return 0;
- }
-+EXPORT_SYMBOL(init_private_file_it);
-+
-+int init_private_file(struct file *filp, struct dentry *dentry, int mode)
-+{
-+      return init_private_file_it(filp, dentry, mode, NULL);
-+}
- void fput(struct file * file)
- {
-Index: linux-bgl/fs/inode.c
-===================================================================
---- linux-bgl.orig/fs/inode.c  2004-12-28 17:13:56.635910389 -0800
-+++ linux-bgl/fs/inode.c       2004-12-28 17:13:59.950918244 -0800
-@@ -971,9 +971,10 @@
- }
--struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
-+static inline struct inode *ifind(struct super_block *sb, unsigned long ino,
-+                                  struct list_head *head,
-+                                  find_inode_t find_actor, void *opaque)
- {
--      struct list_head * head = inode_hashtable + hash(sb,ino);
-       struct inode * inode;
-       spin_lock(&inode_lock);
-@@ -986,6 +987,24 @@
-       }
-       spin_unlock(&inode_lock);
-+      return NULL;
-+}
-+
-+struct inode *ilookup4(struct super_block *sb, unsigned long ino,
-+                       find_inode_t find_actor, void *opaque)
-+{
-+      struct list_head * head = inode_hashtable + hash(sb,ino);
-+      return ifind(sb, ino, head, find_actor, opaque);
-+}
-+
-+struct inode *iget4(struct super_block *sb, unsigned long ino,
-+                    find_inode_t find_actor, void *opaque)
-+{
-+      struct list_head * head = inode_hashtable + hash(sb,ino);
-+      struct inode *inode = ifind(sb, ino, head, find_actor, opaque);
-+      if (inode)
-+              return inode;
-+
-       /*
-        * get_new_inode() will do the right thing, re-trying the search
-        * in case it had to block at any point.
-Index: linux-bgl/kernel/ksyms.c
-===================================================================
---- linux-bgl.orig/kernel/ksyms.c      2004-12-28 17:13:56.978855920 -0800
-+++ linux-bgl/kernel/ksyms.c   2004-12-28 17:13:59.951918085 -0800
-@@ -142,6 +142,7 @@
- EXPORT_SYMBOL(igrab);
- EXPORT_SYMBOL(iunique);
- EXPORT_SYMBOL(iget4);
-+EXPORT_SYMBOL(ilookup4);
- EXPORT_SYMBOL(iput);
- EXPORT_SYMBOL(force_delete);
- EXPORT_SYMBOL(follow_up);
-@@ -152,6 +153,7 @@
- EXPORT_SYMBOL(path_release);
- EXPORT_SYMBOL(__user_walk);
- EXPORT_SYMBOL(lookup_one_len);
-+EXPORT_SYMBOL(lookup_one_len_it);
- EXPORT_SYMBOL(lookup_hash);
- EXPORT_SYMBOL(sys_close);
- EXPORT_SYMBOL(dcache_lock);
-Index: linux-bgl/include/linux/fs.h
-===================================================================
---- linux-bgl.orig/include/linux/fs.h  2004-12-28 17:13:59.471860200 -0800
-+++ linux-bgl/include/linux/fs.h       2004-12-28 17:13:59.955917450 -0800
-@@ -93,6 +93,9 @@
- #define FS_SINGLE     8 /* Filesystem that can have only one superblock */
- #define FS_NOMOUNT    16 /* Never mount from userland */
- #define FS_LITTER     32 /* Keeps the tree in dcache */
-+#define FS_NFSEXP_FSID 64 /* Use file system specific fsid for
-+                         * exporting non device filesystems.
-+                         */
- #define FS_ODD_RENAME 32768   /* Temporary stuff; will go away as soon
-                                 * as nfs_rename() will be cleaned up
-                                 */
-@@ -1149,6 +1152,9 @@
-                        struct nameidata *nd, struct lookup_intent *it);
- extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-                           int flags, struct lookup_intent *it);
-+extern int revalidate_it(struct dentry *dentry, struct lookup_intent *it);
-+extern int init_private_file_it(struct file *, struct dentry *dentry, int mode,
-+                                struct lookup_intent *it);
- extern int filp_close(struct file *, fl_owner_t id);
- extern char * getname(const char *);
-@@ -1418,6 +1424,8 @@
- extern int follow_down(struct vfsmount **, struct dentry **);
- extern int follow_up(struct vfsmount **, struct dentry **);
- extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
-+extern struct dentry * lookup_one_len_it(const char *, struct dentry *, int,
-+                                         struct lookup_intent *);
- extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
- #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
- #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
-@@ -1431,6 +1439,8 @@
- typedef int (*find_inode_t)(struct inode *, unsigned long, void *);
- extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *);
-+extern struct inode * ilookup4(struct super_block *, unsigned long,
-+                               find_inode_t, void *);
- static inline struct inode *iget(struct super_block *sb, unsigned long ino)
- {
-       return iget4(sb, ino, NULL, NULL);
index 563cee2..6f53041 100644 (file)
@@ -93660,14 +93660,14 @@ Index: linux-2.4.21/mm/mmap.c
  #include <linux/personality.h>
  #include <linux/compiler.h>
  #include <linux/profile.h>
-@@ -1459,6 +1460,7 @@
-       mm->total_vm = 0;
-       mm->locked_vm = 0;
+@@ -1450,6 +1451,7 @@
+       release_segments(mm);
+  
+       spin_lock(&mm->page_table_lock);
 +      coproc_release(mm);
-       flush_cache_mm(mm);
-       while (mpnt) {
-               struct vm_area_struct * next = mpnt->vm_next;
+       mpnt = mm->mmap;
+       mm->mmap = mm->mmap_cache = NULL;
+       mm->mm_rb = RB_ROOT;
 Index: linux-2.4.21/mm/mprotect.c
 ===================================================================
 --- linux-2.4.21.orig/mm/mprotect.c    2005-06-01 22:51:50.000000000 -0400
diff --git a/lustre/kernel_patches/patches/removepage-2.4.19-suse.patch b/lustre/kernel_patches/patches/removepage-2.4.19-suse.patch
deleted file mode 100644 (file)
index 4602f96..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
- include/linux/fs.h |    1 +
- mm/filemap.c       |    3 +++
- 2 files changed, 4 insertions(+)
-
-Index: linux-2.4.19.SuSE/include/linux/fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/fs.h  Sun Nov 16 00:40:59 2003
-+++ linux-2.4.19.SuSE/include/linux/fs.h       Sun Nov 16 01:38:06 2003
-@@ -428,6 +428,7 @@
-       int (*releasepage) (struct page *, int);
- #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */
-       int (*direct_IO)(int, struct file *, struct kiobuf *, unsigned long, int);
-+      void (*removepage)(struct page *); /* called when page gets removed from the inode */
- };
- struct address_space {
-Index: linux-2.4.19.SuSE/mm/filemap.c
-===================================================================
---- linux-2.4.19.SuSE.orig/mm/filemap.c        Sat Nov 15 18:02:15 2003
-+++ linux-2.4.19.SuSE/mm/filemap.c     Sun Nov 16 01:37:11 2003
-@@ -97,6 +97,9 @@
- {
-       struct address_space * mapping = page->mapping;
-+      if (mapping->a_ops->removepage)
-+              mapping->a_ops->removepage(page);
-+      
-       mapping->nrpages--;
-       list_del(&page->list);
-       page->mapping = NULL;
diff --git a/lustre/kernel_patches/patches/resched-2.4.19-pre1.patch b/lustre/kernel_patches/patches/resched-2.4.19-pre1.patch
deleted file mode 100644 (file)
index 567e1e8..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Index: linux-2.4.19-pre1/include/linux/sched.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/sched.h       2003-11-21 04:05:05.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/sched.h    2003-11-21 04:10:29.000000000 +0300
-@@ -927,6 +927,11 @@
-       return res;
- }
-+static inline int need_resched(void)
-+{
-+              return (unlikely(current->need_resched));
-+}
-+
- #endif /* __KERNEL__ */
- #endif
diff --git a/lustre/kernel_patches/patches/socket-exports-2.4.19-bgl.patch b/lustre/kernel_patches/patches/socket-exports-2.4.19-bgl.patch
deleted file mode 100644 (file)
index e60f473..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
- include/linux/socket.h |    4 ++++
- net/netsyms.c          |    2 ++
- net/socket.c           |    2 +-
- 3 files changed, 7 insertions(+), 1 deletion(-)
-
-Index: linux-DRV401/include/linux/socket.h
-===================================================================
---- linux-DRV401.orig/include/linux/socket.h   2004-10-15 10:26:20.000000000 -0700
-+++ linux-DRV401/include/linux/socket.h        2004-10-15 11:11:09.000000000 -0700
-@@ -260,6 +260,10 @@
- extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen);
- extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
- extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
-+struct socket;
-+extern int sock_map_fd(struct socket *sock);
-+extern struct socket *sockfd_lookup(int fd, int *err);
-+
- #endif
- #endif /* not kernel and not glibc */
- #endif /* _LINUX_SOCKET_H */
-Index: linux-DRV401/net/netsyms.c
-===================================================================
---- linux-DRV401.orig/net/netsyms.c    2004-10-15 11:10:52.000000000 -0700
-+++ linux-DRV401/net/netsyms.c 2004-10-15 11:11:09.000000000 -0700
-@@ -159,6 +159,8 @@
- EXPORT_SYMBOL(put_cmsg);
- EXPORT_SYMBOL(sock_kmalloc);
- EXPORT_SYMBOL(sock_kfree_s);
-+EXPORT_SYMBOL(sockfd_lookup);
-+EXPORT_SYMBOL(sock_map_fd);
- #ifdef CONFIG_FILTER
- EXPORT_SYMBOL(sk_run_filter);
-Index: linux-DRV401/net/socket.c
-===================================================================
---- linux-DRV401.orig/net/socket.c     2004-10-15 10:24:16.000000000 -0700
-+++ linux-DRV401/net/socket.c  2004-10-15 11:11:09.000000000 -0700
-@@ -326,7 +326,7 @@
-  *    but we take care of internal coherence yet.
-  */
--static int sock_map_fd(struct socket *sock)
-+int sock_map_fd(struct socket *sock)
- {
-       int fd;
-       struct qstr this;
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.4.19-pre1.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.4.19-pre1.patch
deleted file mode 100644 (file)
index bcd3f73..0000000
+++ /dev/null
@@ -1,461 +0,0 @@
-Index: linux-2.4.19-pre1/include/linux/skbuff.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/linux/skbuff.h      2001-11-22 22:46:26.000000000 +0300
-+++ linux-2.4.19-pre1/include/linux/skbuff.h   2004-01-14 01:15:13.000000000 +0300
-@@ -116,6 +116,30 @@
-       __u16 size;
- };
-+/* Support for callback when skb data has been released */
-+typedef struct zccd                           /* Zero Copy Callback Descriptor */
-+{                                             /* (embed as first member of custom struct) */
-+      atomic_t        zccd_count;             /* reference count */
-+      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
-+} zccd_t;
-+
-+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
-+{
-+      atomic_set (&d->zccd_count, 1);
-+      d->zccd_destructor = callback;
-+}
-+
-+static inline void zccd_get (zccd_t *d)               /* take a reference */
-+{
-+      atomic_inc (&d->zccd_count);
-+}
-+
-+static inline void zccd_put (zccd_t *d)               /* release a reference */
-+{
-+      if (atomic_dec_and_test (&d->zccd_count))
-+              (d->zccd_destructor)(d);
-+}
-+
- /* This data is invariant across clones and lives at
-  * the end of the header data, ie. at skb->end.
-  */
-@@ -123,6 +147,12 @@
-       atomic_t        dataref;
-       unsigned int    nr_frags;
-       struct sk_buff  *frag_list;
-+      zccd_t          *zccd;                  /* zero copy descriptor */
-+      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
-+      /* NB we expect zero-copy data to be at least 1 packet, so
-+       * having 2 zccds means we don't unneccessarily split the packet
-+       * where consecutive zero-copy sends abutt.
-+       */
-       skb_frag_t      frags[MAX_SKB_FRAGS];
- };
-Index: linux-2.4.19-pre1/include/net/tcp.h
-===================================================================
---- linux-2.4.19-pre1.orig/include/net/tcp.h   2001-11-22 22:47:22.000000000 +0300
-+++ linux-2.4.19-pre1/include/net/tcp.h        2004-01-14 01:15:13.000000000 +0300
-@@ -640,6 +640,8 @@
- extern int                    tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size);
- extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
-+extern ssize_t                        tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                                                int flags, zccd_t *zccd);
- extern int                    tcp_ioctl(struct sock *sk, 
-                                         int cmd, 
-@@ -733,6 +735,9 @@
-                                           struct msghdr *msg,
-                                           int len, int nonblock, 
-                                           int flags, int *addr_len);
-+extern int                    tcp_recvpackets(struct sock *sk,
-+                                              struct sk_buff_head *packets,
-+                                              int len, int nonblock);
- extern int                    tcp_listen_start(struct sock *sk);
-Index: linux-2.4.19-pre1/net/netsyms.c
-===================================================================
---- linux-2.4.19-pre1.orig/net/netsyms.c       2004-01-14 01:10:37.000000000 +0300
-+++ linux-2.4.19-pre1/net/netsyms.c    2004-01-14 01:15:54.000000000 +0300
-@@ -409,6 +409,9 @@
- #endif
-+EXPORT_SYMBOL(tcp_sendpage_zccd);
-+EXPORT_SYMBOL(tcp_recvpackets);
-+
- EXPORT_SYMBOL(netlink_set_err);
- EXPORT_SYMBOL(netlink_broadcast);
- EXPORT_SYMBOL(netlink_unicast);
-Index: linux-2.4.19-pre1/net/core/skbuff.c
-===================================================================
---- linux-2.4.19-pre1.orig/net/core/skbuff.c   2001-12-21 20:42:05.000000000 +0300
-+++ linux-2.4.19-pre1/net/core/skbuff.c        2004-01-14 01:15:13.000000000 +0300
-@@ -208,6 +208,8 @@
-       atomic_set(&(skb_shinfo(skb)->dataref), 1);
-       skb_shinfo(skb)->nr_frags = 0;
-       skb_shinfo(skb)->frag_list = NULL;
-+      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
-+      skb_shinfo(skb)->zccd2 = NULL;
-       return skb;
- nodata:
-@@ -276,6 +278,10 @@
- {
-       if (!skb->cloned ||
-           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
-+              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
-+              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
-+                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
-               if (skb_shinfo(skb)->nr_frags) {
-                       int i;
-                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-@@ -532,6 +538,8 @@
-       atomic_set(&(skb_shinfo(skb)->dataref), 1);
-       skb_shinfo(skb)->nr_frags = 0;
-       skb_shinfo(skb)->frag_list = NULL;
-+      skb_shinfo(skb)->zccd = NULL;           /* copied data => no user zero copy descriptor */
-+      skb_shinfo(skb)->zccd2 = NULL;
-       /* We are no longer a clone, even if we were. */
-       skb->cloned = 0;
-@@ -578,6 +586,14 @@
-       n->data_len = skb->data_len;
-       n->len = skb->len;
-+      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
-+
-+      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
-+              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
-+      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
-+
-       if (skb_shinfo(skb)->nr_frags) {
-               int i;
-@@ -620,6 +636,8 @@
-       u8 *data;
-       int size = nhead + (skb->end - skb->head) + ntail;
-       long off;
-+      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
-+      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
-       if (skb_shared(skb))
-               BUG();
-@@ -641,6 +659,11 @@
-       if (skb_shinfo(skb)->frag_list)
-               skb_clone_fraglist(skb);
-+      if (zccd != NULL)                       /* user zero copy descriptor? */
-+              zccd_get (zccd);                /* extra ref (pages are shared) */
-+      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
-+              zccd_get (zccd2);               /* extra ref (pages are shared) */
-+
-       skb_release_data(skb);
-       off = (data+nhead) - skb->head;
-@@ -655,6 +678,8 @@
-       skb->nh.raw += off;
-       skb->cloned = 0;
-       atomic_set(&skb_shinfo(skb)->dataref, 1);
-+      skb_shinfo(skb)->zccd = zccd;
-+      skb_shinfo(skb)->zccd2 = zccd2;
-       return 0;
- nodata:
-Index: linux-2.4.19-pre1/net/ipv4/tcp.c
-===================================================================
---- linux-2.4.19-pre1.orig/net/ipv4/tcp.c      2001-12-21 20:42:05.000000000 +0300
-+++ linux-2.4.19-pre1/net/ipv4/tcp.c   2004-01-14 01:15:13.000000000 +0300
-@@ -744,7 +744,7 @@
-       goto out;
- }
--ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags);
-+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd);
- static inline int
- can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
-@@ -823,7 +823,8 @@
-       return err;
- }
--ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags)
-+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
-+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd)
- {
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       int mss_now;
-@@ -871,6 +872,17 @@
-                       copy = size;
-               i = skb_shinfo(skb)->nr_frags;
-+
-+              if (zccd != NULL &&             /* this is a zcc I/O */
-+                  skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
-+                  skb_shinfo(skb)->zccd2 != NULL &&
-+                  skb_shinfo(skb)->zccd != zccd && /* not the same one */
-+                  skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      tcp_mark_push (tp, skb);
-+                      goto new_segment;
-+              }
-+
-               if (can_coalesce(skb, i, page, offset)) {
-                       skb_shinfo(skb)->frags[i-1].size += copy;
-               } else if (i < MAX_SKB_FRAGS) {
-@@ -881,6 +893,20 @@
-                       goto new_segment;
-               }
-+              if (zccd != NULL &&     /* this is a zcc I/O */
-+                  skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
-+                  skb_shinfo(skb)->zccd2 != zccd)
-+              {
-+                      zccd_get (zccd);        /* bump ref count */
-+
-+                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
-+
-+                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
-+                              skb_shinfo(skb)->zccd = zccd;
-+                      else
-+                              skb_shinfo(skb)->zccd2 = zccd;
-+              }
-+
-               skb->len += copy;
-               skb->data_len += copy;
-               skb->ip_summed = CHECKSUM_HW;
-@@ -944,7 +970,31 @@
-       lock_sock(sk);
-       TCP_CHECK_TIMER(sk);
--      res = do_tcp_sendpages(sk, &page, offset, size, flags);
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return res;
-+}
-+
-+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
-+                        int flags, zccd_t *zccd)
-+{
-+      ssize_t res;
-+      struct sock *sk = sock->sk;
-+
-+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
-+
-+      if (!(sk->route_caps & NETIF_F_SG) ||   /* caller shouldn't waste her time */
-+          !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
-+              BUG ();
-+
-+#undef TCP_ZC_CSUM_FLAGS
-+
-+      lock_sock(sk);
-+      TCP_CHECK_TIMER(sk);
-+
-+      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
-+
-       TCP_CHECK_TIMER(sk);
-       release_sock(sk);
-       return res;
-@@ -1683,6 +1733,202 @@
-       goto out;
- }
-+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
-+                   int len, int nonblock)
-+{
-+      struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-+      int copied;
-+      long timeo;
-+
-+      BUG_TRAP (len > 0);
-+      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
-+
-+      lock_sock(sk);
-+
-+      TCP_CHECK_TIMER(sk);
-+
-+      copied = -ENOTCONN;
-+      if (sk->state == TCP_LISTEN)
-+              goto out;
-+
-+      copied = 0;
-+      timeo = sock_rcvtimeo(sk, nonblock);
-+
-+      do {
-+              struct sk_buff * skb;
-+              u32 offset;
-+              unsigned long used;
-+              int exhausted;
-+              int eaten;
-+
-+              /* Are we at urgent data? Stop if we have read anything. */
-+              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
-+                      break;
-+
-+              /* We need to check signals first, to get correct SIGURG
-+               * handling. FIXME: Need to check this doesnt impact 1003.1g
-+               * and move it down to the bottom of the loop
-+               */
-+              if (signal_pending(current)) {
-+                      if (copied)
-+                              break;
-+                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
-+                      break;
-+              }
-+
-+              /* Next get a buffer. */
-+
-+              skb = skb_peek(&sk->receive_queue);
-+
-+              if (skb == NULL)                /* nothing ready */
-+              {
-+                      if (copied) {
-+                              if (sk->err ||
-+                                  sk->state == TCP_CLOSE ||
-+                                  (sk->shutdown & RCV_SHUTDOWN) ||
-+                                  !timeo ||
-+                                  (0))
-+                                      break;
-+                      } else {
-+                              if (sk->done)
-+                                      break;
-+
-+                              if (sk->err) {
-+                                      copied = sock_error(sk);
-+                                      break;
-+                              }
-+
-+                              if (sk->shutdown & RCV_SHUTDOWN)
-+                                      break;
-+
-+                              if (sk->state == TCP_CLOSE) {
-+                                      if (!sk->done) {
-+                                              /* This occurs when user tries to read
-+                                               * from never connected socket.
-+                                               */
-+                                              copied = -ENOTCONN;
-+                                              break;
-+                                      }
-+                                      break;
-+                              }
-+
-+                              if (!timeo) {
-+                                      copied = -EAGAIN;
-+                                      break;
-+                              }
-+                      }
-+
-+                      cleanup_rbuf(sk, copied);
-+                      timeo = tcp_data_wait(sk, timeo);
-+                      continue;
-+              }
-+
-+              BUG_TRAP (atomic_read (&skb->users) == 1);
-+
-+              exhausted = eaten = 0;
-+
-+              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
-+              if (skb->h.th->syn)
-+                      offset--;
-+
-+              used = skb->len - offset;
-+
-+              if (tp->urg_data) {
-+                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
-+                      if (urg_offset < used) {
-+                              if (!urg_offset) { /* at urgent date */
-+                                      if (!sk->urginline) {
-+                                              tp->copied_seq++; /* discard the single byte of urgent data */
-+                                              offset++;
-+                                              used--;
-+                                      }
-+                              } else          /* truncate read */
-+                                      used = urg_offset;
-+                      }
-+              }
-+
-+              BUG_TRAP (used >= 0);
-+              if (len < used)
-+                      used = len;
-+
-+              if (used == 0)
-+                      exhausted = 1;
-+              else
-+              {
-+                      if (skb_is_nonlinear (skb))
-+                      {
-+                              int   rc = skb_linearize (skb, GFP_KERNEL);
-+
-+                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
-+
-+                              if (rc)
-+                              {
-+                                      if (!copied)
-+                                              copied = rc;
-+                                      break;
-+                              }
-+                      }
-+
-+                      if ((offset + used) == skb->len) /* consuming the whole packet */
-+                      {
-+                              __skb_unlink (skb, &sk->receive_queue);
-+                              dst_release (skb->dst);
-+                              skb_orphan (skb);
-+                              __skb_pull (skb, offset);
-+                              __skb_queue_tail (packets, skb);
-+                              exhausted = eaten = 1;
-+                      }
-+                      else                    /* consuming only part of the packet */
-+                      {
-+                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
-+
-+                              if (skb2 == NULL)
-+                              {
-+                                      if (!copied)
-+                                              copied = -ENOMEM;
-+                                      break;
-+                              }
-+
-+                              dst_release (skb2->dst);
-+                              __skb_pull (skb2, offset);
-+                              __skb_trim (skb2, used);
-+                              __skb_queue_tail (packets, skb2);
-+                      }
-+
-+                      tp->copied_seq += used;
-+                      copied += used;
-+                      len -= used;
-+              }
-+
-+              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
-+                      tp->urg_data = 0;
-+                      tcp_fast_path_check(sk, tp);
-+              }
-+
-+              if (!exhausted)
-+                      continue;
-+
-+              if (skb->h.th->fin)
-+              {
-+                      tp->copied_seq++;
-+                      if (!eaten)
-+                              tcp_eat_skb (sk, skb);
-+                      break;
-+              }
-+
-+              if (!eaten)
-+                      tcp_eat_skb (sk, skb);
-+
-+      } while (len > 0);
-+
-+ out:
-+      /* Clean up data we have read: This will do ACK frames. */
-+      cleanup_rbuf(sk, copied);
-+      TCP_CHECK_TIMER(sk);
-+      release_sock(sk);
-+      return copied;
-+}
-+
- /*
-  *    State processing on a close. This implements the state shift for
-  *    sending our FIN frame. Note that we only send a FIN for some
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-bgl.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-bgl.patch
deleted file mode 100644 (file)
index eec0362..0000000
+++ /dev/null
@@ -1,1849 +0,0 @@
- fs/dcache.c               |   19 ++
- fs/exec.c                 |   17 +-
- fs/namei.c                |  295 +++++++++++++++++++++++++++++++++++++++-------
- fs/namespace.c            |   28 +++-
- fs/open.c                 |  172 +++++++++++++++++++-------
- fs/stat.c                 |   52 +++++---
- include/linux/dcache.h    |   60 +++++++++
- include/linux/fs.h        |   32 ++++
- include/linux/fs_struct.h |    4 
- kernel/exit.c             |    3 
- kernel/fork.c             |    3 
- kernel/ksyms.c            |    1 
- 12 files changed, 558 insertions(+), 128 deletions(-)
-
-Index: linux.mcp2/fs/dcache.c
-===================================================================
---- linux.mcp2.orig/fs/dcache.c        2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/dcache.c     2004-05-05 14:19:59.000000000 -0700
-@@ -181,6 +181,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -830,13 +837,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux.mcp2/fs/exec.c
-===================================================================
---- linux.mcp2.orig/fs/exec.c  2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/exec.c       2004-05-05 14:19:59.000000000 -0700
-@@ -107,8 +107,10 @@
-       struct file * file;
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                  .it_flags = FMODE_READ|FMODE_EXEC };
--      error = user_path_walk(library, &nd);
-+      error = user_path_walk_it(library, &nd, &it);
-       if (error)
-               goto out;
-@@ -120,7 +122,8 @@
-       if (error)
-               goto exit;
--      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+      intent_release(&it);
-       error = PTR_ERR(file);
-       if (IS_ERR(file))
-               goto out;
-@@ -342,9 +345,11 @@
-       struct inode *inode;
-       struct file *file;
-       int err = 0;
-+      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                  .it_flags = FMODE_READ|FMODE_EXEC };
-       if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              err = path_walk(name, &nd);
-+              err = path_walk_it(name, &nd, &it);
-       file = ERR_PTR(err);
-       if (!err) {
-               inode = nd.dentry->d_inode;
-@@ -356,7 +361,8 @@
-                               err = -EACCES;
-                       file = ERR_PTR(err);
-                       if (!err) {
--                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+                              intent_release(&it);
-                               if (!IS_ERR(file)) {
-                                       err = deny_write_access(file);
-                                       if (err) {
-@@ -368,6 +374,7 @@
-                               return file;
-                       }
-               }
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       goto out;
-@@ -969,7 +976,7 @@
-               goto close_fail;
-       if (!file->f_op->write)
-               goto close_fail;
--      if (do_truncate(file->f_dentry, 0) != 0)
-+      if (do_truncate(file->f_dentry, 0, 0) != 0)
-               goto close_fail;
-       retval = binfmt->core_dump(signr, regs, file);
-Index: linux.mcp2/fs/namei.c
-===================================================================
---- linux.mcp2.orig/fs/namei.c 2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/namei.c      2004-05-05 14:28:26.000000000 -0700
-@@ -94,6 +94,13 @@
-  * XEmacs seems to be relying on it...
-  */
-+void intent_release(struct lookup_intent *it)
-+{
-+      if (it && it->it_op_release)
-+              it->it_op_release(it);
-+
-+}
-+
- /* In order to reduce some races, while at the same time doing additional
-  * checking and hopefully speeding things up, we copy filenames to the
-  * kernel data space before using them..
-@@ -260,10 +267,19 @@
-  * Internal lookup() using the new generic dcache.
-  * SMP-safe
-  */
--static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
-+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
-+                                  int flags, struct lookup_intent *it)
- {
-       struct dentry * dentry = d_lookup(parent, name);
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
-+              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
-+                  !d_invalidate(dentry)) {
-+                      dput(dentry);
-+                      dentry = NULL;
-+              }
-+              return dentry;
-+      } else
-       if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
-               if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
-                       dput(dentry);
-@@ -281,11 +297,15 @@
-  * make sure that nobody added the entry to the dcache in the meantime..
-  * SMP-safe
-  */
--static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
-+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
-+                                int flags, struct lookup_intent *it)
- {
-       struct dentry * result;
-       struct inode *dir = parent->d_inode;
-+      int counter = 0;
-+again:
-+      counter++;
-       down(&dir->i_sem);
-       /*
-        * First re-do the cached lookup just in case it was created
-@@ -300,6 +320,9 @@
-               result = ERR_PTR(-ENOMEM);
-               if (dentry) {
-                       lock_kernel();
-+                      if (dir->i_op->lookup_it)
-+                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
-+                      else
-                       result = dir->i_op->lookup(dir, dentry);
-                       unlock_kernel();
-                       if (result)
-@@ -321,6 +344,15 @@
-                       dput(result);
-                       result = ERR_PTR(-ENOENT);
-               }
-+      } else if (result->d_op && result->d_op->d_revalidate_it) {
-+              if (!result->d_op->d_revalidate_it(result, flags, it) &&
-+                  !d_invalidate(result)) {
-+                      dput(result);
-+                      if (counter > 10)
-+                              result = ERR_PTR(-ESTALE);
-+                      if (!IS_ERR(result))
-+                              goto again;
-+              }
-       }
-       return result;
- }
-@@ -332,7 +364,8 @@
-  * Without that kind of total limit, nasty chains of consecutive
-  * symlinks can cause almost arbitrarily long lookups. 
-  */
--static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
-+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
-+                               struct lookup_intent *it)
- {
-       int err;
-       if (current->link_count >= 5)
-@@ -346,10 +379,12 @@
-       current->link_count++;
-       current->total_link_count++;
-       UPDATE_ATIME(dentry->d_inode);
-+      nd->intent = it;
-       err = dentry->d_inode->i_op->follow_link(dentry, nd);
-       current->link_count--;
-       return err;
- loop:
-+      intent_release(it);
-       path_release(nd);
-       return -ELOOP;
- }
-@@ -447,7 +482,8 @@
-  *
-  * We expect 'base' to be positive and a directory.
-  */
--int link_path_walk(const char * name, struct nameidata *nd)
-+int link_path_walk_it(const char *name, struct nameidata *nd,
-+                    struct lookup_intent *it)
- {
-       struct dentry *dentry;
-       struct inode *inode;
-@@ -520,9 +556,10 @@
-                               break;
-               }
-               /* This does the actual lookups.. */
--              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
-               if (!dentry) {
--                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE,
-+                                           NULL);
-                       err = PTR_ERR(dentry);
-                       if (IS_ERR(dentry))
-                               break;
-@@ -540,7 +577,7 @@
-                       goto out_dput;
-               if (inode->i_op->follow_link) {
--                      err = do_follow_link(dentry, nd);
-+                      err = do_follow_link(dentry, nd, NULL);
-                       dput(dentry);
-                       if (err)
-                               goto return_err;
-@@ -556,7 +593,7 @@
-                       nd->dentry = dentry;
-               }
-               err = -ENOTDIR; 
--              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
-                       break;
-               continue;
-               /* here ends the main loop */
-@@ -583,9 +620,9 @@
-                       if (err < 0)
-                               break;
-               }
--              dentry = cached_lookup(nd->dentry, &this, 0);
-+              dentry = cached_lookup(nd->dentry, &this, 0, it);
-               if (!dentry) {
--                      dentry = real_lookup(nd->dentry, &this, 0);
-+                      dentry = real_lookup(nd->dentry, &this, 0, it);
-                       err = PTR_ERR(dentry);
-                       if (IS_ERR(dentry))
-                               break;
-@@ -595,7 +632,7 @@
-               inode = dentry->d_inode;
-               if ((lookup_flags & LOOKUP_FOLLOW)
-                   && inode && inode->i_op && inode->i_op->follow_link) {
--                      err = do_follow_link(dentry, nd);
-+                      err = do_follow_link(dentry, nd, it);
-                       dput(dentry);
-                       if (err)
-                               goto return_err;
-@@ -609,7 +646,8 @@
-                       goto no_inode;
-               if (lookup_flags & LOOKUP_DIRECTORY) {
-                       err = -ENOTDIR; 
--                      if (!inode->i_op || !inode->i_op->lookup)
-+                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
-                               break;
-               }
-               goto return_base;
-@@ -633,6 +671,34 @@
-                * Check the cached dentry for staleness.
-                */
-               dentry = nd->dentry;
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
-+                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
-+                              struct dentry *new;
-+                              err = permission(dentry->d_parent->d_inode,
-+                                               MAY_EXEC);
-+                              if (err)
-+                                      break;
-+                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, NULL);
-+                              if (IS_ERR(new)) { 
-+                                      err = PTR_ERR(new);
-+                                      break;
-+                              }
-+                              d_invalidate(dentry);
-+                              dput(dentry);
-+                              nd->dentry = new;
-+                      }
-+                      if (!nd->dentry->d_inode)
-+                              goto no_inode;
-+                      if (lookup_flags & LOOKUP_DIRECTORY) {
-+                              err = -ENOTDIR; 
-+                              if (!nd->dentry->d_inode->i_op ||
-+                                  (!nd->dentry->d_inode->i_op->lookup &&
-+                                   !nd->dentry->d_inode->i_op->lookup_it))
-+                                      break;
-+                      }
-+              } else
-               if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
-                       err = -ESTALE;
-                       if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -646,15 +703,28 @@
-               dput(dentry);
-               break;
-       }
-+      if (err)
-+              intent_release(it);
-       path_release(nd);
- return_err:
-       return err;
- }
-+int link_path_walk(const char * name, struct nameidata *nd)
-+{
-+      return link_path_walk_it(name, nd, NULL);
-+}
-+
-+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
-+{
-+      current->total_link_count = 0;
-+      return link_path_walk_it(name, nd, it);
-+}
-+
- int path_walk(const char * name, struct nameidata *nd)
- {
-       current->total_link_count = 0;
--      return link_path_walk(name, nd);
-+      return link_path_walk_it(name, nd, NULL);
- }
- /* SMP-safe */
-@@ -743,6 +813,7 @@
- {
-       nd->last_type = LAST_ROOT; /* if there are only slashes... */
-       nd->flags = flags;
-+      nd->intent = NULL;
-       if (*name=='/')
-               return walk_init_root(name,nd);
-       read_lock(&current->fs->lock);
-@@ -757,7 +828,8 @@
-  * needs parent already locked. Doesn't follow mounts.
-  * SMP-safe.
-  */
--struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
-+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
-+                             struct lookup_intent *it)
- {
-       struct dentry * dentry;
-       struct inode *inode;
-@@ -780,13 +852,16 @@
-                       goto out;
-       }
--      dentry = cached_lookup(base, name, 0);
-+      dentry = cached_lookup(base, name, 0, it);
-       if (!dentry) {
-               struct dentry *new = d_alloc(base, name);
-               dentry = ERR_PTR(-ENOMEM);
-               if (!new)
-                       goto out;
-               lock_kernel();
-+              if (inode->i_op->lookup_it)
-+                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
-+              else
-               dentry = inode->i_op->lookup(inode, new);
-               unlock_kernel();
-               if (!dentry)
-@@ -798,6 +873,12 @@
-       return dentry;
- }
-+struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
-+{
-+      return lookup_hash_it(name, base, NULL);
-+}
-+
-+
- /* SMP-safe */
- struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
- {
-@@ -819,7 +900,7 @@
-       }
-       this.hash = end_name_hash(hash);
--      return lookup_hash(&this, base);
-+      return lookup_hash_it(&this, base, NULL);
- access:
-       return ERR_PTR(-EACCES);
- }
-@@ -851,6 +932,23 @@
-       return err;
- }
-+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
-+                 struct lookup_intent *it)
-+{
-+      char *tmp;
-+      int err;
-+
-+      tmp = getname(name);
-+      err = PTR_ERR(tmp);
-+      if (!IS_ERR(tmp)) {
-+              err = 0;
-+              if (path_init(tmp, flags, nd))
-+                      err = path_walk_it(tmp, nd, it);
-+              putname(tmp);
-+      }
-+      return err;
-+}
-+
- /*
-  * It's inline, so penalty for filesystems that don't use sticky bit is
-  * minimal.
-@@ -946,7 +1044,8 @@
-       return retval;
- }
--int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
-+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
-+                       struct lookup_intent *it)
- {
-       int error;
-@@ -959,12 +1058,15 @@
-               goto exit_lock;
-       error = -EACCES;        /* shouldn't it be ENOSYS? */
--      if (!dir->i_op || !dir->i_op->create)
-+      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
-               goto exit_lock;
-       DQUOT_INIT(dir);
-       lock_kernel();
--      error = dir->i_op->create(dir, dentry, mode);
-+      if (dir->i_op->create_it)
-+              error = dir->i_op->create_it(dir, dentry, mode, it);
-+      else
-+              error = dir->i_op->create(dir, dentry, mode);
-       unlock_kernel();
- exit_lock:
-       up(&dir->i_zombie);
-@@ -973,6 +1075,11 @@
-       return error;
- }
-+int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
-+{
-+      return vfs_create_it(dir, dentry, mode, NULL);
-+}
-+
- /*
-  *    open_namei()
-  *
-@@ -987,7 +1094,8 @@
-  * for symlinks (where the permissions are checked later).
-  * SMP-safe
-  */
--int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
-+int open_namei_it(const char *pathname, int flag, int mode,
-+                struct nameidata *nd, struct lookup_intent *it)
- {
-       int acc_mode, error = 0;
-       struct inode *inode;
-@@ -997,12 +1105,14 @@
-       acc_mode = ACC_MODE(flag);
-+      if (it)
-+              it->it_flags = flag;
-       /*
-        * The simplest case - just a plain lookup.
-        */
-       if (!(flag & O_CREAT)) {
-               if (path_init(pathname, lookup_flags(flag), nd))
--                      error = path_walk(pathname, nd);
-+                      error = path_walk_it(pathname, nd, it);
-               if (error)
-                       return error;
-               dentry = nd->dentry;
-@@ -1012,6 +1122,10 @@
-       /*
-        * Create - we need to know the parent.
-        */
-+      if (it) {
-+              it->it_create_mode = mode;
-+              it->it_op |= IT_CREAT;
-+      }
-       if (path_init(pathname, LOOKUP_PARENT, nd))
-               error = path_walk(pathname, nd);
-       if (error)
-@@ -1028,7 +1142,7 @@
-       dir = nd->dentry;
-       down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
- do_last:
-       error = PTR_ERR(dentry);
-@@ -1037,10 +1151,11 @@
-               goto exit;
-       }
-+      it->it_create_mode = mode;
-       /* Negative dentry, just create the file */
-       if (!dentry->d_inode) {
--              error = vfs_create(dir->d_inode, dentry,
--                                 mode & ~current->fs->umask);
-+              error = vfs_create_it(dir->d_inode, dentry,
-+                                    mode & ~current->fs->umask, it);
-               up(&dir->d_inode->i_sem);
-               dput(nd->dentry);
-               nd->dentry = dentry;
-@@ -1144,7 +1259,7 @@
-               if (!error) {
-                       DQUOT_INIT(inode);
-                       
--                      error = do_truncate(dentry, 0);
-+                      error = do_truncate(dentry, 0, 1);
-               }
-               put_write_access(inode);
-               if (error)
-@@ -1156,8 +1271,10 @@
-       return 0;
- exit_dput:
-+      intent_release(it);
-       dput(dentry);
- exit:
-+      intent_release(it);
-       path_release(nd);
-       return error;
-@@ -1176,7 +1293,10 @@
-        * are done. Procfs-like symlinks just set LAST_BIND.
-        */
-       UPDATE_ATIME(dentry->d_inode);
-+      nd->intent = it;
-       error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+      if (error)
-+              intent_release(it);
-       dput(dentry);
-       if (error)
-               return error;
-@@ -1198,13 +1318,20 @@
-       }
-       dir = nd->dentry;
-       down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
-       putname(nd->last.name);
-       goto do_last;
- }
-+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
-+{
-+      return open_namei_it(pathname, flag, mode, nd, NULL);
-+}
-+
-+
- /* SMP-safe */
--static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
-+static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
-+                                  struct lookup_intent *it)
- {
-       struct dentry *dentry;
-@@ -1212,7 +1339,7 @@
-       dentry = ERR_PTR(-EEXIST);
-       if (nd->last_type != LAST_NORM)
-               goto fail;
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
-       if (IS_ERR(dentry))
-               goto fail;
-       if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1269,7 +1396,20 @@
-               error = path_walk(tmp, &nd);
-       if (error)
-               goto out;
--      dentry = lookup_create(&nd, 0);
-+
-+      if (nd.last_type != LAST_NORM) {
-+              error = -EEXIST;
-+              goto out2;
-+      }
-+      if (nd.dentry->d_inode->i_op->mknod_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod_raw(&nd, mode, dev);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto out2;
-+      }
-+
-+      dentry = lookup_create(&nd, 0, NULL);
-       error = PTR_ERR(dentry);
-       mode &= ~current->fs->umask;
-@@ -1290,6 +1426,7 @@
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-+out2:
-       path_release(&nd);
- out:
-       putname(tmp);
-@@ -1338,7 +1475,18 @@
-                       error = path_walk(tmp, &nd);
-               if (error)
-                       goto out;
--              dentry = lookup_create(&nd, 1);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out2;
-+              }
-+              if (nd.dentry->d_inode->i_op->mkdir_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir_raw(&nd, mode);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
-+              dentry = lookup_create(&nd, 1, NULL);
-               error = PTR_ERR(dentry);
-               if (!IS_ERR(dentry)) {
-                       error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1346,6 +1490,7 @@
-                       dput(dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-+out2:
-               path_release(&nd);
- out:
-               putname(tmp);
-@@ -1447,8 +1592,16 @@
-                       error = -EBUSY;
-                       goto exit1;
-       }
-+      if (nd.dentry->d_inode->i_op->rmdir_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              error = op->rmdir_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-       down(&nd.dentry->d_inode->i_sem);
--      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
-       error = PTR_ERR(dentry);
-       if (!IS_ERR(dentry)) {
-               error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1507,8 +1660,15 @@
-       error = -EISDIR;
-       if (nd.last_type != LAST_NORM)
-               goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-       down(&nd.dentry->d_inode->i_sem);
--      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
-       error = PTR_ERR(dentry);
-       if (!IS_ERR(dentry)) {
-               /* Why not before? Because we want correct error value */
-@@ -1576,15 +1736,27 @@
-                       error = path_walk(to, &nd);
-               if (error)
-                       goto out;
--              dentry = lookup_create(&nd, 0);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out2;
-+              }
-+              if (nd.dentry->d_inode->i_op->symlink_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink_raw(&nd, from);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
-+              dentry = lookup_create(&nd, 0, NULL);
-               error = PTR_ERR(dentry);
-               if (!IS_ERR(dentry)) {
-                       error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-                       dput(dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-+      out2:
-               path_release(&nd);
--out:
-+      out:
-               putname(to);
-       }
-       putname(from);
-@@ -1667,7 +1835,18 @@
-               error = -EXDEV;
-               if (old_nd.mnt != nd.mnt)
-                       goto out_release;
--              new_dentry = lookup_create(&nd, 0);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out_release;
-+              }
-+              if (nd.dentry->d_inode->i_op->link_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link_raw(&old_nd, &nd);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out_release;
-+              }
-+              new_dentry = lookup_create(&nd, 0, NULL);
-               error = PTR_ERR(new_dentry);
-               if (!IS_ERR(new_dentry)) {
-                       error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1713,7 +1888,7 @@
-  *       locking].
-  */
- int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry)
- {
-       int error;
-       struct inode *target;
-@@ -1792,7 +1967,7 @@
- }
- int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry)
- {
-       int error;
-@@ -1883,9 +2058,18 @@
-       if (newnd.last_type != LAST_NORM)
-               goto exit2;
-+      if (old_dir->d_inode->i_op->rename_raw) {
-+              lock_kernel();
-+              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit2;
-+      }
-+
-       double_lock(new_dir, old_dir);
--      old_dentry = lookup_hash(&oldnd.last, old_dir);
-+      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
-       error = PTR_ERR(old_dentry);
-       if (IS_ERR(old_dentry))
-               goto exit3;
-@@ -1901,16 +2085,16 @@
-               if (newnd.last.name[newnd.last.len])
-                       goto exit4;
-       }
--      new_dentry = lookup_hash(&newnd.last, new_dir);
-+      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
-       error = PTR_ERR(new_dentry);
-       if (IS_ERR(new_dentry))
-               goto exit4;
-+
-       lock_kernel();
-       error = vfs_rename(old_dir->d_inode, old_dentry,
-                                  new_dir->d_inode, new_dentry);
-       unlock_kernel();
--
-       dput(new_dentry);
- exit4:
-       dput(old_dentry);
-@@ -1961,20 +2145,26 @@
- }
- static inline int
--__vfs_follow_link(struct nameidata *nd, const char *link)
-+__vfs_follow_link(struct nameidata *nd, const char *link,
-+                struct lookup_intent *it)
- {
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-+      if (it == NULL)
-+              it = nd->intent;
-+      else if (it != nd->intent)
-+              printk("it != nd->intent: tell phil@clusterfs.com\n");
-+
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
--      res = link_path_walk(link, nd);
-+      res = link_path_walk_it(link, nd, it);
- out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-@@ -1996,7 +2186,13 @@
- int vfs_follow_link(struct nameidata *nd, const char *link)
- {
--      return __vfs_follow_link(nd, link);
-+      return __vfs_follow_link(nd, link, NULL);
-+}
-+
-+int vfs_follow_link_it(struct nameidata *nd, const char *link,
-+                     struct lookup_intent *it)
-+{
-+      return __vfs_follow_link(nd, link, it);
- }
- /* get the link contents into pagecache */
-@@ -2038,7 +2234,7 @@
- {
-       struct page *page = NULL;
-       char *s = page_getlink(dentry, &page);
--      int res = __vfs_follow_link(nd, s);
-+      int res = __vfs_follow_link(nd, s, NULL);
-       if (page) {
-               kunmap(page);
-               page_cache_release(page);
-Index: linux.mcp2/fs/namespace.c
-===================================================================
---- linux.mcp2.orig/fs/namespace.c     2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/namespace.c  2004-05-05 14:22:06.000000000 -0700
-@@ -97,6 +97,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -108,6 +109,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -491,15 +493,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -522,6 +527,7 @@
-       }
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -706,6 +712,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -731,9 +738,11 @@
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -745,6 +754,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -910,6 +921,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
-@@ -924,7 +937,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -938,7 +951,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -994,8 +1007,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
-Index: linux.mcp2/fs/open.c
-===================================================================
---- linux.mcp2.orig/fs/open.c  2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/open.c       2004-05-05 14:30:34.000000000 -0700
-@@ -19,6 +19,8 @@
- #include <asm/uaccess.h>
- #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-+extern int path_walk_it(const char *name, struct nameidata *nd,
-+                      struct lookup_intent *it);
- int vfs_statfs(struct super_block *sb, struct statfs *buf)
- {
-@@ -95,9 +97,10 @@
-       write_unlock(&files->file_lock);
- }
--int do_truncate(struct dentry *dentry, loff_t length)
-+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
- {
-       struct inode *inode = dentry->d_inode;
-+      struct inode_operations *op = dentry->d_inode->i_op;
-       int error;
-       struct iattr newattrs;
-@@ -108,7 +111,13 @@
-       down(&inode->i_sem);
-       newattrs.ia_size = length;
-       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
--      error = notify_change(dentry, &newattrs);
-+      if (called_from_open)
-+              newattrs.ia_valid |= ATTR_FROM_OPEN;
-+      if (op->setattr_raw) {
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+      } else
-+              error = notify_change(dentry, &newattrs);
-       up(&inode->i_sem);
-       return error;
- }
-@@ -118,12 +127,13 @@
-       struct nameidata nd;
-       struct inode * inode;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       error = -EINVAL;
-       if (length < 0) /* sorry, but loff_t says... */
-               goto out;
--      error = user_path_walk(path, &nd);
-+      error = user_path_walk_it(path, &nd, &it);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-@@ -163,11 +173,13 @@
-       error = locks_verify_truncate(inode, NULL, length);
-       if (!error) {
-               DQUOT_INIT(inode);
--              error = do_truncate(nd.dentry, length);
-+              intent_release(&it);
-+              error = do_truncate(nd.dentry, length, 0);
-       }
-       put_write_access(inode);
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
-@@ -215,7 +227,7 @@
-       error = locks_verify_truncate(inode, file, length);
-       if (!error)
--              error = do_truncate(dentry, length);
-+              error = do_truncate(dentry, length, 0);
- out_putf:
-       fput(file);
- out:
-@@ -260,11 +272,13 @@
-       struct inode * inode;
-       struct iattr newattrs;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, NULL);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-+      /* this is safe without a Lustre lock because it only depends
-+         on the super block */
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto dput_and_out;
-@@ -279,11 +293,25 @@
-                       goto dput_and_out;
-               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
--      } else {
-+      }
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto dput_and_out;
-+      }
-+
-+      error = -EPERM;
-+      if (!times) {
-               if (current->fsuid != inode->i_uid &&
-                   (error = permission(inode,MAY_WRITE)) != 0)
-                       goto dput_and_out;
-       }
-+
-       error = notify_change(nd.dentry, &newattrs);
- dput_and_out:
-       path_release(&nd);
-@@ -304,12 +332,14 @@
-       struct inode * inode;
-       struct iattr newattrs;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, NULL);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-+      /* this is safe without a Lustre lock because it only depends
-+         on the super block */
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto dput_and_out;
-@@ -324,7 +354,20 @@
-               newattrs.ia_atime = times[0].tv_sec;
-               newattrs.ia_mtime = times[1].tv_sec;
-               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
--      } else {
-+      }
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto dput_and_out;
-+      }
-+
-+      error = -EPERM;
-+      if (!utimes) {
-               if (current->fsuid != inode->i_uid &&
-                   (error = permission(inode,MAY_WRITE)) != 0)
-                       goto dput_and_out;
-@@ -347,6 +390,7 @@
-       int old_fsuid, old_fsgid;
-       kernel_cap_t old_cap;
-       int res;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
-               return -EINVAL;
-@@ -364,13 +408,14 @@
-       else
-               current->cap_effective = current->cap_permitted;
--      res = user_path_walk(filename, &nd);
-+      res = user_path_walk_it(filename, &nd, &it);
-       if (!res) {
-               res = permission(nd.dentry->d_inode, mode);
-               /* SuS v2 requires we report a read only fs too */
-               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
-                  && !special_file(nd.dentry->d_inode->i_mode))
-                       res = -EROFS;
-+              intent_release(&it);
-               path_release(&nd);
-       }
-@@ -386,6 +431,7 @@
-       int error;
-       struct nameidata nd;
-       char *name;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       name = getname(filename);
-       error = PTR_ERR(name);
-@@ -394,7 +440,7 @@
-       error = 0;
-       if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd))
--              error = path_walk(name, &nd);
-+              error = path_walk_it(name, &nd, &it);
-       putname(name);
-       if (error)
-               goto out;
-@@ -406,6 +452,7 @@
-       set_fs_pwd(current->fs, nd.mnt, nd.dentry);
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
-@@ -446,6 +493,7 @@
-       int error;
-       struct nameidata nd;
-       char *name;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       name = getname(filename);
-       error = PTR_ERR(name);
-@@ -454,7 +502,7 @@
-       path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-                     LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
--      error = path_walk(name, &nd);   
-+      error = path_walk_it(name, &nd, &it);
-       putname(name);
-       if (error)
-               goto out;
-@@ -471,39 +519,56 @@
-       set_fs_altroot();
-       error = 0;
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
- }
--asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
-+int chmod_common(struct dentry *dentry, mode_t mode)
- {
--      struct inode * inode;
--      struct dentry * dentry;
--      struct file * file;
--      int err = -EBADF;
-+      struct inode *inode = dentry->d_inode;
-       struct iattr newattrs;
-+      int err = -EROFS;
--      file = fget(fd);
--      if (!file)
-+      if (IS_RDONLY(inode))
-               goto out;
--      dentry = file->f_dentry;
--      inode = dentry->d_inode;
-+      if (inode->i_op->setattr_raw) {
-+              newattrs.ia_mode = mode;
-+              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-+              newattrs.ia_valid |= ATTR_RAW;
-+              err = inode->i_op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (err != -EOPNOTSUPP)
-+                      goto out;
-+      }
--      err = -EROFS;
--      if (IS_RDONLY(inode))
--              goto out_putf;
-       err = -EPERM;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
--              goto out_putf;
-+              goto out;
-+
-       if (mode == (mode_t) -1)
-               mode = inode->i_mode;
-       newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
-       newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       err = notify_change(dentry, &newattrs);
--out_putf:
-+out:
-+      return err;
-+}
-+
-+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
-+{
-+      struct file * file;
-+      int err = -EBADF;
-+
-+      file = fget(fd);
-+      if (!file)
-+              goto out;
-+
-+      err = chmod_common(file->f_dentry, mode);
-+
-       fput(file);
- out:
-       return err;
-@@ -512,30 +577,14 @@
- asmlinkage long sys_chmod(const char * filename, mode_t mode)
- {
-       struct nameidata nd;
--      struct inode * inode;
-       int error;
--      struct iattr newattrs;
-       error = user_path_walk(filename, &nd);
-       if (error)
-               goto out;
--      inode = nd.dentry->d_inode;
--
--      error = -EROFS;
--      if (IS_RDONLY(inode))
--              goto dput_and_out;
--      error = -EPERM;
--      if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
--              goto dput_and_out;
-+      error = chmod_common(nd.dentry, mode);
--      if (mode == (mode_t) -1)
--              mode = inode->i_mode;
--      newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
--      newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
--      error = notify_change(nd.dentry, &newattrs);
--
--dput_and_out:
-       path_release(&nd);
- out:
-       return error;
-@@ -555,6 +604,20 @@
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto out;
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = dentry->d_inode->i_op;
-+
-+              newattrs.ia_uid = user;
-+              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      return error;
-+      }
-+
-       error = -EPERM;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               goto out;
-@@ -659,6 +722,7 @@
- {
-       int namei_flags, error;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN };
-       namei_flags = flags;
-       if ((namei_flags+1) & O_ACCMODE)
-@@ -666,14 +730,15 @@
-       if (namei_flags & O_TRUNC)
-               namei_flags |= 2;
--      error = open_namei(filename, namei_flags, mode, &nd);
--      if (!error)
--              return dentry_open(nd.dentry, nd.mnt, flags);
-+      error = open_namei_it(filename, namei_flags, mode, &nd, &it);
-+      if (error)
-+              return ERR_PTR(error);
--      return ERR_PTR(error);
-+      return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
- }
--struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it)
- {
-       struct file * f;
-       struct inode *inode;
-@@ -710,12 +775,15 @@
-       }
-       if (f->f_op && f->f_op->open) {
-+              f->f_it = it;
-               error = f->f_op->open(inode,f);
-+              f->f_it = NULL;
-               if (error)
-                       goto cleanup_all;
-       }
-       f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-+      intent_release(it);
-       return f;
- cleanup_all:
-@@ -730,11 +798,17 @@
- cleanup_file:
-       put_filp(f);
- cleanup_dentry:
-+      intent_release(it);
-       dput(dentry);
-       mntput(mnt);
-       return ERR_PTR(error);
- }
-+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+{
-+      return dentry_open_it(dentry, mnt, flags, NULL);
-+}
-+
- /*
-  * Find an empty file descriptor entry, and mark it busy.
-  */
-Index: linux.mcp2/fs/stat.c
-===================================================================
---- linux.mcp2.orig/fs/stat.c  2004-01-19 07:49:43.000000000 -0800
-+++ linux.mcp2/fs/stat.c       2004-05-05 14:19:59.000000000 -0700
-@@ -17,10 +17,12 @@
-  * Revalidate the inode. This is required for proper NFS attribute caching.
-  */
- static __inline__ int
--do_revalidate(struct dentry *dentry)
-+do_revalidate(struct dentry *dentry, struct lookup_intent *it)
- {
-       struct inode * inode = dentry->d_inode;
--      if (inode->i_op && inode->i_op->revalidate)
-+      if (inode->i_op && inode->i_op->revalidate_it)
-+              return inode->i_op->revalidate_it(dentry, it);
-+      else if (inode->i_op && inode->i_op->revalidate)
-               return inode->i_op->revalidate(dentry);
-       return 0;
- }
-@@ -135,13 +139,15 @@
- asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -151,13 +157,15 @@
- asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -172,13 +180,15 @@
- asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -189,13 +199,15 @@
- asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -216,7 +228,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_old_stat(dentry->d_inode, statbuf);
-               fput(f);
-@@ -235,7 +247,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_new_stat(dentry->d_inode, statbuf);
-               fput(f);
-@@ -257,7 +269,7 @@
-               error = -EINVAL;
-               if (inode->i_op && inode->i_op->readlink &&
--                  !(error = do_revalidate(nd.dentry))) {
-+                  !(error = do_revalidate(nd.dentry, NULL))) {
-                       UPDATE_ATIME(inode);
-                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
-               }
-@@ -333,12 +345,14 @@
- {
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -348,12 +362,14 @@
- {
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -368,7 +384,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_new_stat64(dentry->d_inode, statbuf);
-               fput(f);
-Index: linux.mcp2/include/linux/dcache.h
-===================================================================
---- linux.mcp2.orig/include/linux/dcache.h     2004-04-23 16:52:28.000000000 -0700
-+++ linux.mcp2/include/linux/dcache.h  2004-05-05 14:19:59.000000000 -0700
-@@ -5,6 +5,51 @@
- #include <asm/atomic.h>
- #include <linux/mount.h>
-+#include <linux/string.h>
-+
-+#define IT_OPEN     0x0001
-+#define IT_CREAT    0x0002
-+#define IT_READDIR  0x0004
-+#define IT_GETATTR  0x0008
-+#define IT_LOOKUP   0x0010
-+#define IT_UNLINK   0x0020
-+#define IT_GETXATTR 0x0040
-+#define IT_EXEC     0x0080
-+#define IT_PIN      0x0100
-+
-+#define IT_FL_LOCKED   0x0001
-+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
-+
-+#define INTENT_MAGIC 0x19620323
-+
-+
-+struct lustre_intent_data {
-+      int       it_disposition;
-+      int       it_status;
-+      __u64     it_lock_handle;
-+      void     *it_data;
-+      int       it_lock_mode;
-+      int it_int_flags;
-+};
-+struct lookup_intent {
-+      int     it_magic;
-+      void    (*it_op_release)(struct lookup_intent *);
-+      int     it_op;
-+      int     it_flags;
-+      int     it_create_mode;
-+      union {
-+              struct lustre_intent_data lustre;
-+      } d;
-+};
-+
-+static inline void intent_init(struct lookup_intent *it, int op, int flags)
-+{
-+      memset(it, 0, sizeof(*it));
-+      it->it_magic = INTENT_MAGIC;
-+      it->it_op = op;
-+      it->it_flags = flags;
-+}
-+
- /*
-  * linux/include/linux/dcache.h
-@@ -90,8 +135,22 @@
-       int (*d_delete)(struct dentry *);
-       void (*d_release)(struct dentry *);
-       void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_pin)(struct dentry *, struct vfsmount * , int);
-+      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
- };
-+#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
-+                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
-+                              de->d_op->d_unpin(de, mnt, flag);
-+
-+
-+/* defined in fs/namei.c */
-+extern void intent_release(struct lookup_intent *it);
-+/* defined in fs/dcache.c */
-+extern void __d_rehash(struct dentry * entry, int lock);
-+
- /* the dentry parameter passed to d_hash and d_compare is the parent
-  * directory of the entries to be compared. It is used in case these
-  * functions need any directory specific information for determining
-@@ -123,6 +182,7 @@
-                                        * s_nfsd_free_path semaphore will be down
-                                        */
- #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
-+#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
- extern spinlock_t dcache_lock;
-Index: linux.mcp2/include/linux/fs.h
-===================================================================
---- linux.mcp2.orig/include/linux/fs.h 2004-05-05 14:12:28.000000000 -0700
-+++ linux.mcp2/include/linux/fs.h      2004-05-05 14:19:59.000000000 -0700
-@@ -73,6 +73,7 @@
- #define FMODE_READ 1
- #define FMODE_WRITE 2
-+#define FMODE_EXEC 4
- #define READ 0
- #define WRITE 1
-@@ -335,6 +336,9 @@
- #define ATTR_MTIME_SET        256
- #define ATTR_FORCE    512     /* Not a change, but a change it */
- #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET        0x2000
- /*
-  * This is the Inode Attributes structure, used for notify_change().  It
-@@ -470,6 +474,7 @@
-       struct pipe_inode_info  *i_pipe;
-       struct block_device     *i_bdev;
-       struct char_device      *i_cdev;
-+      void                    *i_filterdata;
-       unsigned long           i_dnotify_mask; /* Directory notify events */
-       struct dnotify_struct   *i_dnotify; /* for directory notifications */
-@@ -574,6 +579,7 @@
-       /* needed for tty driver, and maybe others */
-       void                    *private_data;
-+      struct lookup_intent    *f_it;
-       /* preallocated helper kiobuf to speedup O_DIRECT */
-       struct kiobuf           *f_iobuf;
-@@ -692,6 +698,7 @@
-       struct qstr last;
-       unsigned int flags;
-       int last_type;
-+      struct lookup_intent *intent;
- };
- #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
-@@ -840,7 +847,8 @@
- extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
- extern int vfs_rmdir(struct inode *, struct dentry *);
- extern int vfs_unlink(struct inode *, struct dentry *);
--extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
-+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+             struct inode *new_dir, struct dentry *new_dentry);
- /*
-  * File types
-@@ -900,21 +908,32 @@
- struct inode_operations {
-       int (*create) (struct inode *,struct dentry *,int);
-+      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
-       struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
-       int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link_raw) (struct nameidata *,struct nameidata *);
-       int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink_raw) (struct nameidata *);
-       int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink_raw) (struct nameidata *,const char *);
-       int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir_raw) (struct nameidata *,int);
-       int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir_raw) (struct nameidata *);
-       int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod_raw) (struct nameidata *,int,dev_t);
-       int (*rename) (struct inode *, struct dentry *,
-                       struct inode *, struct dentry *);
-+      int (*rename_raw) (struct nameidata *, struct nameidata *);
-       int (*readlink) (struct dentry *, char *,int);
-       int (*follow_link) (struct dentry *, struct nameidata *);
-       void (*truncate) (struct inode *);
-       int (*permission) (struct inode *, int);
-       int (*revalidate) (struct dentry *);
-+      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
-       int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
-       int (*getattr) (struct dentry *, struct iattr *);
- };
-@@ -1115,10 +1134,14 @@
- asmlinkage long sys_open(const char *, int, int);
- asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
--extern int do_truncate(struct dentry *, loff_t start);
-+extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
- extern struct file *filp_open(const char *, int, int);
- extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
-+extern int open_namei_it(const char *filename, int namei_flags, int mode,
-+                       struct nameidata *nd, struct lookup_intent *it);
-+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it);
- extern int filp_close(struct file *, fl_owner_t id);
- extern char * getname(const char *);
-@@ -1380,6 +1403,7 @@
- extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
- extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
-+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
- extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
- extern int FASTCALL(path_walk(const char *, struct nameidata *));
- extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-@@ -1390,6 +1414,8 @@
- extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
- #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
- #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
-+#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
-+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
- extern void iput(struct inode *);
- extern void force_delete(struct inode *);
-@@ -1499,6 +1525,8 @@
- extern int vfs_readlink(struct dentry *, char *, int, const char *);
- extern int vfs_follow_link(struct nameidata *, const char *);
-+extern int vfs_follow_link_it(struct nameidata *, const char *,
-+                            struct lookup_intent *it);
- extern int page_readlink(struct dentry *, char *, int);
- extern int page_follow_link(struct dentry *, struct nameidata *);
- extern struct inode_operations page_symlink_inode_operations;
-Index: linux.mcp2/include/linux/fs_struct.h
-===================================================================
---- linux.mcp2.orig/include/linux/fs_struct.h  2004-01-19 07:49:42.000000000 -0800
-+++ linux.mcp2/include/linux/fs_struct.h       2004-05-05 14:19:59.000000000 -0700
-@@ -34,10 +34,12 @@
-       write_lock(&fs->lock);
-       old_root = fs->root;
-       old_rootmnt = fs->rootmnt;
-+      PIN(dentry, mnt, 1);
-       fs->rootmnt = mntget(mnt);
-       fs->root = dget(dentry);
-       write_unlock(&fs->lock);
-       if (old_root) {
-+              UNPIN(old_root, old_rootmnt, 1);
-               dput(old_root);
-               mntput(old_rootmnt);
-       }
-@@ -57,10 +59,12 @@
-       write_lock(&fs->lock);
-       old_pwd = fs->pwd;
-       old_pwdmnt = fs->pwdmnt;
-+      PIN(dentry, mnt, 0);
-       fs->pwdmnt = mntget(mnt);
-       fs->pwd = dget(dentry);
-       write_unlock(&fs->lock);
-       if (old_pwd) {
-+              UNPIN(old_pwd, old_pwdmnt, 0);
-               dput(old_pwd);
-               mntput(old_pwdmnt);
-       }
-Index: linux.mcp2/kernel/exit.c
-===================================================================
---- linux.mcp2.orig/kernel/exit.c      2004-01-19 07:49:44.000000000 -0800
-+++ linux.mcp2/kernel/exit.c   2004-05-05 14:19:59.000000000 -0700
-@@ -252,11 +252,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
-Index: linux.mcp2/kernel/fork.c
-===================================================================
---- linux.mcp2.orig/kernel/fork.c      2004-01-19 07:49:44.000000000 -0800
-+++ linux.mcp2/kernel/fork.c   2004-05-05 14:19:59.000000000 -0700
-@@ -384,10 +384,13 @@
-               fs->umask = old->umask;
-               read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-+              PIN(old->pwd, old->pwdmnt, 0);
-+              PIN(old->root, old->rootmnt, 1);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-+                      PIN(old->altroot, old->altrootmnt, 1);
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
-               } else {
-Index: linux.mcp2/kernel/ksyms.c
-===================================================================
---- linux.mcp2.orig/kernel/ksyms.c     2004-05-05 14:12:28.000000000 -0700
-+++ linux.mcp2/kernel/ksyms.c  2004-05-05 14:19:59.000000000 -0700
-@@ -264,6 +264,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.19-suse.patch
deleted file mode 100644 (file)
index 340ce7c..0000000
+++ /dev/null
@@ -1,1858 +0,0 @@
- fs/dcache.c               |   19 ++
- fs/exec.c                 |   17 +-
- fs/namei.c                |  295 +++++++++++++++++++++++++++++++++++++++-------
- fs/namespace.c            |   28 +++-
- fs/open.c                 |  172 +++++++++++++++++++-------
- fs/stat.c                 |   52 +++++---
- include/linux/dcache.h    |   60 +++++++++
- include/linux/fs.h        |   32 ++++
- include/linux/fs_struct.h |    4 
- kernel/exit.c             |    3 
- kernel/fork.c             |    3 
- kernel/ksyms.c            |    1 
- 12 files changed, 558 insertions(+), 128 deletions(-)
-
-Index: linux-2.4.19.SuSE/fs/dcache.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/dcache.c Mon Jan 27 05:08:04 2003
-+++ linux-2.4.19.SuSE/fs/dcache.c      Sat Nov 15 17:29:03 2003
-@@ -186,6 +186,13 @@
-               spin_unlock(&dcache_lock);
-               return 0;
-       }
-+
-+      /* network invalidation by Lustre */
-+      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
-+              spin_unlock(&dcache_lock);
-+              return 0;
-+      }
-+
-       /*
-        * Check whether to do a partial shrink_dcache
-        * to get rid of unused child entries.
-@@ -838,13 +845,19 @@
-  * Adds a dentry to the hash according to its name.
-  */
-  
--void d_rehash(struct dentry * entry)
-+void __d_rehash(struct dentry * entry, int lock)
- {
-       struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-       if (!list_empty(&entry->d_hash)) BUG();
--      spin_lock(&dcache_lock);
-+      if (lock) spin_lock(&dcache_lock);
-       list_add(&entry->d_hash, list);
--      spin_unlock(&dcache_lock);
-+      if (lock) spin_unlock(&dcache_lock);
-+}
-+EXPORT_SYMBOL(__d_rehash);
-+
-+void d_rehash(struct dentry * entry)
-+{
-+      __d_rehash(entry, 1);
- }
- #define do_switch(x,y) do { \
-Index: linux-2.4.19.SuSE/fs/exec.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/exec.c   Mon Jan 27 05:08:35 2003
-+++ linux-2.4.19.SuSE/fs/exec.c        Sat Nov 15 17:34:06 2003
-@@ -107,8 +107,10 @@
-       struct file * file;
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                  .it_flags = FMODE_READ|FMODE_EXEC };
--      error = user_path_walk(library, &nd);
-+      error = user_path_walk_it(library, &nd, &it);
-       if (error)
-               goto out;
-@@ -120,7 +122,8 @@
-       if (error)
-               goto exit;
--      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+      intent_release(&it);
-       error = PTR_ERR(file);
-       if (IS_ERR(file))
-               goto out;
-@@ -346,9 +349,11 @@
-       struct inode *inode;
-       struct file *file;
-       int err = 0;
-+      struct lookup_intent it = { .it_op = IT_OPEN,
-+                                  .it_flags = FMODE_READ|FMODE_EXEC };
-       if (path_init(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              err = path_walk(name, &nd);
-+              err = path_walk_it(name, &nd, &it);
-       file = ERR_PTR(err);
-       if (!err) {
-               inode = nd.dentry->d_inode;
-@@ -360,7 +365,8 @@
-                               err = -EACCES;
-                       file = ERR_PTR(err);
-                       if (!err) {
--                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+                              intent_release(&it);
-                               if (!IS_ERR(file)) {
-                                       err = deny_write_access(file);
-                                       if (err) {
-@@ -372,6 +378,7 @@
-                               return file;
-                       }
-               }
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       goto out;
-@@ -981,7 +988,7 @@
-               goto close_fail;
-       if (!file->f_op->write)
-               goto close_fail;
--      if (do_truncate(file->f_dentry, 0) != 0)
-+      if (do_truncate(file->f_dentry, 0, 0) != 0)
-               goto close_fail;
-       retval = binfmt->core_dump(signr, regs, file);
-Index: linux-2.4.19.SuSE/fs/namei.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/namei.c  Mon Jan 27 05:08:07 2003
-+++ linux-2.4.19.SuSE/fs/namei.c       Sat Nov 15 17:52:03 2003
-@@ -94,6 +94,13 @@
-  * XEmacs seems to be relying on it...
-  */
-+void intent_release(struct lookup_intent *it)
-+{
-+      if (it && it->it_op_release)
-+              it->it_op_release(it);
-+
-+}
-+
- /* In order to reduce some races, while at the same time doing additional
-  * checking and hopefully speeding things up, we copy filenames to the
-  * kernel data space before using them..
-@@ -260,10 +267,19 @@
-  * Internal lookup() using the new generic dcache.
-  * SMP-safe
-  */
--static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
-+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
-+                                  int flags, struct lookup_intent *it)
- {
-       struct dentry * dentry = d_lookup(parent, name);
-+      if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
-+              if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
-+                  !d_invalidate(dentry)) {
-+                      dput(dentry);
-+                      dentry = NULL;
-+              }
-+              return dentry;
-+      } else
-       if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
-               if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
-                       dput(dentry);
-@@ -281,11 +297,15 @@
-  * make sure that nobody added the entry to the dcache in the meantime..
-  * SMP-safe
-  */
--static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
-+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
-+                                int flags, struct lookup_intent *it)
- {
-       struct dentry * result;
-       struct inode *dir = parent->d_inode;
-+      int counter = 0;
-+again:
-+      counter++;
-       down(&dir->i_sem);
-       /*
-        * First re-do the cached lookup just in case it was created
-@@ -300,6 +320,9 @@
-               result = ERR_PTR(-ENOMEM);
-               if (dentry) {
-                       lock_kernel();
-+                      if (dir->i_op->lookup_it)
-+                              result = dir->i_op->lookup_it(dir, dentry, it, flags);
-+                      else
-                       result = dir->i_op->lookup(dir, dentry);
-                       unlock_kernel();
-                       if (result)
-@@ -321,6 +344,15 @@
-                       dput(result);
-                       result = ERR_PTR(-ENOENT);
-               }
-+      } else if (result->d_op && result->d_op->d_revalidate_it) {
-+              if (!result->d_op->d_revalidate_it(result, flags, it) &&
-+                  !d_invalidate(result)) {
-+                      dput(result);
-+                      if (counter > 10)
-+                              result = ERR_PTR(-ESTALE);
-+                      if (!IS_ERR(result))
-+                              goto again;
-+              }
-       }
-       return result;
- }
-@@ -332,7 +364,8 @@
-  * Without that kind of total limit, nasty chains of consecutive
-  * symlinks can cause almost arbitrarily long lookups. 
-  */
--static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
-+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
-+                               struct lookup_intent *it)
- {
-       int err;
-       if (current->link_count >= 8)
-@@ -346,10 +379,12 @@
-       current->link_count++;
-       current->total_link_count++;
-       UPDATE_ATIME(dentry->d_inode);
-+      nd->intent = it;
-       err = dentry->d_inode->i_op->follow_link(dentry, nd);
-       current->link_count--;
-       return err;
- loop:
-+      intent_release(it);
-       path_release(nd);
-       return -ELOOP;
- }
-@@ -447,7 +482,8 @@
-  *
-  * We expect 'base' to be positive and a directory.
-  */
--int link_path_walk(const char * name, struct nameidata *nd)
-+int link_path_walk_it(const char *name, struct nameidata *nd,
-+                    struct lookup_intent *it)
- {
-       struct dentry *dentry;
-       struct inode *inode;
-@@ -524,12 +560,13 @@
-                               break;
-               }
-               /* This does the actual lookups.. */
--              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+              dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
-               if (!dentry) {
-                       err = -EWOULDBLOCKIO;
-                       if (atomic)
-                               break;
--                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+                      dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE,
-+                                           NULL);
-                       err = PTR_ERR(dentry);
-                       if (IS_ERR(dentry))
-                               break;
-@@ -547,7 +584,7 @@
-                       goto out_dput;
-               if (inode->i_op->follow_link) {
--                      err = do_follow_link(dentry, nd);
-+                      err = do_follow_link(dentry, nd, NULL);
-                       dput(dentry);
-                       if (err)
-                               goto return_err;
-@@ -563,7 +600,7 @@
-                       nd->dentry = dentry;
-               }
-               err = -ENOTDIR; 
--              if (!inode->i_op->lookup)
-+              if (!inode->i_op->lookup && !inode->i_op->lookup_it)
-                       break;
-               continue;
-               /* here ends the main loop */
-@@ -590,12 +627,12 @@
-                       if (err < 0)
-                               break;
-               }
--              dentry = cached_lookup(nd->dentry, &this, 0);
-+              dentry = cached_lookup(nd->dentry, &this, 0, it);
-               if (!dentry) {
-                       err = -EWOULDBLOCKIO;
-                       if (atomic)
-                               break;
--                      dentry = real_lookup(nd->dentry, &this, 0);
-+                      dentry = real_lookup(nd->dentry, &this, 0, it);
-                       err = PTR_ERR(dentry);
-                       if (IS_ERR(dentry))
-                               break;
-@@ -605,7 +642,7 @@
-               inode = dentry->d_inode;
-               if ((lookup_flags & LOOKUP_FOLLOW)
-                   && inode && inode->i_op && inode->i_op->follow_link) {
--                      err = do_follow_link(dentry, nd);
-+                      err = do_follow_link(dentry, nd, it);
-                       dput(dentry);
-                       if (err)
-                               goto return_err;
-@@ -619,7 +656,8 @@
-                       goto no_inode;
-               if (lookup_flags & LOOKUP_DIRECTORY) {
-                       err = -ENOTDIR; 
--                      if (!inode->i_op || !inode->i_op->lookup)
-+                      if (!inode->i_op ||
-+                          (!inode->i_op->lookup && !inode->i_op->lookup_it))
-                               break;
-               }
-               goto return_base;
-@@ -643,6 +681,32 @@
-                * Check the cached dentry for staleness.
-                */
-               dentry = nd->dentry;
-+              if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
-+                      err = -ESTALE;
-+                      if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
-+                              struct dentry *new;
-+                              err = permission(dentry->d_parent->d_inode,
-+                                               MAY_EXEC);
-+                              if (err)
-+                                      break;
-+                              new = real_lookup(dentry->d_parent,
-+                                                &dentry->d_name, 0, it);
-+                              if (IS_ERR(new)) {
-+                                      err = PTR_ERR(new);
-+                                      break;
-+                              }
-+                              d_invalidate(dentry);
-+                              dput(dentry);
-+                              nd->dentry = new;
-+                      }
-+                      if (!nd->dentry->d_inode)
-+                              goto no_inode;
-+                      if (lookup_flags & LOOKUP_DIRECTORY) {
-+                              err = -ENOTDIR; 
-+                              if (!nd->dentry->d_inode->i_op ||
-+                                  (!nd->dentry->d_inode->i_op->lookup &&
-+                                   !nd->dentry->d_inode->i_op->lookup_it))
-+                                      break;
-+                      }
-+              } else
-               if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
-                       err = -ESTALE;
-                       if (!dentry->d_op->d_revalidate(dentry, lookup_flags & LOOKUP_PARENT)) {
-@@ -656,15 +713,28 @@
-               dput(dentry);
-               break;
-       }
-+      if (err)
-+              intent_release(it);
-       path_release(nd);
- return_err:
-       return err;
- }
-+int link_path_walk(const char * name, struct nameidata *nd)
-+{
-+      return link_path_walk_it(name, nd, NULL);
-+}
-+
-+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
-+{
-+      current->total_link_count = 0;
-+      return link_path_walk_it(name, nd, it);
-+}
-+
- int path_walk(const char * name, struct nameidata *nd)
- {
-       current->total_link_count = 0;
--      return link_path_walk(name, nd);
-+      return link_path_walk_it(name, nd, NULL);
- }
- /* SMP-safe */
-@@ -753,6 +823,7 @@
- {
-       nd->last_type = LAST_ROOT; /* if there are only slashes... */
-       nd->flags = flags;
-+      nd->intent = NULL;
-       if (*name=='/')
-               return walk_init_root(name,nd);
-       read_lock(&current->fs->lock);
-@@ -767,7 +838,8 @@
-  * needs parent already locked. Doesn't follow mounts.
-  * SMP-safe.
-  */
--struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
-+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
-+                             struct lookup_intent *it)
- {
-       struct dentry * dentry;
-       struct inode *inode;
-@@ -790,13 +862,16 @@
-                       goto out;
-       }
--      dentry = cached_lookup(base, name, 0);
-+      dentry = cached_lookup(base, name, 0, it);
-       if (!dentry) {
-               struct dentry *new = d_alloc(base, name);
-               dentry = ERR_PTR(-ENOMEM);
-               if (!new)
-                       goto out;
-               lock_kernel();
-+              if (inode->i_op->lookup_it)
-+                      dentry = inode->i_op->lookup_it(inode, new, it, 0);
-+              else
-               dentry = inode->i_op->lookup(inode, new);
-               unlock_kernel();
-               if (!dentry)
-@@ -808,6 +883,12 @@
-       return dentry;
- }
-+struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
-+{
-+      return lookup_hash_it(name, base, NULL);
-+}
-+
-+
- /* SMP-safe */
- struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
- {
-@@ -829,7 +910,7 @@
-       }
-       this.hash = end_name_hash(hash);
--      return lookup_hash(&this, base);
-+      return lookup_hash_it(&this, base, NULL);
- access:
-       return ERR_PTR(-EACCES);
- }
-@@ -861,6 +942,23 @@
-       return err;
- }
-+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
-+                 struct lookup_intent *it)
-+{
-+      char *tmp;
-+      int err;
-+
-+      tmp = getname(name);
-+      err = PTR_ERR(tmp);
-+      if (!IS_ERR(tmp)) {
-+              err = 0;
-+              if (path_init(tmp, flags, nd))
-+                      err = path_walk_it(tmp, nd, it);
-+              putname(tmp);
-+      }
-+      return err;
-+}
-+
- /*
-  * It's inline, so penalty for filesystems that don't use sticky bit is
-  * minimal.
-@@ -958,7 +1056,8 @@
-       return retval;
- }
--int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
-+static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
-+                       struct lookup_intent *it)
- {
-       int error;
-@@ -971,12 +1070,15 @@
-               goto exit_lock;
-       error = -EACCES;        /* shouldn't it be ENOSYS? */
--      if (!dir->i_op || !dir->i_op->create)
-+      if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
-               goto exit_lock;
-       DQUOT_INIT(dir);
-       lock_kernel();
--      error = dir->i_op->create(dir, dentry, mode);
-+      if (dir->i_op->create_it)
-+              error = dir->i_op->create_it(dir, dentry, mode, it);
-+      else
-+              error = dir->i_op->create(dir, dentry, mode);
-       unlock_kernel();
- exit_lock:
-       up(&dir->i_zombie);
-@@ -985,6 +1087,11 @@
-       return error;
- }
-+int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
-+{
-+      return vfs_create_it(dir, dentry, mode, NULL);
-+}
-+
- /*
-  *    open_namei()
-  *
-@@ -999,7 +1106,8 @@
-  * for symlinks (where the permissions are checked later).
-  * SMP-safe
-  */
--int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
-+int open_namei_it(const char *pathname, int flag, int mode,
-+                struct nameidata *nd, struct lookup_intent *it)
- {
-       int acc_mode, error = 0;
-       struct inode *inode;
-@@ -1009,12 +1117,14 @@
-       acc_mode = ACC_MODE(flag);
-+      if (it)
-+              it->it_flags = flag;
-       /*
-        * The simplest case - just a plain lookup.
-        */
-       if (!(flag & O_CREAT)) {
-               if (path_init(pathname, lookup_flags(flag), nd))
--                      error = path_walk(pathname, nd);
-+                      error = path_walk_it(pathname, nd, it);
-               if (error)
-                       return error;
-               dentry = nd->dentry;
-@@ -1024,6 +1134,10 @@
-       /*
-        * Create - we need to know the parent.
-        */
-+      if (it) {
-+              it->it_create_mode = mode;
-+              it->it_op |= IT_CREAT;
-+      }
-       if (path_init(pathname, LOOKUP_PARENT, nd))
-               error = path_walk(pathname, nd);
-       if (error)
-@@ -1040,7 +1154,7 @@
-       dir = nd->dentry;
-       down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
- do_last:
-       error = PTR_ERR(dentry);
-@@ -1049,11 +1163,13 @@
-               goto exit;
-       }
-+      it->it_create_mode = mode;
-       /* Negative dentry, just create the file */
-       if (!dentry->d_inode) {
-               if (!IS_POSIXACL(dir->d_inode))
-                       mode &= ~current->fs->umask;
--              error = vfs_create(dir->d_inode, dentry, mode);
-+              error = vfs_create_it(dir->d_inode, dentry,
-+                                    mode & ~current->fs->umask, it);
-               up(&dir->d_inode->i_sem);
- #ifndef DENTRY_WASTE_RAM
-               if (error)
-@@ -1161,7 +1277,7 @@
-               if (!error) {
-                       DQUOT_INIT(inode);
-                       
--                      error = do_truncate(dentry, 0);
-+                      error = do_truncate(dentry, 0, 1);
-               }
-               put_write_access(inode);
-               if (error)
-@@ -1173,8 +1289,10 @@
-       return 0;
- exit_dput:
-+      intent_release(it);
-       dput(dentry);
- exit:
-+      intent_release(it);
-       path_release(nd);
-       return error;
-@@ -1193,7 +1311,10 @@
-        * are done. Procfs-like symlinks just set LAST_BIND.
-        */
-       UPDATE_ATIME(dentry->d_inode);
-+      nd->intent = it;
-       error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+      if (error)
-+              intent_release(it);
-       dput(dentry);
-       if (error)
-               return error;
-@@ -1215,13 +1336,20 @@
-       }
-       dir = nd->dentry;
-       down(&dir->d_inode->i_sem);
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
-       putname(nd->last.name);
-       goto do_last;
- }
-+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd)
-+{
-+      return open_namei_it(pathname, flag, mode, nd, NULL);
-+}
-+
-+
- /* SMP-safe */
--static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
-+static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
-+                                  struct lookup_intent *it)
- {
-       struct dentry *dentry;
-@@ -1229,7 +1357,7 @@
-       dentry = ERR_PTR(-EEXIST);
-       if (nd->last_type != LAST_NORM)
-               goto fail;
--      dentry = lookup_hash(&nd->last, nd->dentry);
-+      dentry = lookup_hash_it(&nd->last, nd->dentry, it);
-       if (IS_ERR(dentry))
-               goto fail;
-       if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1286,7 +1414,20 @@
-               error = path_walk(tmp, &nd);
-       if (error)
-               goto out;
--      dentry = lookup_create(&nd, 0);
-+
-+      if (nd.last_type != LAST_NORM) {
-+              error = -EEXIST;
-+              goto out2;
-+      }
-+      if (nd.dentry->d_inode->i_op->mknod_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->mknod_raw(&nd, mode, dev);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto out2;
-+      }
-+
-+      dentry = lookup_create(&nd, 0, NULL);
-       error = PTR_ERR(dentry);
-       if (!IS_POSIXACL(nd.dentry->d_inode))
-@@ -1308,6 +1445,7 @@
-               dput(dentry);
-       }
-       up(&nd.dentry->d_inode->i_sem);
-+out2:
-       path_release(&nd);
- out:
-       putname(tmp);
-@@ -1356,7 +1494,18 @@
-                       error = path_walk(tmp, &nd);
-               if (error)
-                       goto out;
--              dentry = lookup_create(&nd, 1);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out2;
-+              }
-+              if (nd.dentry->d_inode->i_op->mkdir_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir_raw(&nd, mode);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
-+              dentry = lookup_create(&nd, 1, NULL);
-               error = PTR_ERR(dentry);
-               if (!IS_ERR(dentry)) {
-                       if (!IS_POSIXACL(nd.dentry->d_inode))
-@@ -1365,6 +1510,7 @@
-                       dput(dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-+out2:
-               path_release(&nd);
- out:
-               putname(tmp);
-@@ -1466,8 +1612,16 @@
-                       error = -EBUSY;
-                       goto exit1;
-       }
-+      if (nd.dentry->d_inode->i_op->rmdir_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              error = op->rmdir_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-       down(&nd.dentry->d_inode->i_sem);
--      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
-       error = PTR_ERR(dentry);
-       if (!IS_ERR(dentry)) {
-               error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1526,8 +1680,15 @@
-       error = -EISDIR;
-       if (nd.last_type != LAST_NORM)
-               goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-       down(&nd.dentry->d_inode->i_sem);
--      dentry = lookup_hash(&nd.last, nd.dentry);
-+      dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
-       error = PTR_ERR(dentry);
-       if (!IS_ERR(dentry)) {
-               /* Why not before? Because we want correct error value */
-@@ -1595,15 +1756,27 @@
-                       error = path_walk(to, &nd);
-               if (error)
-                       goto out;
--              dentry = lookup_create(&nd, 0);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out2;
-+              }
-+              if (nd.dentry->d_inode->i_op->symlink_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->symlink_raw(&nd, from);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
-+              dentry = lookup_create(&nd, 0, NULL);
-               error = PTR_ERR(dentry);
-               if (!IS_ERR(dentry)) {
-                       error = vfs_symlink(nd.dentry->d_inode, dentry, from);
-                       dput(dentry);
-               }
-               up(&nd.dentry->d_inode->i_sem);
-+      out2:
-               path_release(&nd);
--out:
-+      out:
-               putname(to);
-       }
-       putname(from);
-@@ -1686,7 +1855,14 @@
-               error = -EXDEV;
-               if (old_nd.mnt != nd.mnt)
-                       goto out_release;
--              new_dentry = lookup_create(&nd, 0);
-+              if (nd.last_type != LAST_NORM) {
-+                      error = -EEXIST;
-+                      goto out_release;
-+              }
-+              if (nd.dentry->d_inode->i_op->link_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->link_raw(&old_nd, &nd);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out_release;
-+              }
-+              new_dentry = lookup_create(&nd, 0, NULL);
-               error = PTR_ERR(new_dentry);
-               if (!IS_ERR(new_dentry)) {
-                       error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1732,7 +1908,7 @@
-  *       locking].
-  */
- int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+                 struct inode *new_dir, struct dentry *new_dentry)
- {
-       int error;
-       struct inode *target;
-@@ -1811,7 +1987,7 @@
- }
- int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
--             struct inode *new_dir, struct dentry *new_dentry)
-+                   struct inode *new_dir, struct dentry *new_dentry)
- {
-       int error;
-@@ -1902,9 +2078,18 @@
-       if (newnd.last_type != LAST_NORM)
-               goto exit2;
-+      if (old_dir->d_inode->i_op->rename_raw) {
-+              lock_kernel();
-+              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
-+              unlock_kernel();
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit2;
-+      }
-+
-       double_lock(new_dir, old_dir);
--      old_dentry = lookup_hash(&oldnd.last, old_dir);
-+      old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
-       error = PTR_ERR(old_dentry);
-       if (IS_ERR(old_dentry))
-               goto exit3;
-@@ -1920,16 +2105,16 @@
-               if (newnd.last.name[newnd.last.len])
-                       goto exit4;
-       }
--      new_dentry = lookup_hash(&newnd.last, new_dir);
-+      new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
-       error = PTR_ERR(new_dentry);
-       if (IS_ERR(new_dentry))
-               goto exit4;
-+
-       lock_kernel();
-       error = vfs_rename(old_dir->d_inode, old_dentry,
-                                  new_dir->d_inode, new_dentry);
-       unlock_kernel();
--
-       dput(new_dentry);
- exit4:
-       dput(old_dentry);
-@@ -1980,20 +2165,26 @@
- }
- static inline int
--__vfs_follow_link(struct nameidata *nd, const char *link)
-+__vfs_follow_link(struct nameidata *nd, const char *link,
-+                struct lookup_intent *it)
- {
-       int res = 0;
-       char *name;
-       if (IS_ERR(link))
-               goto fail;
-+      if (it == NULL)
-+              it = nd->intent;
-+      else if (it != nd->intent)
-+              printk("it != nd->intent: tell phil@clusterfs.com\n");
-+
-       if (*link == '/') {
-               path_release(nd);
-               if (!walk_init_root(link, nd))
-                       /* weird __emul_prefix() stuff did it */
-                       goto out;
-       }
--      res = link_path_walk(link, nd);
-+      res = link_path_walk_it(link, nd, it);
- out:
-       if (current->link_count || res || nd->last_type!=LAST_NORM)
-               return res;
-@@ -2015,7 +2206,13 @@
- int vfs_follow_link(struct nameidata *nd, const char *link)
- {
--      return __vfs_follow_link(nd, link);
-+      return __vfs_follow_link(nd, link, NULL);
-+}
-+
-+int vfs_follow_link_it(struct nameidata *nd, const char *link,
-+                     struct lookup_intent *it)
-+{
-+      return __vfs_follow_link(nd, link, it);
- }
- /* get the link contents into pagecache */
-@@ -2057,7 +2254,7 @@
- {
-       struct page *page = NULL;
-       char *s = page_getlink(dentry, &page);
--      int res = __vfs_follow_link(nd, s);
-+      int res = __vfs_follow_link(nd, s, NULL);
-       if (page) {
-               kunmap(page);
-               page_cache_release(page);
-Index: linux-2.4.19.SuSE/fs/namespace.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/namespace.c      Mon Jan 27 05:08:07 2003
-+++ linux-2.4.19.SuSE/fs/namespace.c   Sat Nov 15 17:56:42 2003
-@@ -97,6 +97,7 @@
- {
-       old_nd->dentry = mnt->mnt_mountpoint;
-       old_nd->mnt = mnt->mnt_parent;
-+      UNPIN(old_nd->dentry, old_nd->mnt, 1);
-       mnt->mnt_parent = mnt;
-       mnt->mnt_mountpoint = mnt->mnt_root;
-       list_del_init(&mnt->mnt_child);
-@@ -108,6 +109,7 @@
- {
-       mnt->mnt_parent = mntget(nd->mnt);
-       mnt->mnt_mountpoint = dget(nd->dentry);
-+      PIN(nd->dentry, nd->mnt, 1);
-       list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
-       list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
-       nd->dentry->d_mounted++;
-@@ -491,15 +493,18 @@
- {
-       struct nameidata old_nd;
-       struct vfsmount *mnt = NULL;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int err = mount_is_safe(nd);
-       if (err)
-               return err;
-       if (!old_name || !*old_name)
-               return -EINVAL;
-       if (path_init(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd))
--              err = path_walk(old_name, &old_nd);
--      if (err)
-+              err = path_walk_it(old_name, &old_nd, &it);
-+      if (err) {
-+              intent_release(&it);
-               return err;
-+      }
-       down_write(&current->namespace->sem);
-       err = -EINVAL;
-@@ -522,6 +527,7 @@
-       }
-       up_write(&current->namespace->sem);
-+      intent_release(&it);
-       path_release(&old_nd);
-       return err;
- }
-@@ -725,6 +731,7 @@
-                 unsigned long flags, void *data_page)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int retval = 0;
-       int mnt_flags = 0;
-@@ -750,9 +757,11 @@
-       /* ... and get the mountpoint */
-       if (path_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd))
--              retval = path_walk(dir_name, &nd);
--      if (retval)
-+              retval = path_walk_it(dir_name, &nd, &it);
-+      if (retval) {
-+              intent_release(&it);
-               return retval;
-+      }
-       if (flags & MS_REMOUNT)
-               retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
-@@ -764,6 +773,8 @@
-       else
-               retval = do_add_mount(&nd, type_page, flags, mnt_flags,
-                                     dev_name, data_page);
-+
-+      intent_release(&it);
-       path_release(&nd);
-       return retval;
- }
-@@ -929,6 +940,8 @@
- {
-       struct vfsmount *tmp;
-       struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
-+      struct lookup_intent new_it = { .it_op = IT_GETATTR };
-+      struct lookup_intent old_it = { .it_op = IT_GETATTR };
-       char *name;
-       int error;
-@@ -943,7 +956,7 @@
-               goto out0;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
--              error = path_walk(name, &new_nd);
-+              error = path_walk_it(name, &new_nd, &new_it);
-       putname(name);
-       if (error)
-               goto out0;
-@@ -957,7 +970,7 @@
-               goto out1;
-       error = 0;
-       if (path_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
--              error = path_walk(name, &old_nd);
-+              error = path_walk_it(name, &old_nd, &old_it);
-       putname(name);
-       if (error)
-               goto out1;
-@@ -1013,8 +1026,10 @@
-       up(&old_nd.dentry->d_inode->i_zombie);
-       up_write(&current->namespace->sem);
-       path_release(&user_nd);
-+      intent_release(&old_it);
-       path_release(&old_nd);
- out1:
-+      intent_release(&new_it);
-       path_release(&new_nd);
- out0:
-       unlock_kernel();
-Index: linux-2.4.19.SuSE/fs/open.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/open.c   Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/open.c        Sat Nov 15 17:43:27 2003
-@@ -19,6 +19,8 @@
- #include <asm/uaccess.h>
- #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-+extern int path_walk_it(const char *name, struct nameidata *nd,
-+                      struct lookup_intent *it);
- int vfs_statfs(struct super_block *sb, struct statfs *buf)
- {
-@@ -95,9 +97,10 @@
-       write_unlock(&files->file_lock);
- }
--int do_truncate(struct dentry *dentry, loff_t length)
-+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
- {
-       struct inode *inode = dentry->d_inode;
-+      struct inode_operations *op = dentry->d_inode->i_op;
-       int error;
-       struct iattr newattrs;
-@@ -108,7 +111,13 @@
-       down(&inode->i_sem);
-       newattrs.ia_size = length;
-       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
--      error = notify_change(dentry, &newattrs);
-+      if (called_from_open)
-+              newattrs.ia_valid |= ATTR_FROM_OPEN;
-+      if (op->setattr_raw) {
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+      } else
-+              error = notify_change(dentry, &newattrs);
-       up(&inode->i_sem);
-       return error;
- }
-@@ -118,12 +127,13 @@
-       struct nameidata nd;
-       struct inode * inode;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       error = -EINVAL;
-       if (length < 0) /* sorry, but loff_t says... */
-               goto out;
--      error = user_path_walk(path, &nd);
-+      error = user_path_walk_it(path, &nd, &it);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-@@ -163,11 +173,13 @@
-       error = locks_verify_truncate(inode, NULL, length);
-       if (!error) {
-               DQUOT_INIT(inode);
--              error = do_truncate(nd.dentry, length);
-+              intent_release(&it);
-+              error = do_truncate(nd.dentry, length, 0);
-       }
-       put_write_access(inode);
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
-@@ -215,7 +227,7 @@
-       error = locks_verify_truncate(inode, file, length);
-       if (!error)
--              error = do_truncate(dentry, length);
-+              error = do_truncate(dentry, length, 0);
- out_putf:
-       fput(file);
- out:
-@@ -260,11 +272,13 @@
-       struct inode * inode;
-       struct iattr newattrs;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, NULL);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-+      /* this is safe without a Lustre lock because it only depends
-+         on the super block */
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto dput_and_out;
-@@ -279,11 +293,25 @@
-                       goto dput_and_out;
-               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
--      } else {
-+      }
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto dput_and_out;
-+      }
-+
-+      error = -EPERM;
-+      if (!times) {
-               if (current->fsuid != inode->i_uid &&
-                   (error = permission(inode,MAY_WRITE)) != 0)
-                       goto dput_and_out;
-       }
-+
-       error = notify_change(nd.dentry, &newattrs);
- dput_and_out:
-       path_release(&nd);
-@@ -304,12 +332,14 @@
-       struct inode * inode;
-       struct iattr newattrs;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, NULL);
-       if (error)
-               goto out;
-       inode = nd.dentry->d_inode;
-+      /* this is safe without a Lustre lock because it only depends
-+         on the super block */
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto dput_and_out;
-@@ -324,7 +354,20 @@
-               newattrs.ia_atime = times[0].tv_sec;
-               newattrs.ia_mtime = times[1].tv_sec;
-               newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
--      } else {
-+      }
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto dput_and_out;
-+      }
-+
-+      error = -EPERM;
-+      if (!utimes) {
-               if (current->fsuid != inode->i_uid &&
-                   (error = permission(inode,MAY_WRITE)) != 0)
-                       goto dput_and_out;
-@@ -347,6 +390,7 @@
-       int old_fsuid, old_fsgid;
-       kernel_cap_t old_cap;
-       int res;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
-               return -EINVAL;
-@@ -364,13 +408,14 @@
-       else
-               current->cap_effective = current->cap_permitted;
--      res = user_path_walk(filename, &nd);
-+      res = user_path_walk_it(filename, &nd, &it);
-       if (!res) {
-               res = permission(nd.dentry->d_inode, mode);
-               /* SuS v2 requires we report a read only fs too */
-               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
-                  && !special_file(nd.dentry->d_inode->i_mode))
-                       res = -EROFS;
-+              intent_release(&it);
-               path_release(&nd);
-       }
-@@ -386,6 +431,7 @@
-       int error;
-       struct nameidata nd;
-       char *name;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       name = getname(filename);
-       error = PTR_ERR(name);
-@@ -394,7 +440,7 @@
-       error = 0;
-       if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd))
--              error = path_walk(name, &nd);
-+              error = path_walk_it(name, &nd, &it);
-       putname(name);
-       if (error)
-               goto out;
-@@ -406,6 +452,7 @@
-       set_fs_pwd(current->fs, nd.mnt, nd.dentry);
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
-@@ -446,6 +493,7 @@
-       int error;
-       struct nameidata nd;
-       char *name;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       name = getname(filename);
-       error = PTR_ERR(name);
-@@ -454,7 +502,7 @@
-       path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
-                     LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
--      error = path_walk(name, &nd);   
-+      error = path_walk_it(name, &nd, &it);
-       putname(name);
-       if (error)
-               goto out;
-@@ -471,39 +519,56 @@
-       set_fs_altroot();
-       error = 0;
- dput_and_out:
-+      intent_release(&it);
-       path_release(&nd);
- out:
-       return error;
- }
--asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
-+int chmod_common(struct dentry *dentry, mode_t mode)
- {
--      struct inode * inode;
--      struct dentry * dentry;
--      struct file * file;
--      int err = -EBADF;
-+      struct inode *inode = dentry->d_inode;
-       struct iattr newattrs;
-+      int err = -EROFS;
--      file = fget(fd);
--      if (!file)
-+      if (IS_RDONLY(inode))
-               goto out;
--      dentry = file->f_dentry;
--      inode = dentry->d_inode;
-+      if (inode->i_op->setattr_raw) {
-+              newattrs.ia_mode = mode;
-+              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-+              newattrs.ia_valid |= ATTR_RAW;
-+              err = inode->i_op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (err != -EOPNOTSUPP)
-+                      goto out;
-+      }
--      err = -EROFS;
--      if (IS_RDONLY(inode))
--              goto out_putf;
-       err = -EPERM;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
--              goto out_putf;
-+              goto out;
-+
-       if (mode == (mode_t) -1)
-               mode = inode->i_mode;
-       newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
-       newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       err = notify_change(dentry, &newattrs);
--out_putf:
-+out:
-+      return err;
-+}
-+
-+asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
-+{
-+      struct file * file;
-+      int err = -EBADF;
-+
-+      file = fget(fd);
-+      if (!file)
-+              goto out;
-+
-+      err = chmod_common(file->f_dentry, mode);
-+
-       fput(file);
- out:
-       return err;
-@@ -512,30 +577,14 @@
- asmlinkage long sys_chmod(const char * filename, mode_t mode)
- {
-       struct nameidata nd;
--      struct inode * inode;
-       int error;
--      struct iattr newattrs;
-       error = user_path_walk(filename, &nd);
-       if (error)
-               goto out;
--      inode = nd.dentry->d_inode;
--
--      error = -EROFS;
--      if (IS_RDONLY(inode))
--              goto dput_and_out;
--      error = -EPERM;
--      if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
--              goto dput_and_out;
-+      error = chmod_common(nd.dentry, mode);
--      if (mode == (mode_t) -1)
--              mode = inode->i_mode;
--      newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
--      newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
--      error = notify_change(nd.dentry, &newattrs);
--
--dput_and_out:
-       path_release(&nd);
- out:
-       return error;
-@@ -555,6 +604,20 @@
-       error = -EROFS;
-       if (IS_RDONLY(inode))
-               goto out;
-+
-+      if (inode->i_op->setattr_raw) {
-+              struct inode_operations *op = dentry->d_inode->i_op;
-+
-+              newattrs.ia_uid = user;
-+              newattrs.ia_gid = group;
-+              newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
-+              newattrs.ia_valid |= ATTR_RAW;
-+              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      return error;
-+      }
-+
-       error = -EPERM;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               goto out;
-@@ -659,6 +722,7 @@
- {
-       int namei_flags, error;
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_OPEN };
-       namei_flags = flags;
-       if ((namei_flags+1) & O_ACCMODE)
-@@ -666,14 +730,15 @@
-       if (namei_flags & O_TRUNC)
-               namei_flags |= 2;
--      error = open_namei(filename, namei_flags, mode, &nd);
--      if (!error)
--              return dentry_open(nd.dentry, nd.mnt, flags);
-+      error = open_namei_it(filename, namei_flags, mode, &nd, &it);
-+      if (error)
-+              return ERR_PTR(error);
--      return ERR_PTR(error);
-+      return dentry_open_it(nd.dentry, nd.mnt, flags, &it);
- }
--struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it)
- {
-       struct file * f;
-       struct inode *inode;
-@@ -710,7 +775,9 @@
-       }
-       if (f->f_op && f->f_op->open) {
-+              f->f_it = it;
-               error = f->f_op->open(inode,f);
-+              f->f_it = NULL;
-               if (error)
-                       goto cleanup_all;
-       }
-@@ -722,6 +789,7 @@
-                                     !inode->i_mapping->a_ops->direct_IO))
-               goto cleanup_all;
-+      intent_release(it);
-       return f;
- cleanup_all:
-@@ -736,11 +804,17 @@
- cleanup_file:
-       put_filp(f);
- cleanup_dentry:
-+      intent_release(it);
-       dput(dentry);
-       mntput(mnt);
-       return ERR_PTR(error);
- }
-+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+{
-+      return dentry_open_it(dentry, mnt, flags, NULL);
-+}
-+
- /*
-  * Find an empty file descriptor entry, and mark it busy.
-  */
-Index: linux-2.4.19.SuSE/fs/stat.c
-===================================================================
---- linux-2.4.19.SuSE.orig/fs/stat.c   Mon Jan 27 05:08:00 2003
-+++ linux-2.4.19.SuSE/fs/stat.c        Sat Nov 15 17:29:03 2003
-@@ -17,10 +17,16 @@
-  * Revalidate the inode. This is required for proper NFS attribute caching.
-  */
- static __inline__ int
--do_revalidate(struct dentry *dentry)
-+do_revalidate(struct dentry *dentry, struct lookup_intent *it)
- {
-       struct inode * inode = dentry->d_inode;
--      if (inode->i_op && inode->i_op->revalidate)
-+      if (inode->i_op && inode->i_op->revalidate_it)
-+              return inode->i_op->revalidate_it(dentry, it);
-+      else if (inode->i_op && inode->i_op->revalidate)
-               return inode->i_op->revalidate(dentry);
-       return 0;
- }
-@@ -141,13 +145,15 @@
- asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -157,13 +163,15 @@
- asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -178,13 +186,15 @@
- asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -195,13 +205,15 @@
- asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
- {
-       struct nameidata nd;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
-       int error;
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -222,7 +234,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_old_stat(dentry->d_inode, statbuf);
-               fput(f);
-@@ -241,7 +253,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_new_stat(dentry->d_inode, statbuf);
-               fput(f);
-@@ -263,7 +275,7 @@
-               error = -EINVAL;
-               if (inode->i_op && inode->i_op->readlink &&
--                  !(error = do_revalidate(nd.dentry))) {
-+                  !(error = do_revalidate(nd.dentry, NULL))) {
-                       UPDATE_ATIME(inode);
-                       error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
-               }
-@@ -339,12 +351,14 @@
- {
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
--      error = user_path_walk(filename, &nd);
-+      error = user_path_walk_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -354,12 +368,14 @@
- {
-       struct nameidata nd;
-       int error;
-+      struct lookup_intent it = { .it_op = IT_GETATTR };
--      error = user_path_walk_link(filename, &nd);
-+      error = user_path_walk_link_it(filename, &nd, &it);
-       if (!error) {
--              error = do_revalidate(nd.dentry);
-+              error = do_revalidate(nd.dentry, &it);
-               if (!error)
-                       error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+              intent_release(&it);
-               path_release(&nd);
-       }
-       return error;
-@@ -374,7 +390,7 @@
-       if (f) {
-               struct dentry * dentry = f->f_dentry;
--              err = do_revalidate(dentry);
-+              err = do_revalidate(dentry, NULL);
-               if (!err)
-                       err = cp_new_stat64(dentry->d_inode, statbuf);
-               fput(f);
-Index: linux-2.4.19.SuSE/include/linux/dcache.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/dcache.h      Mon Jan 27 05:13:15 2003
-+++ linux-2.4.19.SuSE/include/linux/dcache.h   Sat Nov 15 17:35:46 2003
-@@ -5,6 +5,51 @@
- #include <asm/atomic.h>
- #include <linux/mount.h>
-+#include <linux/string.h>
-+
-+#define IT_OPEN     0x0001
-+#define IT_CREAT    0x0002
-+#define IT_READDIR  0x0004
-+#define IT_GETATTR  0x0008
-+#define IT_LOOKUP   0x0010
-+#define IT_UNLINK   0x0020
-+#define IT_GETXATTR 0x0040
-+#define IT_EXEC     0x0080
-+#define IT_PIN      0x0100
-+
-+#define IT_FL_LOCKED   0x0001
-+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
-+
-+#define INTENT_MAGIC 0x19620323
-+
-+
-+struct lustre_intent_data {
-+      int       it_disposition;
-+      int       it_status;
-+      __u64     it_lock_handle;
-+      void     *it_data;
-+      int       it_lock_mode;
-+      int it_int_flags;
-+};
-+struct lookup_intent {
-+      int     it_magic;
-+      void    (*it_op_release)(struct lookup_intent *);
-+      int     it_op;
-+      int     it_flags;
-+      int     it_create_mode;
-+      union {
-+              struct lustre_intent_data lustre;
-+      } d;
-+};
-+
-+static inline void intent_init(struct lookup_intent *it, int op, int flags)
-+{
-+      memset(it, 0, sizeof(*it));
-+      it->it_magic = INTENT_MAGIC;
-+      it->it_op = op;
-+      it->it_flags = flags;
-+}
-+
- /*
-  * linux/include/linux/dcache.h
-@@ -92,8 +137,22 @@
-       int (*d_delete)(struct dentry *);
-       void (*d_release)(struct dentry *);
-       void (*d_iput)(struct dentry *, struct inode *);
-+      int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
-+      void (*d_pin)(struct dentry *, struct vfsmount * , int);
-+      void (*d_unpin)(struct dentry *, struct vfsmount *, int);
- };
-+#define PIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_pin) \
-+                              de->d_op->d_pin(de, mnt, flag);
-+#define UNPIN(de,mnt,flag)  if (de && de->d_op && de->d_op->d_unpin) \
-+                              de->d_op->d_unpin(de, mnt, flag);
-+
-+
-+/* defined in fs/namei.c */
-+extern void intent_release(struct lookup_intent *it);
-+/* defined in fs/dcache.c */
-+extern void __d_rehash(struct dentry * entry, int lock);
-+
- /* the dentry parameter passed to d_hash and d_compare is the parent
-  * directory of the entries to be compared. It is used in case these
-  * functions need any directory specific information for determining
-@@ -125,6 +184,7 @@
-                                        * s_nfsd_free_path semaphore will be down
-                                        */
- #define DCACHE_REFERENCED     0x0008  /* Recently used, don't discard. */
-+#define DCACHE_LUSTRE_INVALID 0x0010  /* Lustre invalidated */
- extern spinlock_t dcache_lock;
-Index: linux-2.4.19.SuSE/include/linux/fs.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/fs.h  Sat Nov 15 17:25:06 2003
-+++ linux-2.4.19.SuSE/include/linux/fs.h       Sat Nov 15 17:29:03 2003
-@@ -73,6 +73,7 @@
- #define FMODE_READ 1
- #define FMODE_WRITE 2
-+#define FMODE_EXEC 4
- #define READ 0
- #define WRITE 1
-@@ -363,6 +364,9 @@
- #define ATTR_MTIME_SET        256
- #define ATTR_FORCE    512     /* Not a change, but a change it */
- #define ATTR_ATTR_FLAG        1024
-+#define ATTR_RAW      0x0800  /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN        0x1000  /* called from open path, ie O_TRUNC */
-+#define ATTR_CTIME_SET        0x2000
- /*
-  * This is the Inode Attributes structure, used for notify_change().  It
-@@ -507,6 +511,7 @@
-       struct pipe_inode_info  *i_pipe;
-       struct block_device     *i_bdev;
-       struct char_device      *i_cdev;
-+      void                    *i_filterdata;
-       unsigned long           i_dnotify_mask; /* Directory notify events */
-       struct dnotify_struct   *i_dnotify; /* for directory notifications */
-@@ -669,6 +674,7 @@
-       /* needed for tty driver, and maybe others */
-       void                    *private_data;
-+      struct lookup_intent    *f_it;
-       /* preallocated helper kiobuf to speedup O_DIRECT */
-       struct kiobuf           *f_iobuf;
-@@ -799,6 +805,7 @@
-       struct qstr last;
-       unsigned int flags;
-       int last_type;
-+      struct lookup_intent *intent;
- };
- #define DQUOT_USR_ENABLED     0x01            /* User diskquotas enabled */
-@@ -947,7 +954,8 @@
- extern int __vfs_rmdir(struct inode *, struct dentry *);
- extern int vfs_rmdir(struct inode *, struct dentry *);
- extern int vfs_unlink(struct inode *, struct dentry *);
--extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
-+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+             struct inode *new_dir, struct dentry *new_dentry);
- /*
-  * File types
-@@ -1020,21 +1028,32 @@
- struct inode_operations {
-       int (*create) (struct inode *,struct dentry *,int);
-+      int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
-       struct dentry * (*lookup) (struct inode *,struct dentry *);
-+      struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
-       int (*link) (struct dentry *,struct inode *,struct dentry *);
-+      int (*link_raw) (struct nameidata *,struct nameidata *);
-       int (*unlink) (struct inode *,struct dentry *);
-+      int (*unlink_raw) (struct nameidata *);
-       int (*symlink) (struct inode *,struct dentry *,const char *);
-+      int (*symlink_raw) (struct nameidata *,const char *);
-       int (*mkdir) (struct inode *,struct dentry *,int);
-+      int (*mkdir_raw) (struct nameidata *,int);
-       int (*rmdir) (struct inode *,struct dentry *);
-+      int (*rmdir_raw) (struct nameidata *);
-       int (*mknod) (struct inode *,struct dentry *,int,int);
-+      int (*mknod_raw) (struct nameidata *,int,dev_t);
-       int (*rename) (struct inode *, struct dentry *,
-                       struct inode *, struct dentry *);
-+      int (*rename_raw) (struct nameidata *, struct nameidata *);
-       int (*readlink) (struct dentry *, char *,int);
-       int (*follow_link) (struct dentry *, struct nameidata *);
-       void (*truncate) (struct inode *);
-       int (*permission) (struct inode *, int);
-       int (*revalidate) (struct dentry *);
-+      int (*revalidate_it) (struct dentry *, struct lookup_intent *);
-       int (*setattr) (struct dentry *, struct iattr *);
-+      int (*setattr_raw) (struct inode *, struct iattr *);
-       int (*getattr) (struct dentry *, struct iattr *);
-       int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
-       ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1244,10 +1263,14 @@
- asmlinkage long sys_open(const char *, int, int);
- asmlinkage long sys_close(unsigned int);      /* yes, it's really unsigned */
--extern int do_truncate(struct dentry *, loff_t start);
-+extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
- extern struct file *filp_open(const char *, int, int);
- extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
-+extern int open_namei_it(const char *filename, int namei_flags, int mode,
-+                       struct nameidata *nd, struct lookup_intent *it);
-+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
-+                          int flags, struct lookup_intent *it);
- extern int filp_close(struct file *, fl_owner_t id);
- extern char * getname(const char *);
-@@ -1515,6 +1538,7 @@
- extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
- extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
-+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
- extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
- extern int FASTCALL(path_walk(const char *, struct nameidata *));
- extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-@@ -1526,6 +1550,8 @@
- extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
- #define user_path_walk(name,nd)        __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
- #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
-+#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
-+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
- extern void iput(struct inode *);
- extern void force_delete(struct inode *);
-@@ -1646,6 +1672,8 @@
- extern int vfs_readlink(struct dentry *, char *, int, const char *);
- extern int vfs_follow_link(struct nameidata *, const char *);
-+extern int vfs_follow_link_it(struct nameidata *, const char *,
-+                            struct lookup_intent *it);
- extern int page_readlink(struct dentry *, char *, int);
- extern int page_follow_link(struct dentry *, struct nameidata *);
- extern struct inode_operations page_symlink_inode_operations;
-Index: linux-2.4.19.SuSE/include/linux/fs_struct.h
-===================================================================
---- linux-2.4.19.SuSE.orig/include/linux/fs_struct.h   Fri Jul 13 15:10:44 2001
-+++ linux-2.4.19.SuSE/include/linux/fs_struct.h        Sat Nov 15 17:29:03 2003
-@@ -34,10 +34,12 @@
-       write_lock(&fs->lock);
-       old_root = fs->root;
-       old_rootmnt = fs->rootmnt;
-+      PIN(dentry, mnt, 1);
-       fs->rootmnt = mntget(mnt);
-       fs->root = dget(dentry);
-       write_unlock(&fs->lock);
-       if (old_root) {
-+              UNPIN(old_root, old_rootmnt, 1);
-               dput(old_root);
-               mntput(old_rootmnt);
-       }
-@@ -57,10 +59,12 @@
-       write_lock(&fs->lock);
-       old_pwd = fs->pwd;
-       old_pwdmnt = fs->pwdmnt;
-+      PIN(dentry, mnt, 0);
-       fs->pwdmnt = mntget(mnt);
-       fs->pwd = dget(dentry);
-       write_unlock(&fs->lock);
-       if (old_pwd) {
-+              UNPIN(old_pwd, old_pwdmnt, 0);
-               dput(old_pwd);
-               mntput(old_pwdmnt);
-       }
-Index: linux-2.4.19.SuSE/kernel/exit.c
-===================================================================
---- linux-2.4.19.SuSE.orig/kernel/exit.c       Mon Jan 27 05:08:16 2003
-+++ linux-2.4.19.SuSE/kernel/exit.c    Sat Nov 15 17:29:03 2003
-@@ -288,11 +288,14 @@
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-+              UNPIN(fs->pwd, fs->pwdmnt, 0);
-+              UNPIN(fs->root, fs->rootmnt, 1);
-               dput(fs->root);
-               mntput(fs->rootmnt);
-               dput(fs->pwd);
-               mntput(fs->pwdmnt);
-               if (fs->altroot) {
-+                      UNPIN(fs->altroot, fs->altrootmnt, 1);
-                       dput(fs->altroot);
-                       mntput(fs->altrootmnt);
-               }
-Index: linux-2.4.19.SuSE/kernel/fork.c
-===================================================================
---- linux-2.4.19.SuSE.orig/kernel/fork.c       Mon Jan 27 05:08:56 2003
-+++ linux-2.4.19.SuSE/kernel/fork.c    Sat Nov 15 17:29:03 2003
-@@ -454,10 +454,13 @@
-               fs->umask = old->umask;
-               read_lock(&old->lock);
-               fs->rootmnt = mntget(old->rootmnt);
-+              PIN(old->pwd, old->pwdmnt, 0);
-+              PIN(old->root, old->rootmnt, 1);
-               fs->root = dget(old->root);
-               fs->pwdmnt = mntget(old->pwdmnt);
-               fs->pwd = dget(old->pwd);
-               if (old->altroot) {
-+                      PIN(old->altroot, old->altrootmnt, 1);
-                       fs->altrootmnt = mntget(old->altrootmnt);
-                       fs->altroot = dget(old->altroot);
-               } else {
-Index: linux-2.4.19.SuSE/kernel/ksyms.c
-===================================================================
---- linux-2.4.19.SuSE.orig/kernel/ksyms.c      Sat Nov 15 17:24:46 2003
-+++ linux-2.4.19.SuSE/kernel/ksyms.c   Sat Nov 15 17:29:03 2003
-@@ -315,6 +315,7 @@
- EXPORT_SYMBOL(set_page_dirty);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_follow_link);
-+EXPORT_SYMBOL(vfs_follow_link_it);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(page_follow_link);
- EXPORT_SYMBOL(page_symlink_inode_operations);
index 49165c4..674e6c1 100644 (file)
@@ -1,8 +1,7 @@
-Index: linux-2.6.9-5.0.3.EL/fs/exec.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/exec.c        2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/exec.c     2005-02-26 23:29:02.000000000 +0200
-@@ -124,9 +124,10 @@
+diff -urp a/fs/exec.c b/fs/exec.c
+--- a/fs/exec.c        2006-03-10 14:59:58.000000000 -0800
++++ b/fs/exec.c        2006-03-10 18:09:26.000000000 -0800
+@@ -125,9 +125,10 @@ asmlinkage long sys_uselib(const char __
        struct file * file;
        struct nameidata nd;
        int error;
@@ -15,7 +14,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/exec.c
        if (error)
                goto out;
  
-@@ -138,7 +139,7 @@
+@@ -139,7 +140,7 @@ asmlinkage long sys_uselib(const char __
        if (error)
                goto exit;
  
@@ -24,7 +23,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/exec.c
        error = PTR_ERR(file);
        if (IS_ERR(file))
                goto out;
-@@ -487,8 +488,9 @@
+@@ -488,8 +489,9 @@ struct file *open_exec(const char *name)
        int err;
        struct file *file;
  
@@ -36,7 +35,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/exec.c
        file = ERR_PTR(err);
  
        if (!err) {
-@@ -501,7 +503,7 @@
+@@ -502,7 +504,7 @@ struct file *open_exec(const char *name)
                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
@@ -45,11 +44,21 @@ Index: linux-2.6.9-5.0.3.EL/fs/exec.c
                                if (!IS_ERR(file)) {
                                        err = deny_write_access(file);
                                        if (err) {
-Index: linux-2.6.9-5.0.3.EL/fs/namei.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/namei.c       2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/namei.c    2005-04-01 18:15:29.743029208 +0300
-@@ -272,8 +272,19 @@
+diff -urp a/fs/inode.c b/fs/inode.c
+--- a/fs/inode.c       2006-03-10 14:59:59.000000000 -0800
++++ b/fs/inode.c       2006-03-10 14:59:59.000000000 -0800
+@@ -235,6 +235,7 @@ void __iget(struct inode * inode)
+       inodes_stat.nr_unused--;
+ }
++EXPORT_SYMBOL(__iget);
+ /**
+  * clear_inode - clear an inode
+  * @inode: inode to clear
+diff -urp a/fs/namei.c b/fs/namei.c
+--- a/fs/namei.c       2006-03-10 14:59:59.000000000 -0800
++++ b/fs/namei.c       2006-03-10 16:42:11.000000000 -0800
+@@ -274,8 +274,19 @@ int deny_write_access(struct file * file
        return 0;
  }
  
@@ -69,7 +78,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        dput(nd->dentry);
        mntput(nd->mnt);
  }
-@@ -363,7 +374,10 @@
+@@ -367,7 +378,10 @@ static struct dentry * real_lookup(struc
  {
        struct dentry * result;
        struct inode *dir = parent->d_inode;
@@ -80,7 +89,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        down(&dir->i_sem);
        /*
         * First re-do the cached lookup just in case it was created
-@@ -402,7 +416,10 @@
+@@ -406,7 +420,10 @@ static struct dentry * real_lookup(struc
        if (result->d_op && result->d_op->d_revalidate) {
                if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
                        dput(result);
@@ -92,7 +101,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
                }
        }
        return result;
-@@ -432,7 +449,9 @@
+@@ -436,7 +453,9 @@ walk_init_root(const char *name, struct 
  static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
  {
        int res = 0;
@@ -102,7 +111,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        if (IS_ERR(link))
                goto fail;
  
-@@ -442,6 +461,9 @@
+@@ -446,6 +465,9 @@ static inline int __vfs_follow_link(stru
                        /* weird __emul_prefix() stuff did it */
                        goto out;
        }
@@ -112,7 +121,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        res = link_path_walk(link, nd);
  out:
        if (nd->depth || res || nd->last_type!=LAST_NORM)
-@@ -650,6 +672,33 @@
+@@ -654,6 +676,33 @@ fail:
        return PTR_ERR(dentry);
  }
  
@@ -145,8 +154,8 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
 +
  /*
   * Name resolution.
-  *
-@@ -751,8 +800,12 @@
+  * This is the basic name resolution function, turning a pathname into
+@@ -755,8 +804,12 @@ static fastcall int __link_path_walk(con
                        goto out_dput;
  
                if (inode->i_op->follow_link) {
@@ -159,7 +168,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
                        dput(next.dentry);
                        mntput(next.mnt);
                        if (err)
-@@ -791,14 +844,34 @@
+@@ -795,14 +848,34 @@ last_component:
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
@@ -194,7 +203,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
                if (err)
                        break;
                follow_mount(&next.mnt, &next.dentry);
-@@ -1016,7 +1089,7 @@
+@@ -1053,7 +1126,7 @@ struct dentry * lookup_hash(struct qstr 
  }
  
  /* SMP-safe */
@@ -203,7 +212,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  {
        unsigned long hash;
        struct qstr this;
-@@ -1036,11 +1109,16 @@
+@@ -1073,11 +1146,16 @@ struct dentry * lookup_one_len(const cha
        }
        this.hash = end_name_hash(hash);
  
@@ -221,7 +230,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  /*
   *    namei()
   *
-@@ -1052,7 +1130,7 @@
+@@ -1089,7 +1167,7 @@ access:
   * that namei follows links, while lnamei does not.
   * SMP-safe
   */
@@ -230,7 +239,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  {
        char *tmp = getname(name);
        int err = PTR_ERR(tmp);
-@@ -1064,6 +1142,12 @@
+@@ -1101,6 +1179,12 @@ int fastcall __user_walk(const char __us
        return err;
  }
  
@@ -243,7 +252,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  /*
   * It's inline, so penalty for filesystems that don't use sticky bit is
   * minimal.
-@@ -1347,8 +1431,8 @@
+@@ -1387,8 +1471,8 @@ int open_namei(const char * pathname, in
                acc_mode |= MAY_APPEND;
  
        /* Fill in the open() intent data */
@@ -254,7 +263,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  
        /*
         * The simplest case - just a plain lookup.
-@@ -1363,6 +1447,7 @@
+@@ -1403,6 +1487,7 @@ int open_namei(const char * pathname, in
        /*
         * Create - we need to know the parent.
         */
@@ -262,7 +271,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
        if (error)
                return error;
-@@ -1379,7 +1464,9 @@
+@@ -1419,7 +1504,9 @@ int open_namei(const char * pathname, in
        dir = nd->dentry;
        nd->flags &= ~LOOKUP_PARENT;
        down(&dir->d_inode->i_sem);
@@ -272,21 +281,20 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
  
  do_last:
        error = PTR_ERR(dentry);
-@@ -1492,7 +1579,9 @@
+@@ -1532,7 +1619,9 @@ do_link:
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
 +      nd->flags |= LOOKUP_LAST;
        dentry = __lookup_hash(&nd->last, nd->dentry, nd);
 +      nd->flags &= ~LOOKUP_LAST;
-       putname(nd->last.name);
+       __putname(nd->last.name);
        goto do_last;
  }
-Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/namespace.c   2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/namespace.c        2005-02-26 23:29:02.000000000 +0200
-@@ -61,6 +61,7 @@
+diff -urp a/fs/namespace.c b/fs/namespace.c
+--- a/fs/namespace.c   2006-03-10 15:00:00.000000000 -0800
++++ b/fs/namespace.c   2006-03-10 17:04:09.000000000 -0800
+@@ -61,6 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char
                INIT_LIST_HEAD(&mnt->mnt_mounts);
                INIT_LIST_HEAD(&mnt->mnt_list);
                INIT_LIST_HEAD(&mnt->mnt_fslink);
@@ -294,7 +302,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
                if (name) {
                        int size = strlen(name)+1;
                        char *newname = kmalloc(size, GFP_KERNEL);
-@@ -114,6 +115,7 @@
+@@ -114,6 +115,7 @@ static inline int check_mnt(struct vfsmo
  
  static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
  {
@@ -302,7 +310,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        old_nd->dentry = mnt->mnt_mountpoint;
        old_nd->mnt = mnt->mnt_parent;
        mnt->mnt_parent = mnt;
-@@ -177,6 +179,9 @@
+@@ -177,6 +179,9 @@ void __mntput(struct vfsmount *mnt)
  {
        struct super_block *sb = mnt->mnt_sb;
        dput(mnt->mnt_root);
@@ -312,7 +320,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        free_vfsmnt(mnt);
        deactivate_super(sb);
  }
-@@ -403,6 +408,8 @@
+@@ -403,6 +408,8 @@ static int do_umount(struct vfsmount *mn
         */
  
        lock_kernel();
@@ -321,7 +329,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
                sb->s_op->umount_begin(sb);
        unlock_kernel();
-@@ -627,6 +634,7 @@
+@@ -627,6 +634,7 @@ static int do_loopback(struct nameidata 
                return err;
        if (!old_name || !*old_name)
                return -EINVAL;
@@ -329,7 +337,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;
-@@ -701,6 +709,7 @@
+@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat
                return -EPERM;
        if (!old_name || !*old_name)
                return -EINVAL;
@@ -337,7 +345,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
        if (err)
                return err;
-@@ -1012,6 +1021,7 @@
+@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di
        int retval = 0;
        int mnt_flags = 0;
  
@@ -345,11 +353,10 @@ Index: linux-2.6.9-5.0.3.EL/fs/namespace.c
        /* Discard magic */
        if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
                flags &= ~MS_MGC_MSK;
-Index: linux-2.6.9-5.0.3.EL/fs/open.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/open.c        2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/open.c     2005-02-26 23:29:02.000000000 +0200
-@@ -215,12 +215,12 @@
+diff -urp a/fs/open.c b/fs/open.c
+--- a/fs/open.c        2006-03-10 15:00:01.000000000 -0800
++++ b/fs/open.c        2006-03-10 18:09:51.000000000 -0800
+@@ -216,12 +216,12 @@ static inline long do_sys_truncate(const
        struct nameidata nd;
        struct inode * inode;
        int error;
@@ -364,7 +371,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
        if (error)
                goto out;
        inode = nd.dentry->d_inode;
-@@ -474,6 +474,7 @@
+@@ -475,6 +475,7 @@ asmlinkage long sys_access(const char __
        int old_fsuid, old_fsgid;
        kernel_cap_t old_cap;
        int res;
@@ -372,7 +379,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
  
        if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
                return -EINVAL;
-@@ -498,13 +499,14 @@
+@@ -499,13 +500,14 @@ asmlinkage long sys_access(const char __
        else
                current->cap_effective = current->cap_permitted;
  
@@ -388,7 +395,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
                path_release(&nd);
        }
  
-@@ -519,8 +521,9 @@
+@@ -520,8 +522,9 @@ asmlinkage long sys_chdir(const char __u
  {
        struct nameidata nd;
        int error;
@@ -399,7 +406,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
        if (error)
                goto out;
  
-@@ -572,8 +575,9 @@
+@@ -573,8 +576,9 @@ asmlinkage long sys_chroot(const char __
  {
        struct nameidata nd;
        int error;
@@ -410,45 +417,88 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
        if (error)
                goto out;
  
-@@ -754,27 +758,8 @@
-  * for the internal routines (ie open_namei()/follow_link() etc). 00 is
-  * used by symlinks.
-  */
--struct file *filp_open(const char * filename, int flags, int mode)
--{
--      int namei_flags, error;
--      struct nameidata nd;
+@@ -741,8 +745,6 @@ asmlinkage long sys_fchown(unsigned int 
+       return error;
+ }
+-static struct file *__dentry_open(struct dentry *, struct vfsmount *, int, struct file *);
 -
--      namei_flags = flags;
--      if ((namei_flags+1) & O_ACCMODE)
--              namei_flags++;
--      if (namei_flags & O_TRUNC)
--              namei_flags |= 2;
+ /*
+  * Note that while the flag value (low two bits) for sys_open means:
+  *    00 - read-only
+@@ -760,8 +762,9 @@ static struct file *__dentry_open(struct
+ struct file *filp_open(const char * filename, int flags, int mode)
+ {
+       int namei_flags, error;
++      struct file * temp_filp;
+       struct nameidata nd;
+-      struct file *f;
++      intent_init(&nd.intent, IT_OPEN);
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+@@ -769,16 +772,11 @@ struct file *filp_open(const char * file
+       if (namei_flags & O_TRUNC)
+               namei_flags |= 2;
+-      error = -ENFILE;
+-      f = get_empty_filp();
+-      if (f == NULL)
+-              return ERR_PTR(error);
 -
--      error = open_namei(filename, namei_flags, mode, &nd);
+       error = open_namei(filename, namei_flags, mode, &nd);
 -      if (!error)
--              return dentry_open(nd.dentry, nd.mnt, flags);
--
--      return ERR_PTR(error);
--}
+-              return __dentry_open(nd.dentry, nd.mnt, flags, f);
 -
--EXPORT_SYMBOL(filp_open);
+-      put_filp(f);
++      if (!error) {
++              temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent);
++              return temp_filp;
++      }       
+       return ERR_PTR(error);
+ }
+@@ -786,29 +784,27 @@ EXPORT_SYMBOL(filp_open);
+ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+ {
+-      int error;
+-      struct file *f;
 -
--struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags,
-+                          struct lookup_intent *it)
+-      error = -ENFILE;
+-      f = get_empty_filp();
+-      if (f == NULL) {
+-              dput(dentry);
+-              mntput(mnt);
+-              return ERR_PTR(error);
+-      }
++      struct lookup_intent it;
++      intent_init(&it, IT_LOOKUP);
+-      return __dentry_open(dentry, mnt, flags, f);
++      return dentry_open_it(dentry, mnt, flags, &it);
+ }
+ EXPORT_SYMBOL(dentry_open);
+-static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, struct lookup_intent *it)
  {
-       struct file * f;
++      struct file *f;
        struct inode *inode;
-@@ -786,6 +771,7 @@
-               goto cleanup_dentry;
+       int error;
++      error = -ENFILE;
++      f = get_empty_filp();
++      if (!f)
++              goto cleanup_dentry;
        f->f_flags = flags;
        f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
 +      f->f_it = it;
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = get_write_access(inode);
-@@ -804,6 +790,7 @@
+@@ -827,6 +823,7 @@ static struct file *__dentry_open(struct
                error = f->f_op->open(inode,f);
                if (error)
                        goto cleanup_all;
@@ -456,56 +506,19 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
        }
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  
-@@ -829,6 +816,7 @@
+@@ -851,6 +848,8 @@ cleanup_all:
+       f->f_vfsmnt = NULL;
  cleanup_file:
        put_filp(f);
- cleanup_dentry:
++cleanup_dentry:
 +      intent_release(it);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
-@@ -836,6 +824,36 @@
- EXPORT_SYMBOL(dentry_open);
-+struct file *filp_open(const char * filename, int flags, int mode)
-+{
-+      int namei_flags, error;
-+      struct file * temp_filp;
-+      struct nameidata nd;
-+      intent_init(&nd.intent, IT_OPEN);
-+
-+      namei_flags = flags;
-+      if ((namei_flags+1) & O_ACCMODE)
-+              namei_flags++;
-+      if (namei_flags & O_TRUNC)
-+              namei_flags |= 2;
-+
-+      error = open_namei(filename, namei_flags, mode, &nd);
-+      if (!error) {
-+              temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent);
-+              return temp_filp;
-+      }       
-+      return ERR_PTR(error);
-+}
-+
-+
-+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
-+{
-+      struct lookup_intent it;
-+      intent_init(&it, IT_LOOKUP);
-+
-+      return dentry_open_it(dentry, mnt, flags, &it);
-+}
-+
- /*
-  * Find an empty file descriptor entry, and mark it busy.
-  */
-Index: linux-2.6.9-5.0.3.EL/fs/stat.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/stat.c        2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/stat.c     2005-02-26 23:29:02.000000000 +0200
-@@ -37,7 +37,7 @@
+diff -urp a/fs/stat.c b/fs/stat.c
+--- a/fs/stat.c        2006-03-10 15:00:01.000000000 -0800
++++ b/fs/stat.c        2006-03-10 15:00:02.000000000 -0800
+@@ -37,7 +37,7 @@ void generic_fillattr(struct inode *inod
  
  EXPORT_SYMBOL(generic_fillattr);
  
@@ -514,7 +527,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/stat.c
  {
        struct inode *inode = dentry->d_inode;
        int retval;
-@@ -46,6 +46,8 @@
+@@ -46,6 +46,8 @@ int vfs_getattr(struct vfsmount *mnt, st
        if (retval)
                return retval;
  
@@ -523,7 +536,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/stat.c
        if (inode->i_op->getattr)
                return inode->i_op->getattr(mnt, dentry, stat);
  
-@@ -62,14 +64,20 @@
+@@ -62,14 +64,20 @@ int vfs_getattr(struct vfsmount *mnt, st
  
  EXPORT_SYMBOL(vfs_getattr);
  
@@ -546,7 +559,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/stat.c
                path_release(&nd);
        }
        return error;
-@@ -81,10 +89,11 @@
+@@ -81,10 +89,11 @@ int vfs_lstat(char __user *name, struct 
  {
        struct nameidata nd;
        int error;
@@ -560,7 +573,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/stat.c
                path_release(&nd);
        }
        return error;
-@@ -96,9 +105,12 @@
+@@ -96,9 +105,12 @@ int vfs_fstat(unsigned int fd, struct ks
  {
        struct file *f = fget(fd);
        int error = -EBADF;
@@ -574,44 +587,9 @@ Index: linux-2.6.9-5.0.3.EL/fs/stat.c
                fput(f);
        }
        return error;
-Index: linux-2.6.9-5.0.3.EL/fs/nfs/dir.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/nfs/dir.c     2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/nfs/dir.c  2005-04-01 18:10:28.924760536 +0300
-@@ -718,7 +718,7 @@
-               return 0;
-       if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
-               return 0;
--      return (nd->intent.open.flags & O_EXCL) != 0;
-+      return (nd->intent.it_flags & O_EXCL) != 0;
- }
- static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
-@@ -1043,7 +1043,7 @@
-       attr.ia_valid = ATTR_MODE;
-       if (nd && (nd->flags & LOOKUP_CREATE))
--              open_flags = nd->intent.open.flags;
-+              open_flags = nd->intent.it_flags;
-       /*
-        * The 0 argument passed into the create function should one day
-Index: linux-2.6.9-5.0.3.EL/fs/inode.c
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/fs/inode.c       2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/fs/inode.c    2005-02-26 23:29:02.000000000 +0200
-@@ -233,6 +233,7 @@
-       inodes_stat.nr_unused--;
- }
-+EXPORT_SYMBOL(__iget);
- /**
-  * clear_inode - clear an inode
-  * @inode: inode to clear
-Index: linux-2.6.9-5.0.3.EL/include/linux/dcache.h
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/include/linux/dcache.h   2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/include/linux/dcache.h        2005-02-26 23:29:02.000000000 +0200
+diff -urp a/include/linux/dcache.h b/include/linux/dcache.h
+--- a/include/linux/dcache.h   2006-03-10 15:00:02.000000000 -0800
++++ b/include/linux/dcache.h   2006-03-10 17:01:52.000000000 -0800
 @@ -4,6 +4,7 @@
  #ifdef __KERNEL__
  
@@ -620,7 +598,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/dcache.h
  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/cache.h>
-@@ -37,6 +38,8 @@
+@@ -37,6 +38,8 @@ struct qstr {
        const unsigned char *name;
  };
  
@@ -629,11 +607,10 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/dcache.h
  struct dentry_stat_t {
        int nr_dentry;
        int nr_unused;
-Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/include/linux/fs.h       2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/include/linux/fs.h    2005-02-26 23:29:02.000000000 +0200
-@@ -74,6 +74,7 @@
+diff -urp a/include/linux/fs.h b/include/linux/fs.h
+--- a/include/linux/fs.h       2006-03-10 15:00:02.000000000 -0800
++++ b/include/linux/fs.h       2006-03-10 17:05:51.000000000 -0800
+@@ -75,6 +75,7 @@ extern int leases_enable, dir_notify_ena
  
  #define FMODE_READ 1
  #define FMODE_WRITE 2
@@ -641,7 +618,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  
  /* Internal kernel extensions */
  #define FMODE_LSEEK   4
-@@ -258,6 +259,8 @@
+@@ -259,6 +260,8 @@ typedef void (dio_iodone_t)(struct inode
  #define ATTR_ATTR_FLAG        1024
  #define ATTR_KILL_SUID        2048
  #define ATTR_KILL_SGID        4096
@@ -650,7 +627,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  
  /*
   * This is the Inode Attributes structure, used for notify_change().  It
-@@ -456,6 +459,7 @@
+@@ -457,6 +460,7 @@ struct inode {
        struct block_device     *i_bdev;
        struct cdev             *i_cdev;
        int                     i_cindex;
@@ -658,7 +635,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  
        __u32                   i_generation;
  
-@@ -589,6 +593,7 @@
+@@ -590,6 +594,7 @@ struct file {
        spinlock_t              f_ep_lock;
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
@@ -666,7 +643,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  };
  extern spinlock_t files_lock;
  #define file_list_lock() spin_lock(&files_lock);
-@@ -934,7 +939,9 @@
+@@ -962,7 +967,9 @@ struct inode_operations {
        void (*truncate) (struct inode *);
        int (*permission) (struct inode *, int, struct nameidata *);
        int (*setattr) (struct dentry *, struct iattr *);
@@ -676,7 +653,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
        ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
-@@ -974,6 +981,7 @@
+@@ -1002,6 +1009,7 @@ struct super_operations {
        int (*remount_fs) (struct super_block *, int *, char *);
        void (*clear_inode) (struct inode *);
        void (*umount_begin) (struct super_block *);
@@ -684,7 +661,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  
        int (*show_options)(struct seq_file *, struct vfsmount *);
  };
-@@ -1164,6 +1172,7 @@
+@@ -1194,6 +1202,7 @@ extern int unregister_filesystem(struct 
  extern struct vfsmount *kern_mount(struct file_system_type *);
  extern int may_umount_tree(struct vfsmount *);
  extern int may_umount(struct vfsmount *);
@@ -692,7 +669,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  extern long do_mount(char *, char *, char *, unsigned long, void *);
  
  extern int vfs_statfs(struct super_block *, struct kstatfs *);
-@@ -1228,6 +1237,7 @@
+@@ -1258,6 +1267,7 @@ static inline int break_lease(struct ino
  extern int do_truncate(struct dentry *, loff_t start);
  extern struct file *filp_open(const char *, int, int);
  extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
@@ -700,10 +677,21 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/fs.h
  extern int filp_close(struct file *, fl_owner_t id);
  extern char * getname(const char __user *);
  
-Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/include/linux/namei.h    2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/include/linux/namei.h 2005-02-26 23:29:02.000000000 +0200
+diff -urp a/include/linux/mount.h b/include/linux/mount.h
+--- a/include/linux/mount.h    2006-03-10 15:00:03.000000000 -0800
++++ b/include/linux/mount.h    2006-03-10 15:00:03.000000000 -0800
+@@ -34,6 +34,8 @@ struct vfsmount
+       struct list_head mnt_list;
+       struct list_head mnt_fslink;    /* link in fs-specific expiry list */
+       struct namespace *mnt_namespace; /* containing namespace */
++      struct list_head mnt_lustre_list; /* GNS mount list */
++      unsigned long mnt_last_used;    /* for GNS auto-umount (jiffies) */
+ };
+ static inline struct vfsmount *mntget(struct vfsmount *mnt)
+diff -urp a/include/linux/namei.h b/include/linux/namei.h
+--- a/include/linux/namei.h    2006-03-10 15:00:03.000000000 -0800
++++ b/include/linux/namei.h    2006-03-10 15:00:03.000000000 -0800
 @@ -2,14 +2,48 @@
  #define _LINUX_NAMEI_H
  
@@ -756,7 +744,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
  enum { MAX_NESTED_LINKS = 8 };
  
  struct nameidata {
-@@ -21,10 +55,7 @@
+@@ -21,10 +55,7 @@ struct nameidata {
        unsigned        depth;
        char *saved_names[MAX_NESTED_LINKS + 1];
  
@@ -768,7 +756,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
  };
  
  /*
-@@ -47,6 +78,8 @@
+@@ -47,6 +78,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
  #define LOOKUP_NOALT          32
  #define LOOKUP_ATOMIC         64
  #define LOOKUP_REVAL          128
@@ -777,7 +765,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
  
  /*
   * Intent data
-@@ -56,6 +90,12 @@
+@@ -56,6 +89,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
  #define LOOKUP_ACCESS         (0x0400)
  
  extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
@@ -790,7 +778,7 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
  #define user_path_walk(name,nd) \
        __user_walk(name, LOOKUP_FOLLOW, nd)
  #define user_path_walk_link(name,nd) \
-@@ -68,7 +108,6 @@
+@@ -68,7 +107,6 @@ extern void path_release_on_umount(struc
  
  extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
  extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
@@ -798,16 +786,3 @@ Index: linux-2.6.9-5.0.3.EL/include/linux/namei.h
  extern int follow_down(struct vfsmount **, struct dentry **);
  extern int follow_up(struct vfsmount **, struct dentry **);
  
-Index: linux-2.6.9-5.0.3.EL/include/linux/mount.h
-===================================================================
---- linux-2.6.9-5.0.3.EL.orig/include/linux/mount.h    2005-02-26 14:28:01.000000000 +0200
-+++ linux-2.6.9-5.0.3.EL/include/linux/mount.h 2005-02-26 23:29:02.000000000 +0200
-@@ -34,6 +34,8 @@
-       struct list_head mnt_list;
-       struct list_head mnt_fslink;    /* link in fs-specific expiry list */
-       struct namespace *mnt_namespace; /* containing namespace */
-+      struct list_head mnt_lustre_list; /* GNS mount list */
-+      unsigned long mnt_last_used;    /* for GNS auto-umount (jiffies) */
- };
- static inline struct vfsmount *mntget(struct vfsmount *mnt)
index 0116393..695423b 100644 (file)
@@ -682,7 +682,7 @@ Index: linux-2.6.5-12.1/include/linux/fs.h
        spinlock_t              f_ep_lock;
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
-+      struct lookup_intent    *f_it;
++      struct lookup_intent    *f_it;
  };
  extern spinlock_t files_lock;
  #define file_list_lock() spin_lock(&files_lock);
index f46227f..80db906 100644 (file)
@@ -170,13 +170,13 @@ Index: linux-2.6.12.5/fs/namei.c
                                inode = nd->dentry->d_inode;
                                /* fallthrough */
                        case 1:
-+                              nd->flags |= LOOKUP_LAST;
-+                              err = revalidate_special(nd);
-+                              nd->flags &= ~LOOKUP_LAST;
++                              nd->flags |= LOOKUP_LAST;
++                              err = revalidate_special(nd);
++                              nd->flags &= ~LOOKUP_LAST;
 +                              if (!nd->dentry->d_inode)
 +                                      err = -ENOENT;
 +                              if (err) {
-+                                      path_release(nd);
++                                      path_release(nd);
 +                                      goto return_err;
 +                              }
 +                              if (lookup_flags & LOOKUP_DIRECTORY) {
@@ -272,9 +272,9 @@ Index: linux-2.6.12.5/fs/namei.c
        dir = nd->dentry;
        nd->flags &= ~LOOKUP_PARENT;
        down(&dir->d_inode->i_sem);
-+      nd->flags |= LOOKUP_LAST;
++      nd->flags |= LOOKUP_LAST;
        path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
-+      nd->flags &= ~LOOKUP_LAST;
++      nd->flags &= ~LOOKUP_LAST;
        path.mnt = nd->mnt;
  
  do_last:
@@ -282,9 +282,9 @@ Index: linux-2.6.12.5/fs/namei.c
        }
        dir = nd->dentry;
        down(&dir->d_inode->i_sem);
-+      nd->flags |= LOOKUP_LAST;
++      nd->flags |= LOOKUP_LAST;
        path.dentry = __lookup_hash(&nd->last, nd->dentry, nd);
-+      nd->flags &= ~LOOKUP_LAST;
++      nd->flags &= ~LOOKUP_LAST;
        path.mnt = nd->mnt;
        putname(nd->last.name);
        goto do_last;
index 166a512..c0e0c3d 100644 (file)
@@ -52,13 +52,13 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
-+              if (nd.dentry->d_inode->i_op->mkdir_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir_raw(&nd, mode);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir_raw(&nd, mode);
++                      /* the file system wants to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
                dentry = lookup_create(&nd, 1);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
@@ -82,16 +82,16 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
                        error = -EBUSY;
                        goto exit1;
        }
-+ 
-+      if (nd.dentry->d_inode->i_op->rmdir_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ 
-+              error = op->rmdir_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-+ 
++
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
++
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
@@ -107,13 +107,13 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
@@ -180,12 +180,12 @@ Index: linux-2.6.9-5.0.3.EL/fs/namei.c
        if (newnd.last_type != LAST_NORM)
                goto exit2;
  
-+      if (old_dir->d_inode->i_op->rename_raw) {
-+              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit2;
-+      }
++      if (old_dir->d_inode->i_op->rename_raw) {
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
 +
        trap = lock_rename(new_dir, old_dir);
  
@@ -325,7 +325,7 @@ Index: linux-2.6.9-5.0.3.EL/fs/open.c
 +              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
 +              error = op->setattr_raw(inode, &newattrs);
-+              /* the file system wants to use normal vfs path now */
++              /* the file system wants to use the normal vfs path now */
 +              if (error != -EOPNOTSUPP)
 +                      goto out;
 +      }
index 8817678..184fa49 100644 (file)
@@ -304,7 +304,7 @@ Index: linux-2.6.5-12.1/fs/open.c
 +      
 +      if (inode->i_op->setattr_raw) {
 +              struct inode_operations *op = dentry->d_inode->i_op;
-+              
++
 +              newattrs.ia_mode = mode;
 +              newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
 +              newattrs.ia_valid |= ATTR_RAW;
index 66ead6a..ce239c9 100644 (file)
@@ -52,13 +52,13 @@ Index: linux-2.6.12.2/fs/namei.c
                error = path_lookup(tmp, LOOKUP_PARENT, &nd);
                if (error)
                        goto out;
-+              if (nd.dentry->d_inode->i_op->mkdir_raw) {
-+                      struct inode_operations *op = nd.dentry->d_inode->i_op;
-+                      error = op->mkdir_raw(&nd, mode);
-+                      /* the file system wants to use normal vfs path now */
-+                      if (error != -EOPNOTSUPP)
-+                              goto out2;
-+              }
++              if (nd.dentry->d_inode->i_op->mkdir_raw) {
++                      struct inode_operations *op = nd.dentry->d_inode->i_op;
++                      error = op->mkdir_raw(&nd, mode);
++                      /* the file system wants to use normal vfs path now */
++                      if (error != -EOPNOTSUPP)
++                              goto out2;
++              }
                dentry = lookup_create(&nd, 1);
                error = PTR_ERR(dentry);
                if (!IS_ERR(dentry)) {
@@ -82,16 +82,16 @@ Index: linux-2.6.12.2/fs/namei.c
                        error = -EBUSY;
                        goto exit1;
        }
-+ 
-+      if (nd.dentry->d_inode->i_op->rmdir_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ 
-+              error = op->rmdir_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
-+ 
++
++      if (nd.dentry->d_inode->i_op->rmdir_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++              error = op->rmdir_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
++
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
@@ -107,13 +107,13 @@ Index: linux-2.6.12.2/fs/namei.c
        error = -EISDIR;
        if (nd.last_type != LAST_NORM)
                goto exit1;
-+      if (nd.dentry->d_inode->i_op->unlink_raw) {
-+              struct inode_operations *op = nd.dentry->d_inode->i_op;
-+              error = op->unlink_raw(&nd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit1;
-+      }
++      if (nd.dentry->d_inode->i_op->unlink_raw) {
++              struct inode_operations *op = nd.dentry->d_inode->i_op;
++              error = op->unlink_raw(&nd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit1;
++      }
        down(&nd.dentry->d_inode->i_sem);
        dentry = lookup_hash(&nd.last, nd.dentry);
        error = PTR_ERR(dentry);
@@ -180,12 +180,12 @@ Index: linux-2.6.12.2/fs/namei.c
        if (newnd.last_type != LAST_NORM)
                goto exit2;
  
-+      if (old_dir->d_inode->i_op->rename_raw) {
-+              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
-+              /* the file system wants to use normal vfs path now */
-+              if (error != -EOPNOTSUPP)
-+                      goto exit2;
-+      }
++      if (old_dir->d_inode->i_op->rename_raw) {
++              error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++              /* the file system wants to use normal vfs path now */
++              if (error != -EOPNOTSUPP)
++                      goto exit2;
++      }
 +
        trap = lock_rename(new_dir, old_dir);
  
index 4b5419d..011d87d 100644 (file)
@@ -2,22 +2,20 @@ Index: linux-2.6.7-vanilla/fs/dcache.c
 ===================================================================
 --- linux-2.6.7-vanilla.orig/fs/dcache.c       2004-07-01 12:09:19.000000000 +0300
 +++ linux-2.6.7-vanilla/fs/dcache.c    2004-07-01 12:29:12.510193264 +0300
-@@ -219,7 +219,14 @@
+@@ -219,6 +219,13 @@
                spin_unlock(&dcache_lock);
                return 0;
        }
--      /*
 +
-+        /* network invalidation by Lustre */
++      /* network invalidation by Lustre */
 +      if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
 +              spin_unlock(&dcache_lock);
 +              return 0;
 +      }
 +
-       /*
+       /*
         * Check whether to do a partial shrink_dcache
         * to get rid of unused child entries.
-        */
 @@ -1199,16 +1199,25 @@
   * Adds a dentry to the hash according to its name.
   */
diff --git a/lustre/kernel_patches/patches/vm-tunables-rhel4.patch b/lustre/kernel_patches/patches/vm-tunables-rhel4.patch
new file mode 100644 (file)
index 0000000..7e4a9d6
--- /dev/null
@@ -0,0 +1,73 @@
+Index: linux+rhel4+chaos/mm/page_alloc.c
+===================================================================
+--- linux+rhel4+chaos.orig/mm/page_alloc.c
++++ linux+rhel4+chaos/mm/page_alloc.c
+@@ -1972,8 +1972,12 @@ static void setup_per_zone_pages_min(voi
+                                          lowmem_pages;
+               }
+-              zone->pages_low = zone->pages_min * 2;
+-              zone->pages_high = zone->pages_min * 3;
++              /*
++               * When interpreting these watermarks, just keep in mind that:
++               * zone->pages_min == (zone->pages_min * 4) / 4;
++               */
++              zone->pages_low  = (zone->pages_min * 5) / 4;
++              zone->pages_high = (zone->pages_min * 6) / 4;
+               spin_unlock_irqrestore(&zone->lru_lock, flags);
+       }
+ }
+@@ -1982,24 +1986,25 @@ static void setup_per_zone_pages_min(voi
+  * Initialise min_free_kbytes.
+  *
+  * For small machines we want it small (128k min).  For large machines
+- * we want it large (16MB max).  But it is not linear, because network
++ * we want it large (64MB max).  But it is not linear, because network
+  * bandwidth does not increase linearly with machine size.  We use
+  *
+- *    min_free_kbytes = sqrt(lowmem_kbytes)
++ *    min_free_kbytes = 4 * sqrt(lowmem_kbytes), computed for accuracy as
++ *    min_free_kbytes = sqrt(lowmem_kbytes * 16)
+  *
+  * which yields
+  *
+- * 16MB:      128k
+- * 32MB:      181k
+- * 64MB:      256k
+- * 128MB:     362k
+- * 256MB:     512k
+- * 512MB:     724k
+- * 1024MB:    1024k
+- * 2048MB:    1448k
+- * 4096MB:    2048k
+- * 8192MB:    2896k
+- * 16384MB:   4096k
++ * 16MB:      512k
++ * 32MB:      724k
++ * 64MB:      1024k
++ * 128MB:     1448k
++ * 256MB:     2048k
++ * 512MB:     2896k
++ * 1024MB:    4096k
++ * 2048MB:    5792k
++ * 4096MB:    8192k
++ * 8192MB:    11584k
++ * 16384MB:   16384k
+  */
+ static int __init init_per_zone_pages_min(void)
+ {
+@@ -2007,11 +2012,11 @@ static int __init init_per_zone_pages_mi
+       lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
+-      min_free_kbytes = int_sqrt(lowmem_kbytes);
++      min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
+       if (min_free_kbytes < 128)
+               min_free_kbytes = 128;
+-      if (min_free_kbytes > 16384)
+-              min_free_kbytes = 16384;
++      if (min_free_kbytes > 65536)
++              min_free_kbytes = 65536;
+       setup_per_zone_pages_min();
+       setup_per_zone_protection();
+       return 0;
diff --git a/lustre/kernel_patches/patches/vmalloc_to_page-2.4.19-bgl.patch b/lustre/kernel_patches/patches/vmalloc_to_page-2.4.19-bgl.patch
deleted file mode 100644 (file)
index 1ff2f5d..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-Index: linux.mcp2/kernel/ksyms.c
-===================================================================
---- linux.mcp2.orig/kernel/ksyms.c     2004-05-05 14:57:48.000000000 -0700
-+++ linux.mcp2/kernel/ksyms.c  2004-05-05 15:32:44.000000000 -0700
-@@ -108,6 +108,7 @@
- EXPORT_SYMBOL(kfree);
- EXPORT_SYMBOL(vfree);
- EXPORT_SYMBOL(__vmalloc);
-+extern struct page * vmalloc_to_page(void *addr);
- EXPORT_SYMBOL(vmalloc_to_page);
- EXPORT_SYMBOL(mem_map);
- EXPORT_SYMBOL(remap_page_range);
index ec748cf..9b5c368 100644 (file)
@@ -8,7 +8,7 @@ iopen-misc-2.6-suse.patch
 export-truncate-2.6-suse.patch 
 export_symbols-2.6-rhel4.patch 
 dev_read_only-2.6-suse.patch 
-export-2.6-suse.patch
+export-log-2.6-rhel4.patch
 lookup_bdev_init_intent.patch
 remove-suid-2.6-suse.patch
 export-show_task-2.6-vanilla.patch
@@ -18,3 +18,4 @@ export_symbol_numa.patch
 qsnet-rhel4-2.6.patch
 linux-2.6-binutils-2.16.patch
 compile-fixes-2.6.9-rhel4-22.patch
+vm-tunables-rhel4.patch 
diff --git a/lustre/kernel_patches/series/bgl-2.4.19 b/lustre/kernel_patches/series/bgl-2.4.19
deleted file mode 100644 (file)
index bd67a30..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-dev_read_only_2.4.20-rh.patch
-exports_2.4.19-bgl.patch 
-lustre_version.patch
-vfs_intent-2.4.19-bgl.patch 
-invalidate_show-2.4.19-bgl.patch 
-export-truncate-bgl.patch 
-iod-stock-24-exports-2.4.19-bgl.patch 
-ext3-htree-2.4.19-bgl.patch 
-linux-2.4.19-bgl-xattr-0.8.54.patch 
-ext3-2.4.20-fixes.patch
-ext3-2.4-ino_t.patch
-ext3-largefile.patch
-ext3-truncate_blocks.patch
-ext3-unmount_sync.patch
-ext3-use-after-free-2.4.19-pre1.patch 
-ext3-orphan_lock.patch
-ext3-noread-2.4.20.patch
-ext3-delete_thread-2.4.20.patch
-extN-wantedi.patch
-ext3-san-2.4.20.patch
-ext3-map_inode_page.patch
-ext3-error-export.patch
-iopen-2.4.19-bgl.patch 
-tcp-zero-copy-2.4.19-pre1.patch 
-jbd-dont-account-blocks-twice.patch
-jbd-commit-tricks.patch
-ext3-no-write-super.patch
-add_page_private-2.4.19-bgl.patch 
-socket-exports-2.4.19-bgl.patch 
-removepage-2.4.20.patch
-jbd-ctx_switch.patch
-jbd-flushtime-2.4.19-suse.patch
-jbd-get_write_access.patch
-nfs_export_kernel-2.4.19-bgl.patch 
-ext3-raw-lookup.patch
-ext3-ea-in-inode-2.4.20.patch
-listman-2.4.19-bgl.patch
-ext3-trusted_ea-2.4.20.patch
-jbd-2.4.19-pre1-jcberr.patch 
-resched-2.4.19-pre1.patch
-ext3-xattr-ptr-arith-fix.patch
-vmalloc_to_page-2.4.19-bgl.patch 
-procfs-ndynamic-2.4.patch
-ext3-truncate-buffer-head.patch
-kallsyms-2.4-bgl.patch 
-kksymoops-2.4-bgl.patch 
-export-show_task-2.4-bgl.patch 
index 12b65ba..e0b9c23 100644 (file)
@@ -16,7 +16,7 @@ extN-wantedi-2.4.21-suse2.patch
 ext3-san-2.4.20.patch
 ext3-map_inode_page-2.4.21-suse2.patch 
 ext3-error-export.patch
-iopen-2.4.21-sles8sp3.patch
+iopen-2.4.21-chaos.patch
 tcp-zero-copy-2.4.21-suse2.patch 
 jbd-dont-account-blocks-twice.patch
 jbd-commit-tricks.patch
index ddcefe4..74e9445 100644 (file)
@@ -16,7 +16,7 @@ extN-wantedi-2.4.21-suse2.patch
 ext3-san-2.4.20.patch
 ext3-map_inode_page-2.4.21-suse2.patch 
 ext3-error-export.patch
-iopen-2.4.19-suse.patch
+iopen-2.4.21-chaos.patch
 jbd-dont-account-blocks-twice.patch
 jbd-commit-tricks.patch
 ext3-no-write-super-chaos.patch
index 307353a..b89f5a8 100644 (file)
@@ -1,5 +1,5 @@
 lnxmaj="2.6.9"
-lnxrel="22.EL"
+lnxrel="34.EL"
 
 KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2
 SERIES=2.6-rhel4.series
index d604f6e..5e34152 100644 (file)
@@ -1,5 +1,5 @@
 lnxmaj="2.6.5"
-lnxrel="7.201"
+lnxrel="7.252"
 
 KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
 # they include our patches
index 7ee0629..bc48f94 100644 (file)
@@ -2,15 +2,20 @@ SERIES             MNEMONIC                 COMMENT                     ARCH
 
 SUPPORTED KERNELS:
 rhel-2.4.21        linux-2.4.21-20.3EL      RHEL3 2.4.21                all
-2.6-suse           linux-2.6-suse           SLES9 SP1 kernel            all
+2.6-suse           linux-2.6-suse           already in SLES9 SP1 kernel all
 2.6-suse-newer     linux-2.6-suse           SLES9 SP1 kernel add-ons    all
 2.6-rhel4          linux-2.6-rhel4          RHEL4 2.6.9 kernel          all
+2.6.12-vanilla     linux-2.6.12.6           kernel.org 2.6.12.6         all
 
 NB - The patches in the 2.6-suse series should already be in the SLES9 SP1
      kernel.  The patches in the 2.6-suse-newer series are patches that
      have been created since the SP1 kernel was released and should be
      applied to the already-patched SP1 kernel.
 
+NB - The patches in the ldiskfs series should not be applied to the kernel.
+     They are applied by the Lustre build process to create the ldiskfs
+     kernel module, rather than being used to modify the core ext3 code.
+
 UNSUPPORTED KERNELS; BEING PHASED OUT; MAY BE MISSING CRITICAL BUG FIXES:
 hp-pnnl-2.4.20     linux-2.4.20-hp4_pnnl1   same as vanilla but no uml  ia64
 vanilla-2.4.24     linux-2.4.24             patch with uml-2.4.24-6     um
index 0eff073..7e378c2 100644 (file)
@@ -38,7 +38,8 @@ sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
        cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3
        cp $(linux_headers) linux-stage/include/linux
 if USE_QUILT
-       cd linux-stage && quilt setup -d ../$(patches) ../$(series)
+       ln -s ../$(patches) linux-stage/patches
+       ln -s ../$(series) linux-stage/series
        cd linux-stage && quilt push -a -q
 else
        @echo -n "Applying ext3 patches:"
index 8a342a5..fab1c40 100644 (file)
@@ -419,8 +419,8 @@ out_sem:
 int client_disconnect_export(struct obd_export *exp)
 {
         struct obd_device *obd = class_exp2obd(exp);
-        struct client_obd *cli = &obd->u.cli;
-        struct obd_import *imp = cli->cl_import;
+        struct client_obd *cli;
+        struct obd_import *imp;
         int rc = 0, err;
         ENTRY;
 
@@ -430,6 +430,9 @@ int client_disconnect_export(struct obd_export *exp)
                 RETURN(-EINVAL);
         }
 
+        cli = &obd->u.cli;
+        imp = cli->cl_import;
+
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
@@ -503,9 +506,8 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
         }
 
         conn->cookie = exp->exp_handle.h_cookie;
-        CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n",
-               cluuid->uuid, exp);
-        CDEBUG(D_IOCTL, "connect: cookie "LPX64"\n", conn->cookie);
+        CDEBUG(D_HA, "connect export for UUID '%s' at %p, cookie "LPX64"\n",
+               cluuid->uuid, exp, conn->cookie);
         RETURN(0);
 }
 
@@ -538,16 +540,14 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         obd_str2uuid (&tgtuuid, str);
         target = class_uuid2obd(&tgtuuid);
         /* COMPAT_146 */
-        if (!target) {
-                target = class_name2obd(str);
-        }
         /* old (pre 1.6) lustre_process_log tries to connect to mdsname
-           (eg. mdsA) instead of uuid.  Since 1.6 changes names, the above
-           hack fails. */
+           (eg. mdsA) instead of uuid. */
         if (!target) {
                 snprintf((char *)tgtuuid.uuid, sizeof(tgtuuid), "%s_UUID", str);
                 target = class_uuid2obd(&tgtuuid);
         }
+        if (!target)
+                target = class_name2obd(str);
         /* end COMPAT_146 */
 
         if (!target || target->obd_stopping || !target->obd_set_up) {
@@ -600,22 +600,26 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 GOTO(out, rc);
 
         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
-                if (!data || (data->ocd_version < LUSTRE_VERSION_CODE -
-                               LUSTRE_VERSION_ALLOWED_OFFSET)) {
-                        if (!data) 
-                                DEBUG_REQ(D_WARNING, req, "Refusing old "
-                                          "libclient connection attempt\n");
-                        else
-                                DEBUG_REQ(D_WARNING, req,
-                                          "Refusing old (%d.%d.%d.%d) "
-                                          "libclient connection attempt\n",
+                if (!data) {
+                        DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) "
+                                  "libclient connection attempt\n");
+                        GOTO(out, rc = -EPROTO);
+                } else if (data->ocd_version < LUSTRE_VERSION_CODE -
+                                               LUSTRE_VERSION_ALLOWED_OFFSET) {
+                        DEBUG_REQ(D_WARNING, req, "Refusing old (%d.%d.%d.%d) "
+                                  "libclient connection attempt\n",
                                   OBD_OCD_VERSION_MAJOR(data->ocd_version),
                                   OBD_OCD_VERSION_MINOR(data->ocd_version),
                                   OBD_OCD_VERSION_PATCH(data->ocd_version),
                                   OBD_OCD_VERSION_FIX(data->ocd_version));
-                        data = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*data));
-                        data->ocd_connect_flags = OBD_CONNECT_VERSION;
-                        data->ocd_version = LUSTRE_VERSION_CODE;
+                        data = lustre_msg_buf(req->rq_repmsg, 0,
+                                              offsetof(typeof(*data),
+                                                       ocd_version) +
+                                              sizeof(data->ocd_version));
+                        if (data) {
+                                data->ocd_connect_flags = OBD_CONNECT_VERSION;
+                                data->ocd_version = LUSTRE_VERSION_CODE;
+                        }
                         GOTO(out, rc = -EPROTO);
                 }
         }
@@ -640,8 +644,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         if (!export) {
                 spin_unlock(&target->obd_dev_lock);
         } else if (req->rq_reqmsg->conn_cnt == 1) {
-                CERROR("%s reconnected with 1 conn_cnt; cookies not "
-                       "random?\n", cluuid.uuid);
+                CERROR("%s: NID %s (%s) reconnected with 1 conn_cnt; "
+                       "cookies not random?\n", target->obd_name,
+                       libcfs_nid2str(req->rq_peer.nid), cluuid.uuid);
                 GOTO(out, rc = -EALREADY);
         }
 
@@ -664,9 +669,10 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
 
         if (export == NULL) {
                 if (target->obd_recovering) {
-                        CERROR("%s: denying connection for new client %s: "
+                        CERROR("%s: denying connection for new client %s (%s): "
                                "%d clients in recovery for %lds\n",
-                               target->obd_name, cluuid.uuid,
+                               target->obd_name,
+                               libcfs_nid2str(req->rq_peer.nid), cluuid.uuid,
                                target->obd_recoverable_clients,
                                (target->obd_recovery_timer.expires-jiffies)/HZ);
                         rc = -EBUSY;
@@ -714,9 +720,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
 
         spin_lock_irqsave(&export->exp_lock, flags);
         if (export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
-                CERROR("%s: already connected at a higher conn_cnt: %d > %d\n",
-                       cluuid.uuid, export->exp_conn_cnt,
-                       req->rq_reqmsg->conn_cnt);
+                CERROR("%s: %s already connected at higher conn_cnt: %d > %d\n",
+                       cluuid.uuid, libcfs_nid2str(req->rq_peer.nid),
+                       export->exp_conn_cnt, req->rq_reqmsg->conn_cnt);
                 spin_unlock_irqrestore(&export->exp_lock, flags);
                 GOTO(out, rc = -EALREADY);
         }
@@ -764,7 +770,6 @@ out:
 
 int target_handle_disconnect(struct ptlrpc_request *req)
 {
-        struct obd_export *exp;
         int rc;
         ENTRY;
 
@@ -773,8 +778,7 @@ int target_handle_disconnect(struct ptlrpc_request *req)
                 RETURN(rc);
 
         /* keep the rq_export around so we can send the reply */
-        exp = class_export_get(req->rq_export);
-        req->rq_status = obd_disconnect(exp);
+        req->rq_status = obd_disconnect(class_export_get(req->rq_export));
         RETURN(0);
 }
 
@@ -811,7 +815,6 @@ static void target_release_saved_req(struct ptlrpc_request *req)
 static void target_finish_recovery(struct obd_device *obd)
 {
         struct list_head *tmp, *n;
-        int rc;
 
         CWARN("%s: sending delayed replies to recovered clients\n",
               obd->obd_name);
@@ -820,12 +823,9 @@ static void target_finish_recovery(struct obd_device *obd)
 
         /* when recovery finished, cleanup orphans on mds and ost */
         if (OBT(obd) && OBP(obd, postrecov)) {
-                rc = OBP(obd, postrecov)(obd);
-                if (rc >= 0)
-                        CWARN("%s: all clients recovered, %d MDS "
-                              "orphans deleted\n", obd->obd_name, rc);
-                else
-                        CWARN("postrecov failed %d\n", rc);
+                int rc = OBP(obd, postrecov)(obd);
+                CWARN("%s: recovery %s: rc %d\n", obd->obd_name,
+                      rc < 0 ? "failed" : "complete", rc);
         }
 
         list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
index 3c43c15..93a7aee 100644 (file)
@@ -1231,15 +1231,15 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
                 return;
         }
 
-        CDEBUG(level, "  -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d) (pid: %d)\n",
+        CDEBUG(level," -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d) (pid: %d)\n",
                lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
                pos, lock->l_pid);
         if (lock->l_conn_export != NULL)
                 obd = lock->l_conn_export->exp_obd;
         if (lock->l_export && lock->l_export->exp_connection) {
                 CDEBUG(level, "  Node: NID %s (rhandle: "LPX64")\n",
-                       libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid),
-                       lock->l_remote_handle.cookie);
+                     libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid),
+                     lock->l_remote_handle.cookie);
         } else if (obd == NULL) {
                 CDEBUG(level, "  Node: local\n");
         } else {
@@ -1252,9 +1252,10 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
                lock->l_resource->lr_name.name[0],
                lock->l_resource->lr_name.name[1]);
         CDEBUG(level, "  Req mode: %s, grant mode: %s, rc: %u, read: %d, "
-               "write: %d\n", ldlm_lockname[lock->l_req_mode],
+               "write: %d flags: %#x\n", ldlm_lockname[lock->l_req_mode],
                ldlm_lockname[lock->l_granted_mode],
-               atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers);
+               atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers,
+               lock->l_flags);
         if (lock->l_resource->lr_type == LDLM_EXTENT)
                 CDEBUG(level, "  Extent: "LPU64" -> "LPU64
                        " (req "LPU64"-"LPU64")\n",
index d59bfe9..25e042c 100644 (file)
@@ -393,6 +393,12 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock,
                                    libcfs_nid2str(peer.nid));
                         ldlm_lock_cancel(lock);
                         rc = -ERESTART;
+                } else if (lock->l_flags & LDLM_FL_CANCEL) {
+                        LDLM_DEBUG(lock, "%s AST timeout from nid %s, but "
+                                   "cancel was received (AST reply lost?)",
+                                   ast_type, libcfs_nid2str(peer.nid));
+                        ldlm_lock_cancel(lock);
+                        rc = -ERESTART;
                 } else {
                         l_lock(&lock->l_resource->lr_namespace->ns_lock);
                         ldlm_del_waiting_lock(lock);
index c96dd92..c90424a 100644 (file)
@@ -386,7 +386,7 @@ int ldlm_cli_enqueue(struct obd_export *exp,
                          sizeof(*body), "buflen[%d] = %d, not %d\n",
                          MDS_REQ_INTENT_LOCKREQ_OFF,
                          req->rq_reqmsg->buflens[MDS_REQ_INTENT_LOCKREQ_OFF],
-                         sizeof(*body));
+                         (int)sizeof(*body));
         }
 
         lock->l_conn_export = exp;
@@ -985,13 +985,14 @@ int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
         struct list_head *tmp, *next;
         struct ldlm_lock *lock;
         int rc = LDLM_ITER_CONTINUE;
-        struct ldlm_namespace *ns = res->lr_namespace;
+        struct ldlm_namespace *ns;
 
         ENTRY;
 
         if (!res)
                 RETURN(LDLM_ITER_CONTINUE);
 
+        ns = res->lr_namespace;
         l_lock(&ns->ns_lock);
         list_for_each_safe(tmp, next, &res->lr_granted) {
                 lock = list_entry(tmp, struct ldlm_lock, l_res_link);
index 68b5992..9321581 100644 (file)
@@ -308,9 +308,9 @@ void __liblustre_setup_(void)
                 printf("LibLustre: no mount target specified\n");
                 exit(1);
         }
-        printf("LibLustre: mount point %s, target %s\n",
-                lustre_path, target);
 
+        CDEBUG(D_CONFIG, "LibLustre: mount point %s, target %s\n",
+               lustre_path, target);
 
 #ifdef INIT_SYSIO
         /* initialize libsysio & mount rootfs */
index d8965c8..6427f5e 100644 (file)
@@ -255,4 +255,20 @@ static inline struct ext2_dirent *ext2_next_entry(struct ext2_dirent *p)
         return (struct ext2_dirent*)((char*) p + le16_to_cpu(p->rec_len));
 }
 
+/* Fill @lvb with the size, block count and mtime/atime/ctime currently held
+ * in the stat data that llu_i2stat() returns for @inode. */
+static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb)
+{
+        struct intnl_stat *cached = llu_i2stat(inode);
+
+        /* timestamps */
+        lvb->lvb_atime = cached->st_atime;
+        lvb->lvb_mtime = cached->st_mtime;
+        lvb->lvb_ctime = cached->st_ctime;
+
+        /* space usage */
+        lvb->lvb_blocks = cached->st_blocks;
+        lvb->lvb_size = cached->st_size;
+}
+
 #endif
index 17e54ec..b4689d5 100644 (file)
@@ -126,7 +126,7 @@ void liblustre_init_random()
         seed[0] = _my_pnid;
 #endif
         gettimeofday(&tv, NULL);
-        ll_srand(tv.tv_usec | __swab32(getpid()), tv.tv_sec|__swab32(seed[0]));
+        ll_srand(tv.tv_sec ^ __swab32(seed[0]), tv.tv_usec ^__swab32(getpid()));
 }
 
 void get_random_bytes(void *buf, int size)
index 3804c93..1f60bd5 100644 (file)
@@ -226,6 +226,7 @@ int llu_glimpse_size(struct inode *inode)
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
         struct lustre_handle lockh = { 0 };
+        struct ost_lvb lvb;
         int rc, flags = LDLM_FL_HAS_INTENT;
         ENTRY;
 
@@ -240,9 +241,13 @@ int llu_glimpse_size(struct inode *inode)
                 RETURN(rc > 0 ? -EIO : rc);
         }
 
-        st->st_size = lov_merge_size(lli->lli_smd, 0);
-        st->st_blocks = lov_merge_blocks(lli->lli_smd);
-        st->st_mtime = lov_merge_mtime(lli->lli_smd, st->st_mtime);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
+        st->st_size = lvb.lvb_size;
+        st->st_blocks = lvb.lvb_blocks;
+        st->st_mtime = lvb.lvb_mtime;
+        st->st_atime = lvb.lvb_atime;
+        st->st_ctime = lvb.lvb_ctime;
 
         CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
                (long long)st->st_size, (long long)st->st_blocks);
@@ -259,6 +264,7 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
 {
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         struct intnl_stat *st = llu_i2stat(inode);
+        struct ost_lvb lvb;
         int rc;
         ENTRY;
 
@@ -281,12 +287,17 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
         if (rc > 0)
                 rc = -EIO;
 
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1);
         if (policy->l_extent.start == 0 &&
             policy->l_extent.end == OBD_OBJECT_EOF)
-                st->st_size = lov_merge_size(lsm, 1);
+                st->st_size = lvb.lvb_size;
 
-        if (rc == 0)
-                st->st_mtime = lov_merge_mtime(lsm, st->st_mtime);
+        if (rc == 0) {
+                st->st_mtime = lvb.lvb_mtime;
+                st->st_atime = lvb.lvb_atime;
+                st->st_ctime = lvb.lvb_ctime;
+        }
 
         RETURN(rc);
 }
@@ -566,6 +577,7 @@ ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen,
         struct obd_export *exp = NULL;
         struct llu_io_group *iogroup;
         struct lustre_rw_params p;
+        struct ost_lvb lvb;
         __u64 kms;
         int err, is_read, iovidx, ret;
         int local_lock;
@@ -608,7 +620,9 @@ ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen,
                  * date, and, hence, cannot be used for short-read
                  * detection. Rely in OST to handle short reads in that case.
                  */
-                kms = lov_merge_size(lsm, 1);
+                inode_init_lvb(inode, &lvb);
+                obd_merge_lvb(exp, lsm, &lvb, 1);
+                kms = lvb.lvb_size;
                 /* extent.end is last byte of the range */
                 if (p.lrp_policy.l_extent.end >= kms) {
                         /* A glimpse is necessary to determine whether
@@ -773,6 +787,10 @@ static int llu_file_rwx(struct inode *ino,
 int llu_iop_read(struct inode *ino,
                  struct ioctx *ioctx)
 {
+        /* BUG: 5972 */
+        struct intnl_stat *st = llu_i2stat(ino);
+        st->st_atime = CURRENT_TIME;
+
         return llu_file_rwx(ino, ioctx, 1);
 }
 
index ee8ee37..6ab4ece 100644 (file)
@@ -149,11 +149,14 @@ void llu_update_inode(struct inode *inode, struct mds_body *body,
 
         if (body->valid & OBD_MD_FLID)
                 st->st_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(st->st_atime) = body->atime;
-        if (body->valid & OBD_MD_FLMTIME)
+        if (body->valid & OBD_MD_FLMTIME && /* times only move forward */
+            body->mtime > LTIME_S(st->st_mtime))
                 LTIME_S(st->st_mtime) = body->mtime;
-        if (body->valid & OBD_MD_FLCTIME)
+        if (body->valid & OBD_MD_FLATIME &&
+            body->atime > LTIME_S(st->st_atime))
+                LTIME_S(st->st_atime) = body->atime;
+        if (body->valid & OBD_MD_FLCTIME &&
+            body->ctime > LTIME_S(st->st_ctime))
                 LTIME_S(st->st_ctime) = body->ctime;
         if (body->valid & OBD_MD_FLMODE)
                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
@@ -1887,8 +1890,7 @@ static struct inode_ops llu_inode_ops = {
         inop_lookup:    llu_iop_lookup,
         inop_getattr:   llu_iop_getattr,
         inop_setattr:   llu_iop_setattr,
-      //FIXME corresponding libsysio is tagged b_release_1_4_6
-        //  inop_filldirentries:     llu_iop_filldirentries,
+        inop_filldirentries:     llu_iop_filldirentries,
         inop_mkdir:     llu_iop_mkdir_raw,
         inop_rmdir:     llu_iop_rmdir_raw,
         inop_symlink:   llu_iop_symlink_raw,
index fa98f39..566a4c9 100644 (file)
@@ -1035,6 +1035,68 @@ int t51(char *name)
         printf("\n");
         LEAVE();
 }
+/*
+ * check atime update during read
+ *
+ * After t_echo_create() prepares the test file, sleep two seconds, read
+ * from it and require st_atime to have advanced; repeat three times.
+ * Returns -1 if open/stat/read fail or atime does not move forward.
+ */
+int t52(char *name)
+{
+        char file[MAX_PATH_LENGTH] = "";
+        char buf[16];
+        struct stat statbuf;
+        time_t atime;
+        time_t diff;
+        int fd, i;
+
+        ENTRY("atime should be updated during read");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t52_file", lustre_path);
+
+        t_echo_create(file, "check atime update during read");
+        fd = open(file, O_RDONLY);
+        if (fd < 0) {
+                printf("\nerror open file: %s\n", strerror(errno));
+                return(-1);
+        }
+        /* statbuf is only valid when stat() succeeds */
+        if (stat(file, &statbuf) < 0) {
+                printf("\nerror stat file: %s\n", strerror(errno));
+                close(fd);
+                t_unlink(file);
+                return -1;
+        }
+        printf("st_atime=%s", ctime(&statbuf.st_atime));
+        atime = statbuf.st_atime;
+        for (i = 0; i < 3; i++) {
+                sleep(2);
+                if (read(fd, buf, sizeof(buf)) < 0) {
+                        printf("\nerror read file: %s\n", strerror(errno));
+                        close(fd);
+                        t_unlink(file);
+                        return -1;
+                }
+                if (stat(file, &statbuf) < 0) {
+                        printf("\nerror stat file: %s\n", strerror(errno));
+                        close(fd);
+                        t_unlink(file);
+                        return -1;
+                }
+                printf("st_atime=%s", ctime(&statbuf.st_atime));
+                diff = statbuf.st_atime - atime;
+                if (diff <= 0) {
+                        printf("atime doesn't updated! failed!\n");
+                        close(fd);
+                        t_unlink(file);
+                        return -1;
+                }
+                atime = statbuf.st_atime;
+        }
+        close(fd);
+        t_unlink(file);
+        LEAVE();
+}
 
 extern void __liblustre_setup_(void);
 extern void __liblustre_cleanup_(void);
index c7b32c2..8102e50 100644 (file)
@@ -488,11 +488,11 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 int rc, lmmsize;
 
                 ll_inode2fid(&fid, inode);
-                
+
                 rc = ll_get_max_mdsize(sbi, &lmmsize);
-                if (rc) 
-                        RETURN(rc); 
-                
+                if (rc)
+                        RETURN(rc);
+
                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLDIREA,
                                  lmmsize, &request);
                 if (rc < 0) {
@@ -531,6 +531,8 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 ptlrpc_req_finished(request);
                 return rc;
         }
+        case LL_IOC_OBD_STATFS:
+                RETURN(ll_obd_statfs(inode, (void *)arg));
         case IOC_MDC_GETFILEINFO:
         case IOC_MDC_GETSTRIPE: {
                 struct ptlrpc_request *request = NULL;
index a5a164b..063eed3 100644 (file)
@@ -671,7 +671,7 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
         return rc;
 }
 
-/* NB: lov_merge_size will prefer locally cached writes if they extend the
+/* NB: obd_merge_lvb will prefer locally cached writes if they extend the
  * file (because it prefers KMS over RSS when larger) */
 int ll_glimpse_size(struct inode *inode, int ast_flags)
 {
@@ -679,6 +679,7 @@ int ll_glimpse_size(struct inode *inode, int ast_flags)
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
         struct lustre_handle lockh = { 0 };
+        struct ost_lvb lvb;
         int rc;
         ENTRY;
 
@@ -705,11 +706,14 @@ int ll_glimpse_size(struct inode *inode, int ast_flags)
         }
 
         ll_inode_size_lock(inode, 1);
-        inode->i_size = lov_merge_size(lli->lli_smd, 0);
-        inode->i_blocks = lov_merge_blocks(lli->lli_smd);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(sbi->ll_osc_exp, lli->lli_smd, &lvb, 0);
+        inode->i_size = lvb.lvb_size;
+        inode->i_blocks = lvb.lvb_blocks;
+        LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
+        LTIME_S(inode->i_atime) = lvb.lvb_atime;
+        LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
         ll_inode_size_unlock(inode, 1);
-        LTIME_S(inode->i_mtime) =
-                lov_merge_mtime(lli->lli_smd, LTIME_S(inode->i_mtime));
 
         CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n",
                inode->i_size, inode->i_blocks);
@@ -725,6 +729,7 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
                    int ast_flags)
 {
         struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct ost_lvb lvb;
         int rc;
         ENTRY;
 
@@ -750,6 +755,10 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
         if (rc > 0)
                 rc = -EIO;
 
+        ll_inode_size_lock(inode, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 1); /* KMS only */
+
         if (policy->l_extent.start == 0 &&
             policy->l_extent.end == OBD_OBJECT_EOF) {
                 /* vmtruncate()->ll_truncate() first sets the i_size and then
@@ -762,14 +771,16 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
                  * cancel the result of the truncate.  Getting the
                  * ll_inode_size_lock() after the enqueue maintains the DLM
                  * -> ll_inode_size_lock() acquiring order. */
-                ll_inode_size_lock(inode, 1);
-                inode->i_size = lov_merge_size(lsm, 1);
-                ll_inode_size_unlock(inode, 1);
+                inode->i_size = lvb.lvb_size;
         }
 
-        if (rc == 0)
-                LTIME_S(inode->i_mtime) =
-                        lov_merge_mtime(lsm, LTIME_S(inode->i_mtime));
+        if (rc == 0) {
+                LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
+                LTIME_S(inode->i_atime) = lvb.lvb_atime;
+                LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
+        }
+        ll_inode_size_unlock(inode, 1);
+
         RETURN(rc);
 }
 
@@ -799,6 +810,7 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct ll_lock_tree tree;
         struct ll_lock_tree_node *node;
+        struct ost_lvb lvb;
         struct ll_ra_read bead;
         int rc;
         ssize_t retval;
@@ -868,7 +880,9 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
          * ll_inode_size_lock(). This guarantees that short reads are handled
          * correctly in the face of concurrent writes and truncates.
          */
-        kms = lov_merge_size(lsm, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2sbi(inode)->ll_osc_exp, lsm, &lvb, 1);
+        kms = lvb.lvb_size;
         if (*ppos + count - 1 > kms) {
                 /* A glimpse is necessary to determine whether we return a
                  * short read (B) or some zeroes at the end of the buffer (C) */
@@ -894,6 +908,8 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
         bead.lrr_start = *ppos >> CFS_PAGE_SHIFT;
         bead.lrr_count = (count + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
         ll_ra_read_in(file, &bead);
+        /* BUG: 5972 */
+        file_accessed(file);
         retval = generic_file_read(file, buf, count, ppos);
         ll_ra_read_ex(file, &bead);
 
@@ -1163,7 +1179,7 @@ static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
 }
 
 static int ll_get_grouplock(struct inode *inode, struct file *file,
-                         unsigned long arg)
+                            unsigned long arg)
 {
         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
         ldlm_policy_data_t policy = { .l_extent = { .start = 0,
@@ -1194,7 +1210,7 @@ static int ll_get_grouplock(struct inode *inode, struct file *file,
 }
 
 static int ll_put_grouplock(struct inode *inode, struct file *file,
-                         unsigned long arg)
+                            unsigned long arg)
 {
         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
         struct ll_inode_info *lli = ll_i2info(inode);
@@ -1455,7 +1471,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                 RETURN(ll_iocontrol(inode, file, cmd, arg));
         case EXT3_IOC_GETVERSION_OLD:
         case EXT3_IOC_GETVERSION:
-                RETURN(put_user(inode->i_generation, (int *) arg));
+                RETURN(put_user(inode->i_generation, (int *)arg));
         case LL_IOC_JOIN: {
                 char *ftail;
                 int rc;
@@ -1471,6 +1487,8 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                 RETURN(ll_get_grouplock(inode, file, arg));
         case LL_IOC_GROUP_UNLOCK:
                 RETURN(ll_put_grouplock(inode, file, arg));
+        case LL_IOC_OBD_STATFS:
+                RETURN(ll_obd_statfs(inode, (void *)arg));
 
         /* We need to special case any other ioctls we want to handle,
          * to send them to the MDS/OST as appropriate and to properly
index 8fd5a34..5239302 100644 (file)
@@ -426,6 +426,7 @@ void lustre_dump_dentry(struct dentry *, int recur);
 void lustre_dump_inode(struct inode *);
 struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
                                              struct list_head *list);
+int ll_obd_statfs(struct inode *inode, void *arg);
 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
 
 /* llite/llite_nfs.c */
index 3f0067b..302bd88 100644 (file)
@@ -474,7 +474,6 @@ void client_common_put_super(struct super_block *sb)
         obd_disconnect(sbi->ll_mdc_exp);
 
         lustre_throw_orphan_dentries(sb);
-
         EXIT;
 }
 
@@ -575,8 +574,6 @@ void ll_options(char *options, int *flags)
                         continue;
                 }
         }
-        CERROR("flags %#x\n", *flags);
-
         EXIT;
 }
                 
@@ -1220,7 +1217,8 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
 
         if (body->valid & OBD_MD_FLID)
                 inode->i_ino = body->ino;
-        if (body->valid & OBD_MD_FLATIME)
+        if (body->valid & OBD_MD_FLATIME &&
+            body->atime > LTIME_S(inode->i_atime))
                 LTIME_S(inode->i_atime) = body->atime;
         if (body->valid & OBD_MD_FLMTIME &&
             body->mtime > LTIME_S(inode->i_mtime)) {
@@ -1565,6 +1563,97 @@ struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi,
         return NULL;
 }
 
+/* Handle LL_IOC_OBD_STATFS: return statfs data and the target UUID for one
+ * of the client devices behind this mount.
+ *
+ * @arg is passed to obd_ioctl_getdata() to fetch the request.  In it,
+ * ioc_inlbuf1 holds the device type (LL_STATFS_MDC or LL_STATFS_LOV) and
+ * ioc_inlbuf2 the target index.  The obd_statfs() result is copied to the
+ * user buffer ioc_pbuf1 and the import's target UUID to ioc_pbuf2.
+ *
+ * Returns 0 on success, -ENODEV for an out-of-range index, -ENODATA (UUID
+ * still copied out) for an inactive LOV target, -EFAULT if a copy to user
+ * space fails, -EINVAL for a bad request or unresolvable device, or the
+ * error from obd_ioctl_getdata()/obd_statfs(). */
+int ll_obd_statfs(struct inode *inode, void *arg)
+{
+        struct ll_sb_info *sbi = NULL;
+        struct obd_device *client_obd = NULL, *lov_obd = NULL;
+        struct lov_obd *lov = NULL;
+        struct obd_import *client_imp = NULL;
+        struct obd_statfs stat_buf = {0};
+        char *buf = NULL;
+        struct obd_ioctl_data *data = NULL;
+        size_t stat_len, uuid_len;
+        __u32 type, index;
+        int len, rc;
+
+        if (!inode || !(sbi = ll_i2sbi(inode)))
+                GOTO(out_statfs, rc = -EINVAL);
+
+        rc = obd_ioctl_getdata(&buf, &len, arg);
+        if (rc)
+                GOTO(out_statfs, rc);
+
+        data = (void*)buf;
+        if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
+            !data->ioc_pbuf1 || !data->ioc_pbuf2)
+                GOTO(out_statfs, rc = -EINVAL);
+
+        /* ioc_plen1/ioc_plen2 come from the caller; never copy out more than
+         * the kernel objects actually hold, or adjacent kernel memory would
+         * leak to user space. */
+        stat_len = data->ioc_plen1;
+        if (stat_len > sizeof(stat_buf))
+                stat_len = sizeof(stat_buf);
+        uuid_len = data->ioc_plen2;
+        if (uuid_len > sizeof(client_imp->imp_target_uuid))
+                uuid_len = sizeof(client_imp->imp_target_uuid);
+
+        memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
+        memcpy(&index, data->ioc_inlbuf2, sizeof(__u32));
+
+        if (type == LL_STATFS_MDC) {
+                if (index > 0)
+                        GOTO(out_statfs, rc = -ENODEV);
+                client_obd = class_exp2obd(sbi->ll_mdc_exp);
+                client_imp = class_exp2cliimp(sbi->ll_mdc_exp);
+        } else if (type == LL_STATFS_LOV) {
+                lov_obd = class_exp2obd(sbi->ll_osc_exp);
+                lov = &lov_obd->u.lov;
+
+                if (index >= lov->desc.ld_tgt_count)
+                        GOTO(out_statfs, rc = -ENODEV);
+
+                client_obd = class_exp2obd(lov->tgts[index].ltd_exp);
+                client_imp = class_exp2cliimp(lov->tgts[index].ltd_exp);
+                /* An inactive target skips statfs but still reports its
+                 * UUID; only jump there when the import is usable. */
+                if (!lov->tgts[index].active && client_imp)
+                        GOTO(out_uuid, rc = -ENODATA);
+        }
+
+        if (!client_obd || !client_imp)
+                GOTO(out_statfs, rc = -EINVAL);
+
+        rc = obd_statfs(client_obd, &stat_buf, jiffies - 1);
+        if (rc)
+                GOTO(out_statfs, rc);
+
+        if (copy_to_user(data->ioc_pbuf1, &stat_buf, stat_len))
+                GOTO(out_statfs, rc = -EFAULT);
+
+out_uuid:
+        if (copy_to_user(data->ioc_pbuf2, &client_imp->imp_target_uuid,
+                         uuid_len))
+                rc = -EFAULT;
+
+out_statfs:
+        if (buf)
+                obd_ioctl_freedata(buf, len);
+        return rc;
+}
+
 EXPORT_SYMBOL(ll_fill_super);
 EXPORT_SYMBOL(ll_put_super);
 EXPORT_SYMBOL(ll_remount_fs);
index 18c122d..2e77ba8 100644 (file)
@@ -368,6 +368,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
         struct page *page = NULL;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm;
+        struct ost_lvb lvb;
         __u64 kms, old_mtime;
         unsigned long pgoff, size, rand_read, seq_read;
         int rc = 0;
@@ -397,7 +398,9 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
                 CWARN("binary changed. inode %lu\n", inode->i_ino);
 
         lov_stripe_lock(lsm);
-        kms = lov_merge_size(lsm, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
+        kms = lvb.lvb_size;
 
         pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
         size = (kms + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
index 84c3229..81c4eda 100644 (file)
@@ -111,6 +111,7 @@ void ll_truncate(struct inode *inode)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
+        struct ost_lvb lvb;
         struct obdo oa;
         int rc;
         ENTRY;
@@ -133,7 +134,9 @@ void ll_truncate(struct inode *inode)
         /* XXX I'm pretty sure this is a hack to paper over a more fundamental
          * race condition. */
         lov_stripe_lock(lsm);
-        if (lov_merge_size(lsm, 0) == inode->i_size) {
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 0);
+        if (lvb.lvb_size == inode->i_size) {
                 CDEBUG(D_VFSTRACE, "skipping punch for obj "LPX64", %Lu=%#Lx\n",
                        lsm->lsm_object_id, inode->i_size, inode->i_size);
                 lov_stripe_unlock(lsm);
@@ -195,7 +198,7 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from,
         obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
         struct brw_page pga;
         struct obdo oa;
-        __u64 kms;
+        struct ost_lvb lvb;
         int rc = 0;
         ENTRY;
 
@@ -235,11 +238,12 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from,
          * locking will have updated the KMS, and for our purposes here we can
          * treat it like i_size. */
         lov_stripe_lock(lsm);
-        kms = lov_merge_size(lsm, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1); /* KMS only */
         lov_stripe_unlock(lsm);
-        if (kms <= offset) {
+        if (lvb.lvb_size <= offset) {
                 LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
-                               kms, offset);
+                               lvb.lvb_size, offset);
                 memset(kmap(page), 0, PAGE_SIZE);
                 kunmap(page);
                 GOTO(prepare_done, rc = 0);
@@ -270,7 +274,8 @@ static int ll_ap_make_ready(void *data, int cmd)
         llap = LLAP_FROM_COOKIE(data);
         page = llap->llap_page;
 
-        LASSERT(!(cmd & OBD_BRW_READ));
+        LASSERTF(!(cmd & OBD_BRW_READ), "cmd %x page %p ino %lu index %lu\n", cmd, page,
+                 page->mapping->host->i_ino, page->index);
 
         /* we're trying to write, but the page is locked.. come back later */
         if (TryLockPage(page))
@@ -312,6 +317,7 @@ static int ll_ap_refresh_count(void *data, int cmd)
         struct lov_stripe_md *lsm;
         struct page *page;
         struct inode *inode;
+        struct ost_lvb lvb;
         __u64 kms;
         ENTRY;
 
@@ -325,7 +331,9 @@ static int ll_ap_refresh_count(void *data, int cmd)
         lsm = lli->lli_smd;
 
         lov_stripe_lock(lsm);
-        kms = lov_merge_size(lsm, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
+        kms = lvb.lvb_size;
         lov_stripe_unlock(lsm);
 
         /* catch race with truncate */
@@ -1037,13 +1045,16 @@ static int ll_readahead(struct ll_readahead_state *ras,
         struct inode *inode;
         struct lov_stripe_md *lsm;
         struct ll_ra_read *bead;
+        struct ost_lvb lvb;
         ENTRY;
 
         inode = mapping->host;
         lsm = ll_i2info(inode)->lli_smd;
 
         lov_stripe_lock(lsm);
-        kms = lov_merge_size(lsm, 1);
+        inode_init_lvb(inode, &lvb);
+        obd_merge_lvb(ll_i2obdexp(inode), lsm, &lvb, 1);
+        kms = lvb.lvb_size;
         lov_stripe_unlock(lsm);
         if (kms == 0) {
                 ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
index 5dff3ba..68c762f 100644 (file)
@@ -113,6 +113,8 @@ static inline void lov_llh_put(struct lov_lock_handles *llh)
 /* lov_merge.c */
 void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
                      struct lov_stripe_md *lsm, int stripeno, int *set);
+int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm,
+                  struct ost_lvb *lvb, int kms_only);
 int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
                    obd_off size, int shrink);
 
index 4b1ff36..d874fed 100644 (file)
 
 #include "lov_internal.h"
 
-/* Merge rss if kms == 0
+/* Merge the lock value block(&lvb) attributes from each of the stripes in a
+ * file into a single lvb. It is expected that the caller initializes the
+ * current atime, mtime, ctime to avoid regressing a more uptodate time on 
+ * the local client.
  *
- * Even when merging RSS, we will take the KMS value if it's larger.
- * This prevents getattr from stomping on dirty cached pages which
- * extend the file size. */
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms)
+ * If @kms_only is set then we do not consider the recently seen size (rss)
+ * when updating the known minimum size (kms).  Even when merging RSS, we will
+ * take the KMS value if it's larger.  This prevents getattr from stomping on
+ * dirty cached pages which extend the file size. */
+int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm,
+                  struct ost_lvb *lvb, int kms_only)
 {
         struct lov_oinfo *loi;
         __u64 size = 0;
+        __u64 blocks = 0;
+        __u64 current_mtime = lvb->lvb_mtime;
+        __u64 current_atime = lvb->lvb_atime;
+        __u64 current_ctime = lvb->lvb_ctime;
         int i;
 
         LASSERT_SPIN_LOCKED(&lsm->lsm_lock);
@@ -59,42 +68,29 @@ __u64 lov_merge_size(struct lov_stripe_md *lsm, int kms)
                 obd_size lov_size, tmpsize;
 
                 tmpsize = loi->loi_kms;
-                if (kms == 0 && loi->loi_rss > tmpsize)
-                        tmpsize = loi->loi_rss;
+                if (kms_only == 0 && loi->loi_lvb.lvb_size > tmpsize)
+                        tmpsize = loi->loi_lvb.lvb_size;
 
                 lov_size = lov_stripe_size(lsm, tmpsize, i);
                 if (lov_size > size)
                         size = lov_size;
+                /* merge blocks, mtime, atime */ 
+                blocks += loi->loi_lvb.lvb_blocks;
+                if (loi->loi_lvb.lvb_mtime > current_mtime)
+                        current_mtime = loi->loi_lvb.lvb_mtime;
+                if (loi->loi_lvb.lvb_atime > current_atime)
+                        current_atime = loi->loi_lvb.lvb_atime;
+                if (loi->loi_lvb.lvb_ctime > current_ctime)
+                        current_ctime = loi->loi_lvb.lvb_ctime;
         }
 
-        return size;
-}
-EXPORT_SYMBOL(lov_merge_size);
-
-/* Merge blocks */
-__u64 lov_merge_blocks(struct lov_stripe_md *lsm)
-{
-        struct lov_oinfo *loi;
-        __u64 blocks = 0;
-        int i;
-
-        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++)
-                blocks += loi->loi_blocks;
-        return blocks;
-}
-EXPORT_SYMBOL(lov_merge_blocks);
-
-__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time)
-{
-        struct lov_oinfo *loi;
-        int i;
-
-        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++)
-                if (loi->loi_mtime > current_time)
-                        current_time = loi->loi_mtime;
-        return current_time;
+        lvb->lvb_size = size;
+        lvb->lvb_blocks = blocks;
+        lvb->lvb_mtime = current_mtime; 
+        lvb->lvb_atime = current_atime; 
+        lvb->lvb_ctime = current_ctime; 
+        RETURN(0);
 }
-EXPORT_SYMBOL(lov_merge_mtime);
 
 /* Must be called under the lov_stripe_lock() */
 int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
@@ -115,7 +111,7 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
                 for (loi = lsm->lsm_oinfo; stripe < lsm->lsm_stripe_count;
                      stripe++, loi++) {
                         kms = lov_size_to_stripe(lsm, size, stripe);
-                        loi->loi_kms = loi->loi_rss = kms;
+                        loi->loi_kms = loi->loi_lvb.lvb_size = kms;
                         CDEBUG(D_INODE,
                                "stripe %d KMS %sing "LPU64"->"LPU64"\n",
                                stripe, kms > loi->loi_kms ? "increas":"shrink",
index 90f2857..ae4bb27 100644 (file)
@@ -287,17 +287,15 @@ static int lov_disconnect(struct obd_export *exp)
         int i, rc;
         ENTRY;
 
-        rc = class_disconnect(exp);
-
         if (!lov->tgts)
-                RETURN(rc);
+                goto out;
 
         /* Only disconnect the underlying layers on the final disconnect. */
         lov->connects--;
         if (lov->connects != 0) {
                 /* why should there be more than 1 connect? */
                 CERROR("disconnect #%d\n", lov->connects);
-                RETURN(rc);
+                goto out;
         }
 
         /* Let's hold another reference so lov_del_obd doesn't spin through
@@ -311,6 +309,8 @@ static int lov_disconnect(struct obd_export *exp)
         }
         lov_putref(obd);
 
+out:
+        rc = class_disconnect(exp); /* bz 9811 */
         RETURN(rc);
 }
 
@@ -2152,7 +2152,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen,
                         struct ldlm_lock *lock;
                         struct lov_stripe_md *lsm;
                 } *data = key;
-                 struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name;
+                struct ldlm_res_id *res_id = &data->lock->l_resource->lr_name;
                 struct lov_oinfo *loi;
                 __u32 *stripe = val;
 
@@ -2442,6 +2442,7 @@ struct obd_ops lov_obd_ops = {
         .o_queue_group_io      = lov_queue_group_io,
         .o_trigger_group_io    = lov_trigger_group_io,
         .o_teardown_async_page = lov_teardown_async_page,
+        .o_merge_lvb           = lov_merge_lvb,
         .o_adjust_kms          = lov_adjust_kms,
         .o_punch               = lov_punch,
         .o_sync                = lov_sync,
index a39540f..bde768b 100644 (file)
@@ -165,12 +165,17 @@ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
 
                 req->rq_buflen = sizeof(*req->rq_md);
                 OBD_ALLOC(req->rq_md, req->rq_buflen);
-                if (req->rq_md == NULL)
+                if (req->rq_md == NULL) {
+                        OBD_FREE_PTR(req);
                         GOTO(out, rc = -ENOMEM);
+                }
 
                 req->rq_oa = obdo_alloc();
-                if (req->rq_oa == NULL)
+                if (req->rq_oa == NULL) {
+                        OBD_FREE_PTR(req->rq_md);
+                        OBD_FREE_PTR(req);
                         GOTO(out, rc = -ENOMEM);
+                }
 
                 req->rq_idx = ost_idx;
                 req->rq_stripe = i;
@@ -217,5 +222,6 @@ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
                 rc = 0;
         }
 out:
+
         RETURN(rc);
 }
index c8f254d..b6b4b62 100644 (file)
@@ -129,26 +129,24 @@ int lov_update_enqueue_set(struct lov_request_set *set,
          * can be addressed then. */
         if (rc == ELDLM_OK) {
                 struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
-                __u64 tmp = req->rq_md->lsm_oinfo->loi_rss;
+                __u64 tmp = req->rq_md->lsm_oinfo->loi_lvb.lvb_size;
 
                 LASSERT(lock != NULL);
                 lov_stripe_lock(set->set_md);
-                loi->loi_rss = tmp;
-                loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime;
-                loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks;
+                loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb;
                 /* Extend KMS up to the end of this lock and no further
                  * A lock on [x,y] means a KMS of up to y + 1 bytes! */
                 if (tmp > lock->l_policy_data.l_extent.end)
                         tmp = lock->l_policy_data.l_extent.end + 1;
                 if (tmp >= loi->loi_kms) {
-                        LDLM_DEBUG(lock, "lock acquired, setting rss="
-                                   LPU64", kms="LPU64, loi->loi_rss, tmp);
+                        LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64
+                                   ", kms="LPU64, loi->loi_lvb.lvb_size, tmp);
                         loi->loi_kms = tmp;
                         loi->loi_kms_valid = 1;
                 } else {
                         LDLM_DEBUG(lock, "lock acquired, setting rss="
                                    LPU64"; leaving kms="LPU64", end="LPU64,
-                                   loi->loi_rss, loi->loi_kms,
+                                   loi->loi_lvb.lvb_size, loi->loi_kms,
                                    lock->l_policy_data.l_extent.end);
                 }
                 lov_stripe_unlock(set->set_md);
@@ -157,12 +155,10 @@ int lov_update_enqueue_set(struct lov_request_set *set,
         } else if (rc == ELDLM_LOCK_ABORTED && flags & LDLM_FL_HAS_INTENT) {
                 memset(lov_lockhp, 0, sizeof(*lov_lockhp));
                 lov_stripe_lock(set->set_md);
-                loi->loi_rss = req->rq_md->lsm_oinfo->loi_rss;
-                loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime;
-                loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks;
+                loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb;
                 lov_stripe_unlock(set->set_md);
                 CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
-                       " kms="LPU64"\n", loi->loi_rss, loi->loi_kms);
+                       " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms);
                 rc = ELDLM_OK;
         } else {
                 struct obd_export *exp = set->set_exp;
@@ -293,10 +289,8 @@ int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm,
                 req->rq_md->lsm_object_id = loi->loi_id;
                 req->rq_md->lsm_stripe_count = 0;
                 req->rq_md->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid;
-                req->rq_md->lsm_oinfo->loi_rss = loi->loi_rss;
                 req->rq_md->lsm_oinfo->loi_kms = loi->loi_kms;
-                req->rq_md->lsm_oinfo->loi_blocks = loi->loi_blocks;
-                req->rq_md->lsm_oinfo->loi_mtime = loi->loi_mtime;
+                req->rq_md->lsm_oinfo->loi_lvb = loi->loi_lvb;
 
                 lov_set_add_req(req, set);
         }
@@ -417,10 +411,10 @@ int lov_fini_cancel_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
 
+        LASSERT(set->set_exp);
         if (set->set_lockh)
                 lov_llh_put(set->set_lockh);
 
@@ -594,9 +588,9 @@ int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 rc = create_done(set->set_exp, set, lsmp);
                 /* FIXME update qos data here */
@@ -776,7 +770,7 @@ static int brw_done(struct lov_request_set *set)
                 loi = &lsm->lsm_oinfo[req->rq_stripe];
 
                 if (req->rq_oa->o_valid & OBD_MD_FLBLOCKS)
-                        loi->loi_blocks = req->rq_oa->o_blocks;
+                        loi->loi_lvb.lvb_blocks = req->rq_oa->o_blocks;
         }
 
         RETURN(0);
@@ -787,9 +781,9 @@ int lov_fini_brw_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 rc = brw_done(set);
                 /* FIXME update qos data here */
@@ -917,9 +911,9 @@ int lov_fini_getattr_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes)
                 rc = common_attr_done(set);
 
@@ -985,9 +979,9 @@ int lov_fini_destroy_set(struct lov_request_set *set)
 {
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 /* FIXME update qos data here */
         }
@@ -1064,9 +1058,9 @@ int lov_fini_setattr_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 rc = common_attr_done(set);
                 /* FIXME update qos data here */
@@ -1141,6 +1135,7 @@ int lov_update_setattr_set(struct lov_request_set *set,
                            struct lov_request *req, int rc)
 {
         struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+        struct lov_stripe_md *lsm = set->set_md;
         ENTRY;
 
         lov_update_set(set, req, rc);
@@ -1151,10 +1146,17 @@ int lov_update_setattr_set(struct lov_request_set *set,
 
         /* FIXME: LOV STACKING update loi data should be done by OSC *
          * when this is gone we can go back to using lov_update_common_set() */
-        if (rc == 0 && req->rq_oa->o_valid & OBD_MD_FLMTIME)
-                set->set_md->lsm_oinfo[req->rq_stripe].loi_mtime =
-                        req->rq_oa->o_mtime;
-        /* ditto loi_atime, loi_ctime when available */
+        if (rc == 0) { /* gate each cached time on its own o_valid bit */
+                if (req->rq_oa->o_valid & OBD_MD_FLCTIME)
+                        lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_ctime =
+                                req->rq_oa->o_ctime;
+                if (req->rq_oa->o_valid & OBD_MD_FLMTIME)
+                        lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_mtime =
+                                req->rq_oa->o_mtime;
+                if (req->rq_oa->o_valid & OBD_MD_FLATIME)
+                        lsm->lsm_oinfo[req->rq_stripe].loi_lvb.lvb_atime =
+                                req->rq_oa->o_atime;
+        }
 
         RETURN(rc);
 }
@@ -1177,9 +1179,9 @@ int lov_fini_punch_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 if (!set->set_success)
                         rc = -EIO;
@@ -1258,9 +1260,9 @@ int lov_fini_sync_set(struct lov_request_set *set)
         int rc = 0;
         ENTRY;
 
-        LASSERT(set->set_exp);
         if (set == NULL)
                 RETURN(0);
+        LASSERT(set->set_exp);
         if (set->set_completes) {
                 if (!set->set_success)
                         rc = -EIO;
index 9284843..31478fa 100644 (file)
@@ -485,7 +485,7 @@ static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file,
 }
 
 static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
-                              void *lmm, int lmm_size)
+                              void *lmm, int lmm_size, const char *name)
 {
         int rc;
 
@@ -497,7 +497,7 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
 
         lock_24kernel();
         rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED,
-                                   XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size, 0);
+                                   name, lmm, lmm_size, 0);
 
         unlock_24kernel();
 
@@ -508,7 +508,8 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
 }
 
 /* Must be called with i_sem held */
-static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
+static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size,
+                              const char *name)
 {
         int rc;
 
@@ -516,7 +517,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
         lock_24kernel();
 
         rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED,
-                            XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size);
+                            name, lmm, lmm_size);
         unlock_24kernel();
 
         /* This gives us the MD size */
@@ -525,7 +526,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
 
         if (rc < 0) {
                 CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n",
-                       EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA,
+                       EXT3_XATTR_INDEX_TRUSTED, name,
                        inode->i_ino, rc);
                 memset(lmm, 0, lmm_size);
                 return (rc == -ENODATA) ? 0 : rc;
index b9f987a..68a049e 100644 (file)
@@ -125,7 +125,7 @@ static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle,
 }
 
 static int fsfilt_reiserfs_set_md(struct inode *inode, void *handle,
-                                  void *lmm, int lmm_size)
+                                  void *lmm, int lmm_size, const char *name)
 {
         /* XXX write stripe data into MDS file itself */
         CERROR("not implemented yet\n");
@@ -133,7 +133,8 @@ static int fsfilt_reiserfs_set_md(struct inode *inode, void *handle,
         return -ENOSYS;
 }
 
-static int fsfilt_reiserfs_get_md(struct inode *inode, void *lmm, int lmm_size)
+static int fsfilt_reiserfs_get_md(struct inode *inode, void *lmm, int lmm_size,
+                                  const char *name)
 {
         if (lmm == NULL)
                 return inode->i_size;
index 313950c..b88e508 100644 (file)
@@ -609,43 +609,6 @@ static void mdc_commit_close(struct ptlrpc_request *req)
         spin_unlock(&open_req->rq_lock);
 }
 
-static int mdc_close_interpret(struct ptlrpc_request *req, void *data, int rc)
-{
-        union ptlrpc_async_args *aa = data;
-        struct mdc_rpc_lock *rpc_lock;
-        struct obd_device *obd = aa->pointer_arg[1];
-        unsigned long flags;
-
-        spin_lock_irqsave(&req->rq_lock, flags);
-        rpc_lock = aa->pointer_arg[0];
-        aa->pointer_arg[0] = NULL;
-        spin_unlock_irqrestore(&req->rq_lock, flags);
-
-        if (rpc_lock == NULL) {
-                CERROR("called with NULL rpc_lock\n");
-        } else {
-                LASSERTF(rpc_lock == obd->u.cli.cl_rpc_lock, "%p != %p\n",
-                         rpc_lock, obd->u.cli.cl_rpc_lock);
-                mdc_put_rpc_lock(rpc_lock, NULL);
-        }
-        wake_up(&req->rq_reply_waitq);
-        RETURN(rc);
-}
-
-/* We can't use ptlrpc_check_reply, because we don't want to wake up for
- * anything but a reply or an error. */
-static int mdc_close_check_reply(struct ptlrpc_request *req)
-{
-        int rc = 0;
-        unsigned long flags;
-
-        spin_lock_irqsave(&req->rq_lock, flags);
-        if (req->rq_async_args.pointer_arg[0] == NULL)
-                rc = 1;
-        spin_unlock_irqrestore (&req->rq_lock, flags);
-        return rc;
-}
-
 int mdc_close(struct obd_export *exp, struct obdo *oa,
               struct obd_client_handle *och, struct ptlrpc_request **request)
 {
@@ -656,7 +619,6 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
                               obd->u.cli.cl_max_mds_cookiesize};
         struct ptlrpc_request *req;
         struct mdc_open_data *mod;
-        struct l_wait_info lwi;
         ENTRY;
 
         req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
@@ -690,21 +652,10 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
         LASSERT(req->rq_cb_data == NULL);
         req->rq_cb_data = mod;
 
-        CDEBUG(D_HA, "close req->rep_len %d mdsize %d cookiesize %d\n",
-               req->rq_replen,
-               obd->u.cli.cl_max_mds_easize, obd->u.cli.cl_max_mds_cookiesize);
-
-        /* We hand a ref to the rpcd here, so we need another one of our own. */
-        ptlrpc_request_addref(req);
-
         mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
-        req->rq_interpret_reply = mdc_close_interpret;
-        req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock;
-        req->rq_async_args.pointer_arg[1] = obd;
-        ptlrpcd_add_req(req);
-        lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), NULL, NULL, NULL);
-        rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req),
-                          &lwi);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+
         if (req->rq_repmsg == NULL) {
                 CDEBUG(D_HA, "request failed to send: %p, %d\n", req,
                        req->rq_status);
@@ -727,14 +678,10 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
                         rc = -EPROTO;
                 }
         }
-        if (req->rq_async_args.pointer_arg[0] != NULL) {
-                CERROR("returned without dropping rpc_lock: rc %d\n", rc);
-                mdc_close_interpret(req, &req->rq_async_args, rc);
-        }
 
         EXIT;
- out:
         *request = req;
+ out:
         return rc;
 }
 
index 398eabe..59f59dd 100644 (file)
@@ -493,7 +493,7 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md,
 
         if (lock)
                 down(&inode->i_sem);
-        rc = fsfilt_get_md(obd, inode, md, *size);
+        rc = fsfilt_get_md(obd, inode, md, *size, "lov");
 
         if (rc < 0) {
                 CERROR("Error %d reading eadata for ino %lu\n",
@@ -708,7 +708,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
         if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) ||
             (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) {
                 down(&inode->i_sem);
-                rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
+                rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0,
+                                   "lov");
                 up(&inode->i_sem);
                 CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
                        rc, inode->i_ino);
@@ -1856,6 +1857,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
         spin_lock_init(&mds->mds_transno_lock);
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
         mds->mds_max_cookiesize = sizeof(struct llog_cookie);
+        mds->mds_atime_diff = MAX_ATIME_DIFF;
 
         sprintf(ns_name, "mds-%s", obd->obd_uuid.uuid);
         obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
@@ -2053,7 +2055,7 @@ err_cleanup:
 
 int mds_postrecov(struct obd_device *obd)
 {
-        int rc, item = 0;
+        int rc;
         ENTRY;
 
         if (obd->obd_fail)
@@ -2070,14 +2072,11 @@ int mds_postrecov(struct obd_device *obd)
                        obd->obd_name, rc);
                 GOTO(out, rc);
         }
-        
+
         /* clean PENDING dir */
         rc = mds_cleanup_pending(obd);
-        if (rc < 0) {
+        if (rc < 0)
                 GOTO(out, rc);
-        } else {
-                item = rc;
-        }
 
         /* FIXME Does target_finish_recovery really need this to block? */
         /* Notify the LOV, which will in turn call mds_notify for each tgt */
@@ -2091,7 +2090,7 @@ int mds_postrecov(struct obd_device *obd)
         lquota_recovery(quota_interface, obd);
 
 out:
-        RETURN(rc < 0 ? rc : item);
+        RETURN(rc);
 }
 
 /* We need to be able to stop an mds_lov_synchronize */
@@ -2545,7 +2544,6 @@ static int mdt_health_check(struct obd_device *obd)
         return rc;
 }
 
-
 static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
                                           void *data)
 {
@@ -2555,7 +2553,8 @@ static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
         fid.generation = gen;
         return mds_fid2dentry(&obd->u.mds, &fid, NULL);
 }
-static int mds_health_check(struct obd_device *obd) 
+
+static int mds_health_check(struct obd_device *obd)
 {
         struct obd_device_target *odt = &obd->u.obt;
         struct mds_obd *mds = &obd->u.mds;
@@ -2566,7 +2565,7 @@ static int mds_health_check(struct obd_device *obd)
 
         LASSERT(mds->mds_health_check_filp != NULL);
         rc |= !!lvfs_check_io_health(obd, mds->mds_health_check_filp);
-        
+
         return rc;
 }
 
index 137aa2c..92d351a 100644 (file)
@@ -341,6 +341,40 @@ static int lprocfs_mds_wr_itune(struct file *file, const char *buffer,
 }
 #endif
 
+static int lprocfs_wr_atime_diff(struct file *file, const char *buffer,
+                                 unsigned long count, void *data)
+{       /* Parse a number written from user space into mds_atime_diff. */
+        struct obd_device *obd = data;
+        struct mds_obd *mds = &obd->u.mds;
+        char kernbuf[20], *end;
+        unsigned long diff = 0;
+        /* Reject input that would not leave room for the NUL added below. */
+        if (count > (sizeof(kernbuf) - 1))
+                return -EINVAL;
+        /* Pull the raw text out of the caller's buffer; -EFAULT on failure. */
+        if (copy_from_user(kernbuf, buffer, count))
+                return -EFAULT;
+        /* count <= sizeof(kernbuf) - 1 here, so this index is in bounds. */
+        kernbuf[count] = '\0';
+        /* Base 0 lets simple_strtoul() take the radix from the prefix. */
+        diff = simple_strtoul(kernbuf, &end, 0);
+        if (kernbuf == end) /* end did not advance: nothing was parsed */
+                return -EINVAL;
+        /* Store the new value and report the whole write as consumed. */
+        mds->mds_atime_diff = diff;
+        return count;
+}
+
+static int lprocfs_rd_atime_diff(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data)
+{       /* Format mds_atime_diff into page as "%lu\n"; start/off unused. */
+        struct obd_device *obd = data;
+        struct mds_obd *mds = &obd->u.mds;
+        /* The value is emitted by this single call, so flag EOF right away. */
+        *eof = 1;
+        return snprintf(page, count, "%lu\n", mds->mds_atime_diff);
+}
+
 struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "uuid",            lprocfs_rd_uuid,        0, 0 },
         { "blocksize",       lprocfs_rd_blksize,     0, 0 },
@@ -368,6 +402,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
                              lprocfs_wr_group_upcall, 0},
         { "group_flush",     0, lprocfs_wr_group_flush, 0},
         { "group_info",      0, lprocfs_wr_group_info, 0 },
+        { "atime_diff",      lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
         { 0 }
 };
 
index 84cdc93..0ff7870 100644 (file)
@@ -251,7 +251,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
         } else {
                 rc = fsfilt_read_record(obd, file, lsd, sizeof(*lsd), &off);
                 if (rc) {
-                        CERROR("error reading MDS %s: rc %d\n", LAST_RCVD, rc);
+                        CERROR("error reading MDS %s: rc %d\n", LAST_RCVD, rc);
                         GOTO(err_msd, rc);
                 }
                 if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
@@ -288,6 +288,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
 
         mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno);
 
+        lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
         CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
                obd->obd_name, mds->mds_last_transno);
         CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
index 599287f..1a56542 100644 (file)
@@ -231,7 +231,7 @@ static void mds_finish_join(struct mds_obd *mds, struct ptlrpc_request *req,
                 mds->mds_max_cookiesize = body->max_cookiesize;
                 body->valid |= OBD_MD_FLMODEASIZE;
         }
-        
+
         if (body->valid & OBD_MD_FLMODEASIZE)
                 CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
                        mds->mds_max_mdsize, mds->mds_max_cookiesize);
@@ -265,7 +265,7 @@ static int mds_join_unlink_tail_inode(struct mds_update_record *rec,
 
         rc = mds_get_parents_children_locked(obd, mds, &join_rec->jr_fid,
                                              &de_tailparent, &head_fid,
-                                             &de_head, LCK_PW, rec->ur_name,
+                                             &de_head, LCK_EX, rec->ur_name,
                                              rec->ur_namelen, &de_tail,
                                              NULL, 0, NULL, dlm_handles,
                                              LCK_EX);
@@ -315,9 +315,9 @@ cleanup:
 
         if (dlm_handles[0].cookie != 0) {
                 if (rc)
-                        ldlm_lock_decref(&dlm_handles[0], LCK_PW);
+                        ldlm_lock_decref(&dlm_handles[0], LCK_EX);
                 else
-                        ptlrpc_save_lock(req, &dlm_handles[0], LCK_PW);
+                        ptlrpc_save_lock(req, &dlm_handles[0], LCK_EX);
         }
         if (de_tail)
                 l_dput(de_tail);
@@ -473,7 +473,7 @@ int mds_join_file(struct mds_update_record *rec, struct ptlrpc_request *req,
         CDEBUG(D_INODE, "join finish, set lmm V2 to inode %lu \n",
                head_inode->i_ino);
         fsfilt_set_md(obd, head_inode, handle, head_lmmj,
-                      sizeof(struct lov_mds_md_join));
+                      sizeof(struct lov_mds_md_join), "lov");
         mds_finish_join(mds, req, head_inode, head_lmmj);
 cleanup:
         rc = mds_finish_transno(mds, head_inode, handle, req, rc, 0);
index a75052f..42a0259 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/obd_lov.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_fsfilt.h>
+#include <linux/lustre_ver.h>
 
 #include "mds_internal.h"
 
@@ -306,6 +307,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct lustre_handle conn = {0,};
+        struct obd_connect_data *data;
         int rc, i;
         ENTRY;
 
@@ -322,8 +324,14 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
                 RETURN(-ENOTCONN);
         }
 
-        rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid,
-                         NULL /* obd_connect_data */);
+        OBD_ALLOC(data, sizeof(*data));
+        if (data == NULL)
+                RETURN(-ENOMEM);
+        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX;
+        data->ocd_version = LUSTRE_VERSION_CODE;
+        /* NB: lov_connect() needs to fill in .ocd_index for each OST */
+        rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid, data);
+        OBD_FREE(data, sizeof(*data));
         if (rc) {
                 CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc);
                 mds->mds_osc_obd = ERR_PTR(rc);
@@ -830,7 +838,7 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode,
                 GOTO(conv_free, rc);
         }
 
-        rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size);
+        rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, "lov");
 
         err = fsfilt_commit(obd, inode, handle, 0);
         if (!rc)
index c75a94b..4bf67bc 100644 (file)
@@ -364,7 +364,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
                 LASSERT(lmm_buf);
                 LASSERT(lmm_bufsize >= lmm_size);
                 memcpy(lmm_buf, lmm, lmm_size);
-                rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size);
+                rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
                 if (rc)
                         CERROR("open replay failed to set md:%d\n", rc);
                 RETURN(0);
@@ -477,14 +477,12 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset,
                 GOTO(out_oa, rc);
         }
 
-        rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size);
+        rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov");
         lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size);
-        if (!lmm_buf) {
-                CERROR("Can't allocate reply buffer size=%d\n", lmm_size);
+        if (!lmm_buf) 
                 rc = -ENOMEM;
-        } else {
+        else 
                 memcpy(lmm_buf, lmm, lmm_size);
-        }
         obd_free_diskmd(mds->mds_osc_exp, &lmm);
  out_oa:
         oti_free_cookies(&oti);
@@ -778,31 +776,25 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid,
 
         if (dchild->d_inode != NULL) {
                 mds_inode_set_orphan(dchild->d_inode);
-                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
-                mds_pack_inode2body(body, dchild->d_inode);
-                intent_set_disposition(rep, DISP_LOOKUP_EXECD);
-                intent_set_disposition(rep, DISP_LOOKUP_POS);
                 CWARN("Orphan %s found and opened in PENDING directory\n",
                        fidname);
-                goto open;
-        }
-        l_dput(dchild);
+        } else {
+                l_dput(dchild);
 
-        /* We didn't find it in PENDING so it isn't an orphan.  See
-         * if it was a regular inode that was previously created. */
-        dchild = mds_fid2dentry(mds, fid, NULL);
-        if (IS_ERR(dchild))
-                RETURN(PTR_ERR(dchild));
+                /* We didn't find it in PENDING so it isn't an orphan.  See
+                 * if it was a regular inode that was previously created. */
+                dchild = mds_fid2dentry(mds, fid, NULL);
+                if (IS_ERR(dchild))
+                        RETURN(PTR_ERR(dchild));
+        }
 
         mds_pack_inode2fid(&body->fid1, dchild->d_inode);
         mds_pack_inode2body(body, dchild->d_inode);
         intent_set_disposition(rep, DISP_LOOKUP_EXECD);
         intent_set_disposition(rep, DISP_LOOKUP_POS);
 
- open:
-        rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep,
-                             NULL);
-        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
+        rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL);
+        rc = mds_finish_transno(mds, dchild->d_inode, handle,
                                 req, rc, rep ? rep->lock_policy_res1 : 0);
         /* XXX what do we do here if mds_finish_transno itself failed? */
 
@@ -1301,7 +1293,7 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd,
                  * */
                 LTIME_S(iattr.ia_atime) = request_body->atime;
                 if ((LTIME_S(iattr.ia_atime) >
-                     LTIME_S(inode->i_atime) + MAX_ATIME_DIFF) ||
+                     LTIME_S(inode->i_atime) + mds->mds_atime_diff) ||
                     (iattr.ia_valid != 0 &&
                      LTIME_S(iattr.ia_atime) > LTIME_S(inode->i_atime)))
                         iattr.ia_valid |= ATTR_ATIME;
index 1e6d76c..6d62bf3 100644 (file)
@@ -115,8 +115,10 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
 
         /* if the export has already been failed, we have no last_rcvd slot */
         if (req->rq_export->exp_failed) {
-                CWARN("committing transaction for disconnected client %s\n",
-                      req->rq_export->exp_client_uuid.uuid);
+                CWARN("commit transaction for disconnected client %s: rc %d\n",
+                      req->rq_export->exp_client_uuid.uuid, rc);
+                if (rc == 0)
+                        rc = -ENOTCONN;
                 if (handle)
                         GOTO(commit, rc);
                 RETURN(rc);
@@ -139,7 +141,13 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
 
         transno = req->rq_reqmsg->transno;
         if (rc != 0) {
-                LASSERT(transno == 0);
+                if (transno != 0) {
+                        CERROR("%s: replay %s transno "LPU64" failed: rc %d\n",
+                               obd->obd_name,
+                               libcfs_nid2str(req->rq_export->exp_connection->c_peer.nid),
+                               transno, rc);
+                        transno = 0;
+                }
         } else if (transno == 0) {
                 spin_lock(&mds->mds_transno_lock);
                 transno = ++mds->mds_last_transno;
@@ -334,8 +342,7 @@ void mds_steal_ack_locks(struct ptlrpc_request *req)
                 ptlrpc_schedule_difficult_reply (oldrep);
 
                 spin_unlock (&svc->srv_lock);
-                spin_unlock_irqrestore (&exp->exp_lock, flags);
-                return;
+                break;
         }
         spin_unlock_irqrestore (&exp->exp_lock, flags);
 }
@@ -577,7 +584,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                       lum->lmm_stripe_count == 0) ||
                     /* lmm_stripe_size == -1 is deprecated in 1.4.6 */
                     lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1))){
-                        rc = fsfilt_set_md(obd, inode, handle, NULL, 0);
+                        rc = fsfilt_set_md(obd, inode, handle, NULL, 0, "lov");
                         if (rc)
                                 GOTO(cleanup, rc);
                 } else {
@@ -590,7 +597,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                         obd_free_memmd(mds->mds_osc_exp, &lsm);
 
                         rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata,
-                                           rec->ur_eadatalen);
+                                           rec->ur_eadatalen, "lov");
                         if (rc)
                                 GOTO(cleanup, rc);
                 }
@@ -865,7 +872,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                         if (rc > 0) {
                                 down(&inode->i_sem);
                                 rc = fsfilt_set_md(obd, inode, handle,
-                                                   &lmm, lmm_size);
+                                                   &lmm, lmm_size, "lov");
                                 up(&inode->i_sem);
                         }
                         if (rc)
index cf487cb..925967a 100644 (file)
@@ -228,13 +228,14 @@ int mds_cleanup_pending(struct obd_device *obd)
                         GOTO(err_out, rc = PTR_ERR(dchild));
                 }
                 if (!dchild->d_inode) {
-                        CERROR("orphan %s has been removed\n", d_name);
+                        CWARN("%s: orphan %s has already been removed\n",
+                              obd->obd_name, d_name);
                         GOTO(next, rc = 0);
                 }
 
                 if (is_bad_inode(dchild->d_inode)) {
-                        CERROR("bad orphan inode found %lu/%u\n",
-                               dchild->d_inode->i_ino,
+                        CERROR("%s: bad orphan inode found %lu/%u\n",
+                               obd->obd_name, dchild->d_inode->i_ino,
                                dchild->d_inode->i_generation);
                         GOTO(next, rc = -ENOENT);
                 }
@@ -244,7 +245,8 @@ int mds_cleanup_pending(struct obd_device *obd)
                 if (mds_inode_is_orphan(child_inode) &&
                     mds_orphan_open_count(child_inode)) {
                         MDS_UP_READ_ORPHAN_SEM(child_inode);
-                        CWARN("orphan %s re-opened during recovery\n", d_name);
+                        CWARN("%s: orphan %s re-opened during recovery\n",
+                              obd->obd_name, d_name);
                         GOTO(next, rc = 0);
                 }
                 MDS_UP_READ_ORPHAN_SEM(child_inode);
@@ -252,16 +254,18 @@ int mds_cleanup_pending(struct obd_device *obd)
                 rc = mds_unlink_orphan(obd, dchild, child_inode, pending_dir);
                 if (rc == 0) {
                         item ++;
-                        CWARN("removed orphan %s from MDS and OST\n", d_name);
+                        CDEBUG(D_HA, "%s: removed orphan %s\n",
+                               obd->obd_name, d_name);
                 } else {
-                        CDEBUG(D_INODE, "removed orphan %s from MDS/OST failed,"
-                               " rc = %d\n", d_name, rc);
+                        CDEBUG(D_INODE, "%s: removed orphan %s failed,"
+                               " rc = %d\n", obd->obd_name, d_name, rc);
                         rc = 0;
                 }
 next:
                 l_dput(dchild);
                 up(&pending_dir->i_sem);
         }
+        rc = 0;
 err_out:
         list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
                 list_del(&dirent->lld_list);
@@ -269,8 +273,9 @@ err_out:
         }
 err_pop:
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-        if (rc == 0)
-                rc = item;
+        if (item > 0)
+                CWARN("%s: removed %d pending open-unlinked files\n",
+                      obd->obd_name, item);
         RETURN(rc);
 
 err_mntget:
index de49e60..4ff5e2c 100644 (file)
@@ -2,10 +2,10 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
  *  lustre/mgc/mgc_request.c
- *  Lustre Management Client config llog handling
+ *  Lustre Management Client
  *
  *  Copyright (C) 2006 Cluster File Systems, Inc.
- *   Author Nathan Rutman <nathan@clusterfs.com>
+ *   Author: Nathan Rutman <nathan@clusterfs.com>
  *
  *   This file is part of Lustre, http://www.lustre.org
  *
  *   along with Lustre; if not, write to the Free Software
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- *  For testing and management it is treated as an obd_device,
- *  although * it does not export a full OBD method table (the
- *  requests are coming * in over the wire, so object target modules
- *  do not have a full * method table.)
  */
  
 #ifndef EXPORT_SYMTAB
index 9b3b8cc..4024823 100644 (file)
@@ -151,24 +151,24 @@ int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id)
         ne_off = le64_to_cpu (off);
         id = le64_to_cpu (id);
         if (memcmp(addr, (char *)&ne_off, LPDS)) {
-                CERROR("%s: id "LPX64" offset "LPU64" off: "LPX64" != "
+                CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" off: "LPX64" != "
                        LPX64"\n", who, id, off, *(__u64 *)addr, ne_off);
                 err = -EINVAL;
         }
         if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
-                CERROR("%s: id "LPX64" offset "LPU64" id: "LPX64" != "LPX64"\n",
+                CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" id: "LPX64" != "LPX64"\n",
                        who, id, off, *(__u64 *)(addr + LPDS), id);
                 err = -EINVAL;
         }
 
         addr += end - LPDS - LPDS;
         if (memcmp(addr, (char *)&ne_off, LPDS)) {
-                CERROR("%s: id "LPX64" offset "LPU64" end off: "LPX64" != "
+                CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" end off: "LPX64" != "
                        LPX64"\n", who, id, off, *(__u64 *)addr, ne_off);
                 err = -EINVAL;
         }
         if (memcmp(addr + LPDS, (char *)&id, LPDS)) {
-                CERROR("%s: id "LPX64" offset "LPU64" end id: "LPX64" != "
+                CDEBUG(D_ERROR, "%s: id "LPX64" offset "LPU64" end id: "LPX64" != "
                        LPX64"\n", who, id, off, *(__u64 *)(addr + LPDS), id);
                 err = -EINVAL;
         }
index 52c1f01..a624183 100644 (file)
@@ -1247,8 +1247,8 @@ search_again:
         list_for_each(p, &obd->obd_exports) {
                 doomed_exp[num_to_evict] = list_entry(p, struct obd_export,
                                                       exp_obd_chain);
-                if (strcmp(obd_export_nid2str(doomed_exp[num_to_evict]), nid)
-                    == 0) {
+                if (strcmp(obd_export_nid2str(doomed_exp[num_to_evict]),
+                           nid) == 0) {
                         class_export_get(doomed_exp[num_to_evict]);
                         if (++num_to_evict == EVICT_BATCH)
                                 break;
@@ -1270,7 +1270,7 @@ search_again:
         }
 
         if (!exports_evicted)
-                CERROR("%s: can't disconnect NID '%s': no exports found\n",
+                CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n",
                        obd->obd_name, nid);
         return exports_evicted;
 }
index 91cdfb5..18e8c00 100644 (file)
@@ -342,7 +342,6 @@ int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
         struct llog_process_cat_data *cd = catdata;
         void *buf;
         int rc = 0, first_index = 1, index, idx;
-        struct llog_rec_tail *tail;
         ENTRY;
 
         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
@@ -358,6 +357,7 @@ int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
 
         while (rc == 0) {
                 struct llog_rec_hdr *rec;
+                struct llog_rec_tail *tail;
 
                 /* skip records not set in bitmap */
                 while (index >= first_index &&
@@ -382,9 +382,13 @@ int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
                         rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
                         idx ++;
                 }
+                tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
 
                 /* process records in buffer, starting where we found one */
-                while ((void *)rec >= buf) {
+                while ((void *)tail > buf) {
+                        rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
+                                sizeof(*tail);
+
                         if (rec->lrh_index == 0)
                                 GOTO(out, 0); /* no more records */
 
@@ -406,8 +410,7 @@ int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
                         --index;
                         if (index < first_index)
                                 GOTO(out, rc = 0);
-                        tail = (void *)rec - sizeof(struct llog_rec_tail);
-                        rec = ((void *)rec - le32_to_cpu(tail->lrt_len));
+                        tail = (void *)rec - sizeof(*tail);
                 }
         }
 
index b8a21d0..1b7a6ed 100644 (file)
@@ -390,7 +390,7 @@ static int llog_test_5(struct obd_device *obd)
         llog_init_handle(llh, LLOG_F_IS_CAT, &uuid);
 
         CWARN("5b: print the catalog entries.. we expect 2\n");
-        rc = llog_process(llh, (llog_cb_t)cat_print_cb, "test 5", NULL);
+        rc = llog_process(llh, cat_print_cb, "test 5", NULL);
         if (rc) {
                 CERROR("5b: process with cat_print_cb failed: %d\n", rc);
                 GOTO(out, rc);
@@ -412,7 +412,7 @@ static int llog_test_5(struct obd_device *obd)
         }
 
         CWARN("5b: print the catalog entries.. we expect 1\n");
-        rc = llog_process(llh, (llog_cb_t)cat_print_cb, "test 5", NULL);
+        rc = llog_process(llh, cat_print_cb, "test 5", NULL);
         if (rc) {
                 CERROR("5b: process with cat_print_cb failed: %d\n", rc);
                 GOTO(out, rc);
@@ -482,11 +482,11 @@ static int llog_test_6(struct obd_device *obd, char *name)
                 GOTO(parse_out, rc);
         }
 
-        rc = llog_process(llh, (llog_cb_t)plain_print_cb, NULL, NULL);
+        rc = llog_process(llh, plain_print_cb, NULL, NULL);
         if (rc)
                 CERROR("6: llog_process failed %d\n", rc);
 
-        rc = llog_reverse_process(llh, (llog_cb_t)plain_print_cb, NULL, NULL);
+        rc = llog_reverse_process(llh, plain_print_cb, NULL, NULL);
         if (rc)
                 CERROR("6: llog_reverse_process failed %d\n", rc);
 
index 1768612..025e188 100644 (file)
@@ -694,6 +694,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, merge_lvb);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
index 716368f..40af90a 100644 (file)
@@ -90,7 +90,6 @@ int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index)
 int class_add_uuid(char *uuid, __u64 nid)
 {
         struct uuid_nid_data *data;
-        int rc;
         int nob = strnlen (uuid, PAGE_SIZE) + 1;
 
         LASSERT(nid != 0);  /* valid newconfig NID is never zero */
@@ -98,7 +97,6 @@ int class_add_uuid(char *uuid, __u64 nid)
         if (nob > PAGE_SIZE)
                 return -EINVAL;
 
-        rc = -ENOMEM;
         OBD_ALLOC(data, sizeof(*data));
         if (data == NULL)
                 return -ENOMEM;
index 31775f4..909e311 100644 (file)
@@ -53,6 +53,12 @@ unsigned int ll_rand(void)
 }
 EXPORT_SYMBOL(ll_rand);
 
+/* Note that if the input seeds are not completely random, then there is
+ * a preferred location for the entropy in the two seeds, so that the
+ * initial values from the PRNG are not the same each time.
+ *
+ * seed1 (seed_x) should have the most entropy in the low bits of the word;
+ * seed2 (seed_y) should have the most entropy in the high bits of the word. */
 void ll_srand(unsigned int seed1, unsigned int seed2)
 {
        if (seed1)
index 9eed26e..503633b 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/obd_support.h>
 #include <linux/obd_class.h>
 #include <linux/obd_echo.h>
+#include <linux/lustre_ver.h>
 #include <linux/lustre_debug.h>
 #include <linux/lprocfs_status.h>
 
@@ -515,11 +516,11 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
         gfp_mask = ((oa->o_id & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
 
         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
+        LASSERT(lsm != NULL);
+        LASSERT(lsm->lsm_object_id == oa->o_id);
 
         if (count <= 0 ||
-            (count & (PAGE_SIZE - 1)) != 0 ||
-            (lsm != NULL &&
-             lsm->lsm_object_id != oa->o_id))
+            (count & (PAGE_SIZE - 1)) != 0)
                 return (-EINVAL);
 
         /* XXX think again with misaligned I/O */
@@ -1328,6 +1329,7 @@ echo_client_setup(struct obd_device *obddev, obd_count len, void *buf)
         struct obd_device *tgt;
         struct lustre_handle conn = {0, };
         struct obd_uuid echo_uuid = { "ECHO_UUID" };
+        struct obd_connect_data *ocd = NULL;
         int rc;
         ENTRY;
 
@@ -1347,8 +1349,20 @@ echo_client_setup(struct obd_device *obddev, obd_count len, void *buf)
         INIT_LIST_HEAD (&ec->ec_objects);
         ec->ec_unique = 0;
 
-        rc = obd_connect(&conn, tgt, &echo_uuid, NULL /* obd_connect_data */);
-        if (rc) {
+        OBD_ALLOC(ocd, sizeof(*ocd));
+        if (ocd == NULL) {
+                CERROR("Can't alloc ocd connecting to %s\n",
+                       lustre_cfg_string(lcfg, 1));
+                return -ENOMEM;
+        }
+        
+        ocd->ocd_version = LUSTRE_VERSION_CODE;
+
+        rc = obd_connect(&conn, tgt, &echo_uuid, ocd);
+
+        OBD_FREE(ocd, sizeof(*ocd));
+
+        if (rc != 0) {
                 CERROR("fail to connect to device %s\n",
                        lustre_cfg_string(lcfg, 1));
                 return (rc);
index b59343b..d04cb91 100644 (file)
@@ -801,7 +801,7 @@ static int filter_prep(struct obd_device *obd)
         if (IS_ERR(file)) {
                 rc = PTR_ERR(file);
                 CERROR("OBD filter: cannot open/create %s rc = %d\n",
-                        HEALTH_CHECK, rc);
+                       HEALTH_CHECK, rc);
                 GOTO(err_filp, rc);
         }
         filter->fo_health_check_filp = file;
@@ -1734,32 +1734,59 @@ static int filter_cleanup(struct obd_device *obd)
 static int filter_connect_internal(struct obd_export *exp,
                                    struct obd_connect_data *data)
 {
-        if (data != NULL) {
-                CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64
-                       " ocd_version: %x ocd_grant: %d\n",
-                       exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
-                       data->ocd_connect_flags, data->ocd_version,
-                       data->ocd_grant);
-
-                data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
-                exp->exp_connect_flags = data->ocd_connect_flags;
-                data->ocd_version = LUSTRE_VERSION_CODE;
-
-                if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
-                        obd_size left, want;
-
-                        spin_lock(&exp->exp_obd->obd_osfs_lock);
-                        left = filter_grant_space_left(exp);
-                        want = data->ocd_grant;
-                        data->ocd_grant = filter_grant(exp, 0, want, left);
-                        spin_unlock(&exp->exp_obd->obd_osfs_lock);
-
-                        CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: "
-                               "%lld left: %lld\n", exp->exp_obd->obd_name,
-                               exp->exp_client_uuid.uuid, exp,
-                               data->ocd_grant, want, left);
+        if (!data) 
+                RETURN(0);
+        
+        CDEBUG(D_RPCTRACE, "%s: cli %s/%p ocd_connect_flags: "LPX64
+               " ocd_version: %x ocd_grant: %d\n",
+               exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp,
+               data->ocd_connect_flags, data->ocd_version,
+               data->ocd_grant);
+
+        data->ocd_connect_flags &= OST_CONNECT_SUPPORTED;
+        exp->exp_connect_flags = data->ocd_connect_flags;
+        data->ocd_version = LUSTRE_VERSION_CODE;
+
+        if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
+                obd_size left, want;
+
+                spin_lock(&exp->exp_obd->obd_osfs_lock);
+                left = filter_grant_space_left(exp);
+                want = data->ocd_grant;
+                data->ocd_grant = filter_grant(exp, 0, want, left);
+                spin_unlock(&exp->exp_obd->obd_osfs_lock);
+
+                CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: "
+                       "%lld left: %lld\n", exp->exp_obd->obd_name,
+                       exp->exp_client_uuid.uuid, exp,
+                       data->ocd_grant, want, left);
+        }
+
+        if (data->ocd_connect_flags & OBD_CONNECT_INDEX) {
+                struct filter_obd *filter = &exp->exp_obd->u.filter;
+                struct lr_server_data *lsd = filter->fo_fsd;
+                int index = le32_to_cpu(lsd->lsd_ost_index);
+                
+                if (!(lsd->lsd_feature_compat &
+                      cpu_to_le32(OBD_COMPAT_OST))) {
+                        /* this will only happen on the first connect */
+                        lsd->lsd_ost_index = le32_to_cpu(data->ocd_index);
+                        lsd->lsd_feature_compat |= cpu_to_le32(OBD_COMPAT_OST);
+                        filter_update_server_data(exp->exp_obd, 
+                                                  filter->fo_rcvd_filp, lsd, 1);
+                } else if (index != data->ocd_index) {
+                        LCONSOLE_ERROR("Connection from %s to index "
+                                       "%u doesn't match actual OST "
+                                       "index %u, bad configuration?\n",
+                                       obd_export_nid2str(exp), index, 
+                                       data->ocd_index);
+                        RETURN(-EBADF);
                 }
         }
+        /* FIXME: Do the same with the MDS UUID and fsd_peeruuid.
+         * FIXME: We don't strictly need the COMPAT flag for that,
+         * FIXME: as fsd_peeruuid[0] will tell us if that is set.
+         * FIXME: We needed it for the index, as index 0 is valid. */
 
         RETURN(0);
 }
@@ -1781,7 +1808,8 @@ static int filter_reconnect(struct obd_export *exp, struct obd_device *obd,
 
 /* nearly identical to mds_connect */
 static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
-                          struct obd_uuid *cluuid, struct obd_connect_data *data)
+                          struct obd_uuid *cluuid,
+                          struct obd_connect_data *data)
 {
         struct obd_export *exp;
         struct filter_export_data *fed;
@@ -1932,6 +1960,7 @@ static void filter_grant_discard(struct obd_export *exp)
                  "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n",
                  obd->obd_name, filter->fo_tot_pending,
                  exp->exp_client_uuid.uuid, exp, fed->fed_pending);
+        /* fo_tot_pending is handled in filter_grant_commit as bulk finishes */
         LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty,
                  "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n",
                  obd->obd_name, filter->fo_tot_dirty,
@@ -2081,7 +2110,7 @@ int filter_update_fidea(struct obd_export *exp, struct inode *inode,
                        LPU64"/"LPU64")\n", oa->o_fid, oa->o_stripe_idx,
                        oa->o_generation, oa->o_id, group);
 
-                rc = fsfilt_set_md(obd, inode, handle, &ff, sizeof(ff));
+                rc = fsfilt_set_md(obd, inode, handle, &ff, sizeof(ff), "fid");
                 if (rc)
                         CERROR("store fid in object failed! rc: %d\n", rc);
         } else {
@@ -2188,7 +2217,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
         }
 
         rc = filter_finish_transno(exp, oti, rc);
-        
+
         err = fsfilt_commit(exp->exp_obd, inode, handle, 0);
         if (err) {
                 CERROR("error on commit, err = %d\n", err);
@@ -2228,7 +2257,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa,
                                     __FUNCTION__, 1);
         if (IS_ERR(dentry))
                 RETURN(PTR_ERR(dentry));
-                        
+
         filter = &exp->exp_obd->u.filter;
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
         lock_kernel();
@@ -2240,7 +2269,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa,
 
         res = ldlm_resource_get(exp->exp_obd->obd_namespace, NULL,
                                 res_id, LDLM_EXTENT, 0);
-        
+
         if (res != NULL) {
                 ns_lvbo = res->lr_namespace->ns_lvbo;
                 if (ns_lvbo && ns_lvbo->lvbo_update)
@@ -2249,7 +2278,7 @@ int filter_setattr(struct obd_export *exp, struct obdo *oa,
         }
 
         oa->o_valid = OBD_MD_FLID;
-        
+
         /* Quota release need uid/gid info */
         obdo_from_inode(oa, dentry->d_inode,
                         FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
@@ -2454,7 +2483,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
         struct filter_obd *filter;
         struct obd_statfs *osfs;
         int err = 0, rc = 0, recreate_obj = 0, i;
-        unsigned long enough_time = jiffies + (obd_timeout * HZ) / 3;
+        unsigned long enough_time = jiffies + (obd_timeout * HZ) / 4;
         __u64 next_id;
         void *handle = NULL;
         ENTRY;
@@ -2471,8 +2500,9 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 rc = filter_statfs(obd, osfs, jiffies - HZ);
                 if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
                         CDEBUG(D_HA, "OST out of space! avail "LPU64"\n",
-                              osfs->os_bavail<<filter->fo_obt.obt_sb->s_blocksize_bits);
-                        *num=0;
+                               osfs->os_bavail <<
+                                       filter->fo_obt.obt_sb->s_blocksize_bits);
+                        *num = 0;
                         rc = -ENOSPC;
                 }
                 OBD_FREE(osfs, sizeof(*osfs));
@@ -2481,7 +2511,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 }
         }
 
-        CDEBUG(D_HA, "%s: precreating %d objects\n", obd->obd_name, *num);
+        CDEBUG(D_HA, "%s: precreating %d objects in group "LPU64" at "LPU64"\n",
+               obd->obd_name, *num, group, oa->o_id);
 
         down(&filter->fo_create_lock);
 
@@ -2544,6 +2575,9 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                         GOTO(cleanup, rc = PTR_ERR(handle));
                 cleanup_phase = 3;
 
+                /* We mark object SUID+SGID to flag it for accepting UID+GID
+                 * from client on first write.  Currently the permission bits
+                 * on the OST are never used, so this is OK. */
                 rc = ll_vfs_create(dparent->d_inode, dchild,
                                    S_IFREG |  S_ISUID | S_ISGID | 0666, NULL);
                 if (rc) {
@@ -2579,7 +2613,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 if (rc)
                         break;
                 if (time_after(jiffies, enough_time)) {
-                        CDEBUG(D_INODE,"%s: precreate slow - want %d got %d \n",
+                        CDEBUG(D_HA, "%s: precreate slow - want %d got %d \n",
                                obd->obd_name, *num, i);
                         break;
                 }
@@ -2588,11 +2622,9 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
 
         up(&filter->fo_create_lock);
 
-        CDEBUG(D_HA, "%s: server last_objid for group "LPU64": "LPU64"\n",
-               obd->obd_name, group, filter->fo_last_objids[group]);
+        CDEBUG(D_HA, "%s: created %d objects for group "LPU64": "LPU64"\n",
+               obd->obd_name, i, group, filter->fo_last_objids[group]);
 
-        CDEBUG(D_HA, "%s: filter_precreate() created %d objects\n",
-               obd->obd_name, i);
         RETURN(rc);
 }
 
index b55e889..cf02053 100644 (file)
@@ -13,8 +13,6 @@
 #include <linux/obd.h>
 #include <linux/lustre_disk.h>
 
-#define FILTER_LAYOUT_VERSION "2"
-
 #define HEALTH_CHECK "health_check"
 #define FILTER_INIT_OBJID 0
 
@@ -39,9 +37,6 @@ struct filter_client_data {
         __u8  fcd_padding[LR_CLIENT_SIZE - 56];
 };
 
-#define FILTER_DENTRY_MAGIC 0x9efba101
-#define FILTER_FLAG_DESTROY 0x0001      /* destroy dentry on last file close */
-
 /* Limit the returned fields marked valid to those that we actually might set */
 #define FILTER_VALID_FLAGS (OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLGENER  |\
                             OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ|\
index 03c318e..ed768f7 100644 (file)
@@ -115,10 +115,14 @@ static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa)
         /* Update our accounting now so that statfs takes it into account.
          * Note that fed_dirty is only approximate and can become incorrect
          * if RPCs arrive out-of-order.  No important calculations depend
-         * on fed_dirty however. */
+         * on fed_dirty, but we must sanity-check it to avoid asserting. */
+        if ((long long)oa->o_dirty < 0)
+                oa->o_dirty = 0;
+        else if (oa->o_dirty > fed->fed_grant + 4 * FILTER_GRANT_CHUNK)
+                oa->o_dirty = fed->fed_grant + 4 * FILTER_GRANT_CHUNK;
         obd->u.filter.fo_tot_dirty += oa->o_dirty - fed->fed_dirty;
         if (fed->fed_grant < oa->o_dropped) {
-                CERROR("%s: cli %s/%p reports %u dropped > fed_grant %lu\n",
+                CDEBUG(D_HA,"%s: cli %s/%p reports %u dropped > fedgrant %lu\n",
                        obd->obd_name, exp->exp_client_uuid.uuid, exp,
                        oa->o_dropped, fed->fed_grant);
                 oa->o_dropped = 0;
@@ -694,8 +698,23 @@ static int filter_commitrw_read(struct obd_export *exp, struct obdo *oa,
                                 struct obd_trans_info *oti, int rc)
 {
         struct inode *inode = NULL;
+        struct ldlm_res_id res_id = { .name = { obj->ioo_id } };
+        struct ldlm_resource *resource = NULL;
+        struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
         ENTRY;
 
+        /* If oa != NULL then filter_preprw_read updated the inode atime
+         * and we should update the lvb so that other glimpses will also
+         * get the updated value. bug 5972 */
+        if (oa && ns && ns->ns_lvbo && ns->ns_lvbo->lvbo_update) {
+                resource = ldlm_resource_get(ns, NULL, res_id, LDLM_EXTENT, 0);
+
+                if (resource != NULL) {
+                        ns->ns_lvbo->lvbo_update(resource, NULL, 0, 1);
+                        ldlm_resource_putref(resource);
+                }
+        }
+
         if (res->dentry != NULL)
                 inode = res->dentry->d_inode;
 
index d972fe2..6a48bc6 100644 (file)
@@ -44,7 +44,6 @@
 #define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
 struct filter_iobuf {
         atomic_t          dr_numreqs;  /* number of reqs being processed */
-        struct bio       *dr_bios;     /* list of completed bios */
         wait_queue_head_t dr_wait;
         int               dr_max_pages;
         int               dr_npages;
@@ -139,8 +138,6 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
         }
 
         spin_lock_irqsave(&iobuf->dr_lock, flags);
-        bio->bi_private = iobuf->dr_bios;
-        iobuf->dr_bios = bio;
         if (iobuf->dr_error == 0)
                 iobuf->dr_error = error;
         spin_unlock_irqrestore(&iobuf->dr_lock, flags);
@@ -148,6 +145,12 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
         record_finish_io(iobuf, test_bit(BIO_RW, &bio->bi_rw) ?
                          OBD_BRW_WRITE : OBD_BRW_READ, error);
 
+        /* Completed bios used to be chained off iobuf->dr_bios and freed in
+         * filter_clear_dreq().  It was then possible to exhaust the biovec-256
+         * mempool when serious on-disk fragmentation was encountered,
+         * deadlocking the OST.  The bios are now released as soon as complete
+         * so the pool cannot be exhausted while IOs are competing. bug 10076 */
+        bio_put(bio);
         return 0;
 }
 
@@ -183,7 +186,6 @@ struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *filter,
                 goto failed_2;
 
         iobuf->dr_filter = filter;
-        iobuf->dr_bios = NULL;
         init_waitqueue_head(&iobuf->dr_wait);
         atomic_set(&iobuf->dr_numreqs, 0);
         spin_lock_init(&iobuf->dr_lock);
@@ -203,12 +205,6 @@ struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *filter,
 
 static void filter_clear_iobuf(struct filter_iobuf *iobuf)
 {
-        /* free all bios */
-        while (iobuf->dr_bios) {
-                struct bio *bio = iobuf->dr_bios;
-                iobuf->dr_bios = bio->bi_private;
-                bio_put(bio);
-        }
         iobuf->dr_npages = 0;
         atomic_set(&iobuf->dr_numreqs, 0);
 }
@@ -449,7 +445,7 @@ int filter_direct_io(int rw, struct dentry *dchild, struct filter_iobuf *iobuf,
                 LASSERT(iobuf->dr_npages > 0);
                 create = 1;
                 sem = &obd->u.filter.fo_alloc_lock;
-                
+
                 lquota_enforce(quota_interface, obd, iobuf->dr_ignore_quota);
         }
 remap:
@@ -626,9 +622,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         if (iattr.ia_valid & (ATTR_UID | ATTR_GID)) {
                 CDEBUG(D_INODE, "update UID/GID to %lu/%lu\n",
                        (unsigned long)oa->o_uid, (unsigned long)oa->o_gid);
-                
+
                 cap_raise(current->cap_effective, CAP_SYS_RESOURCE);
-                
+
                 iattr.ia_valid |= ATTR_MODE;
                 iattr.ia_mode = inode->i_mode;
                 if (iattr.ia_valid & ATTR_UID)
index ce4d5b3..066f8dc 100644 (file)
@@ -80,9 +80,7 @@ static int filter_lvbo_init(struct ldlm_resource *res)
         if (dentry->d_inode == NULL)
                 GOTO(out_dentry, rc = -ENOENT);
 
-        lvb->lvb_size = dentry->d_inode->i_size;
-        lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
-        lvb->lvb_blocks = dentry->d_inode->i_blocks;
+        inode_init_lvb(dentry->d_inode, lvb);
 
         CDEBUG(D_DLMTRACE, "res: "LPU64" initial lvb size: "LPU64", "
                "mtime: "LPU64", blocks: "LPU64"\n",
index 361bc77..0f6d847 100644 (file)
@@ -1169,6 +1169,8 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                    struct lov_stripe_md *md, obd_count page_count,
                    struct brw_page *pga, struct obd_trans_info *oti)
 {
+        struct obdo *saved_oa = NULL;
+        int          rc;
         ENTRY;
 
         if (cmd & OBD_BRW_CHECK) {
@@ -1181,9 +1183,10 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                 RETURN(0);
         }
 
+        rc = 0;
+
         while (page_count) {
                 obd_count pages_per_brw;
-                int rc;
 
                 if (page_count > PTLRPC_MAX_BRW_PAGES)
                         pages_per_brw = PTLRPC_MAX_BRW_PAGES;
@@ -1193,15 +1196,32 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                 sort_brw_pages(pga, pages_per_brw);
                 pages_per_brw = max_unfragmented_pages(pga, pages_per_brw);
 
+                if (saved_oa != NULL) {
+                        /* restore previously saved oa */
+                        *oa = *saved_oa;
+                } else if (page_count > pages_per_brw) {
+                        /* save a copy of oa (brw will clobber it) */
+                        OBD_ALLOC(saved_oa, sizeof(*saved_oa));
+                        if (saved_oa == NULL) {
+                                CERROR("Can't save oa (ENOMEM)\n");
+                                RETURN(-ENOMEM);
+                        }
+                        *saved_oa = *oa;
+                }
+                
                 rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga);
 
                 if (rc != 0)
-                        RETURN(rc);
+                        break;
 
                 page_count -= pages_per_brw;
                 pga += pages_per_brw;
         }
-        RETURN(0);
+
+        if (saved_oa != NULL)
+                OBD_FREE(saved_oa, sizeof(*saved_oa));
+
+        RETURN(rc);
 }
 
 static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa,
@@ -1419,9 +1439,13 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
 
         if (rc == 0 && oa != NULL) {
                 if (oa->o_valid & OBD_MD_FLBLOCKS)
-                        oap->oap_loi->loi_blocks = oa->o_blocks;
+                        oap->oap_loi->loi_lvb.lvb_blocks = oa->o_blocks;
                 if (oa->o_valid & OBD_MD_FLMTIME)
-                        oap->oap_loi->loi_mtime = oa->o_mtime;
+                        oap->oap_loi->loi_lvb.lvb_mtime = oa->o_mtime;
+                if (oa->o_valid & OBD_MD_FLATIME)
+                        oap->oap_loi->loi_lvb.lvb_atime = oa->o_atime;
+                if (oa->o_valid & OBD_MD_FLCTIME)
+                        oap->oap_loi->loi_lvb.lvb_ctime = oa->o_ctime;
         }
 
         if (oap->oap_oig) {
@@ -2734,9 +2758,7 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
         if ((*flags & LDLM_FL_HAS_INTENT && rc == ELDLM_LOCK_ABORTED) || !rc) {
                 CDEBUG(D_INODE,"got kms "LPU64" blocks "LPU64" mtime "LPU64"\n",
                        lvb.lvb_size, lvb.lvb_blocks, lvb.lvb_mtime);
-                lsm->lsm_oinfo->loi_rss = lvb.lvb_size;
-                lsm->lsm_oinfo->loi_mtime = lvb.lvb_mtime;
-                lsm->lsm_oinfo->loi_blocks = lvb.lvb_blocks;
+                lsm->lsm_oinfo->loi_lvb = lvb;
         }
 
         RETURN(rc);
index 8e21102..703578b 100644 (file)
@@ -767,7 +767,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 target_committed_to_req(req);
                 ptlrpc_reply(req);
         } else if (!comms_error) {
-                /* only reply if comms OK */
+                /* Only reply if there was no comms problem with bulk */
                 target_committed_to_req(req);
                 req->rq_status = rc;
                 ptlrpc_error(req);
@@ -778,8 +778,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         req->rq_reply_state = NULL;
                 }
                 if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
-                        CERROR("bulk IO comms error: "
-                               "evicting %s@%s id %s\n",
+                        CERROR("%s: bulk IO comm error evicting %s@%s id %s\n",
+                               req->rq_export->exp_obd->obd_name,
                                req->rq_export->exp_client_uuid.uuid,
                                req->rq_export->exp_connection->c_remote_uuid.uuid,
                                libcfs_id2str(req->rq_peer));
index f64addf..091eba1 100644 (file)
@@ -806,8 +806,6 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
         }
 
         if (imp->imp_state == LUSTRE_IMP_RECOVER) {
-                char   *nidstr;
-
                 CDEBUG(D_HA, "reconnected to %s@%s\n",
                        imp->imp_target_uuid.uuid,
                        imp->imp_connection->c_remote_uuid.uuid);
@@ -820,15 +818,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 
                 deuuidify(imp->imp_target_uuid.uuid, NULL,
                           &target_start, &target_len);
-                nidstr = libcfs_nid2str(imp->imp_connection->c_peer.nid);
-
-                LCONSOLE_INFO("Connection restored to service %.*s using nid "
-                              "%s.\n", target_len, target_start, nidstr);
-
-                CWARN("%s: connection restored to %s@%s\n",
-                      imp->imp_obd->obd_name,
-                      imp->imp_target_uuid.uuid,
-                      imp->imp_connection->c_remote_uuid.uuid);
+                LCONSOLE_INFO("%s: Connection restored to service %.*s "
+                              "using nid %s.\n", imp->imp_obd->obd_name,
+                              target_len, target_start,
+                              libcfs_nid2str(imp->imp_connection->c_peer.nid));
         }
 
         if (imp->imp_state == LUSTRE_IMP_FULL) {
index cfac594..2752ef4 100644 (file)
@@ -339,11 +339,15 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult)
         req->rq_repmsg->status = req->rq_status;
         req->rq_repmsg->opc    = req->rq_reqmsg->opc;
 
-        if (req->rq_export == NULL
+        if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
                 conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL);
         else
                 conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
 
+        if (conn == NULL) {
+                CERROR("not replying on NULL connection\n"); /* bug 9635 */
+                return -ENOTCONN;
+        }
         atomic_inc (&svc->srv_outstanding_replies);
         ptlrpc_rs_addref(rs);                   /* +1 ref for the network */
 
index 2aaef4b..7807fef 100644 (file)
@@ -68,7 +68,8 @@ int ptlrpc_ping(struct obd_import *imp)
 static void ptlrpc_update_next_ping(struct obd_import *imp)
 {
         imp->imp_next_ping = jiffies + HZ *
-                (imp->imp_state == LUSTRE_IMP_DISCON ? 10 : PING_INTERVAL);
+                (imp->imp_state == LUSTRE_IMP_DISCON ? RECONNECT_INTERVAL :
+                                                       PING_INTERVAL);
 }
 
 void ptlrpc_ping_import_soon(struct obd_import *imp)
@@ -319,6 +320,7 @@ void ptlrpc_pinger_wake_up()
  * the current implementation of pinger in liblustre is not optimized
  */
 
+#ifdef ENABLE_PINGER
 static struct pinger_data {
         int             pd_recursion;
         unsigned long   pd_this_ping;   /* jiffies */
@@ -464,13 +466,14 @@ out:
 }
 
 static void *pinger_callback = NULL;
+#endif /* ENABLE_PINGER */
 
 int ptlrpc_start_pinger(void)
 {
-        memset(&pinger_args, 0, sizeof(pinger_args));
 #ifdef ENABLE_PINGER
-        pinger_callback =
-                liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args);
+        memset(&pinger_args, 0, sizeof(pinger_args));
+        pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs,
+                                                           &pinger_args);
 #endif
         return 0;
 }
@@ -486,6 +489,7 @@ int ptlrpc_stop_pinger(void)
 
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
+#ifdef ENABLE_PINGER
         down(&pinger_sem);
         ptlrpc_update_next_ping(imp);
         if (pinger_args.pd_set == NULL &&
@@ -495,6 +499,7 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
                 pinger_args.pd_next_ping = imp->imp_next_ping;
         }
         up(&pinger_sem);
+#endif
 }
 
 int ptlrpc_pinger_add_import(struct obd_import *imp)
index bd0843f..8513641 100644 (file)
@@ -271,7 +271,7 @@ void ptlrpc_wake_delayed(struct obd_import *imp)
 
 void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req)
 {
-        struct obd_import *imp= failed_req->rq_import;
+        struct obd_import *imp = failed_req->rq_import;
         unsigned long flags;
         ENTRY;
 
index b30dfc0..bd78c00 100644 (file)
@@ -120,7 +120,6 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
 
                 /* Initialize quota limit to MIN_QLIMIT */
                 LASSERT(oqctl->qc_dqblk.dqb_valid == QIF_BLIMITS);
-                LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
                 LASSERT(oqctl->qc_dqblk.dqb_bsoftlimit == 0);
 
                 /* There might be a pending dqacq/dqrel (which is going to
@@ -129,6 +128,10 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                 qctxt_wait_pending_dqacq(&obd->u.obt.obt_qctxt, 
                                          oqctl->qc_id, oqctl->qc_type, 1);
 
+                if (!oqctl->qc_dqblk.dqb_bhardlimit)
+                        goto adjust;
+                
+                LASSERT(oqctl->qc_dqblk.dqb_bhardlimit == MIN_QLIMIT);
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
 
@@ -143,7 +146,7 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
 
                 if (rc)
                         RETURN(rc);
-
+adjust:
                 /* Trigger qunit pre-acquire */
                 if (oqctl->qc_type == USRQUOTA)
                         uid = oqctl->qc_id;
index 8eb14ef..4db3a71 100644 (file)
@@ -618,17 +618,20 @@ out:
 }
 
 static int mds_init_slave_ilimits(struct obd_device *obd,
-                                  struct obd_quotactl *oqctl)
+                                  struct obd_quotactl *oqctl, int set)
 {
         /* XXX: for file limits only adjust local now */
         unsigned int uid = 0, gid = 0;
-        struct obd_quotactl *ioqc;
+        struct obd_quotactl *ioqc = NULL;
         int rc;
         ENTRY;
 
         /* if we are going to set zero limit, needn't init slaves */
         if (!oqctl->qc_dqblk.dqb_ihardlimit && !oqctl->qc_dqblk.dqb_isoftlimit)
                 RETURN(0);
+        
+        if (!set)
+                goto acquire;
 
         OBD_ALLOC_PTR(ioqc);
         if (!ioqc)
@@ -644,7 +647,7 @@ static int mds_init_slave_ilimits(struct obd_device *obd,
         rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
         if (rc)
                 GOTO(out, rc);
-
+acquire:
         /* trigger local qunit pre-acquire */
         if (oqctl->qc_type == USRQUOTA)
                 uid = oqctl->qc_id;
@@ -659,12 +662,13 @@ static int mds_init_slave_ilimits(struct obd_device *obd,
         /* FIXME initialize all slaves in CMD */
         EXIT;
 out:
-        OBD_FREE_PTR(ioqc);
+        if (ioqc)
+                OBD_FREE_PTR(ioqc);
         return rc;
 }
 
 static int mds_init_slave_blimits(struct obd_device *obd,
-                                  struct obd_quotactl *oqctl)
+                                  struct obd_quotactl *oqctl, int set)
 {
         struct mds_obd *mds = &obd->u.mds;
         struct obd_quotactl *ioqc;
@@ -684,12 +688,14 @@ static int mds_init_slave_blimits(struct obd_device *obd,
         ioqc->qc_id = oqctl->qc_id;
         ioqc->qc_type = oqctl->qc_type;
         ioqc->qc_dqblk.dqb_valid = QIF_BLIMITS;
-        ioqc->qc_dqblk.dqb_bhardlimit = MIN_QLIMIT;
+        ioqc->qc_dqblk.dqb_bhardlimit = set ? MIN_QLIMIT : 0;
 
-        /* set local limit to MIN_QLIMIT */
-        rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
-        if (rc)
-                GOTO(out, rc);
+        /* set local limit to MIN_QLIMIT, but only when "set" is non-zero */
+        if (set) {
+                rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, ioqc);
+                if (rc)
+                        GOTO(out, rc);
+        }
 
         /* trigger local qunit pre-acquire */
         if (oqctl->qc_type == USRQUOTA)
@@ -719,7 +725,7 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         time_t btime, itime;
         struct lustre_dquot *dquot;
         struct obd_dqblk *dqblk = &oqctl->qc_dqblk;
-        int rc;
+        int set, rc;
         ENTRY;
 
         down(&mds->mds_qonoff_sem);
@@ -797,16 +803,18 @@ int mds_set_dqblk(struct obd_device *obd, struct obd_quotactl *oqctl)
         }
 
         up(&mds->mds_qonoff_sem);
-        if (dqblk->dqb_valid & QIF_ILIMITS && !(ihardlimit || isoftlimit)) {
-                rc = mds_init_slave_ilimits(obd, oqctl);
+        if (dqblk->dqb_valid & QIF_ILIMITS) {
+                set = !(ihardlimit || isoftlimit);
+                rc = mds_init_slave_ilimits(obd, oqctl, set);
                 if (rc) {
                         CERROR("init slave ilimits failed! (rc:%d)\n", rc);
                         goto revoke_out;
                 }
         }
 
-        if (dqblk->dqb_valid & QIF_BLIMITS && !(bhardlimit || bsoftlimit)) {
-                rc = mds_init_slave_blimits(obd, oqctl);
+        if (dqblk->dqb_valid & QIF_BLIMITS) {
+                set = !(bhardlimit || bsoftlimit);
+                rc = mds_init_slave_blimits(obd, oqctl, set);
                 if (rc) {
                         CERROR("init slave blimits failed! (rc:%d)\n", rc);
                         goto revoke_out;
index 2444c65..89edc5b 100755 (executable)
@@ -135,33 +135,38 @@ restart() {
 
 status() {
        STATE="stopped"
+       RETVAL=1
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
 
        # check for any routes - on a portals router this is the only thing
-       [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running"
+       [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
        
        # check for any configured devices (may indicate partial startup)
-       [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial"
+       [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=1
 
        # check for either a server or a client filesystem
        MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
        OST="`ls /proc/fs/lustre/obdfilter/*/recovery_status 2> /dev/null`"
        LLITE="`ls /proc/fs/lustre/llite/fs* 2> /dev/null`"
-       [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running"
+       [ "$MDS" -o "$OST" -o "$LLITE" ] && STATE="running" && RETVAL=0
 
        # check for server disconnections 
        DISCON="`grep -v FULL /proc/fs/lustre/*c/*/*server_uuid 2> /dev/null`"
-       [ "$DISCON" ] && STATE="disconnected"
+       [ "$DISCON" ] && STATE="disconnected" && RETVAL=0
 
        # check for servers in recovery
-       [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery"
+       [ "$MDS$OST" ] && grep -q RECOV $MDS $OST && STATE="recovery" && RETVAL=0
 
-        # check for error in health_check
-        HEALTH="/proc/fs/lustre/health_check"
-        [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy"
+       # check for error in health_check
+       HEALTH="/proc/fs/lustre/health_check"
+       [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=2
 
-        # check for LBUG
-        [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG"
+       # check for LBUG
+       [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=3
+
+       # Check if the service really exists
+       DUMMY=`lctl dl | grep $SERVICE`
+       [ $? -ne 0 ] && STATE="not_found" && RETVAL=5
 
        echo $STATE
 }
index 3d47860..a7d0a0a 100644 (file)
@@ -13,7 +13,11 @@ noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
 noinst_SCRIPTS += sanity.sh rundbench
 
 EXTRA_DIST = $(pkgexample_scripts) $(noinst_SCRIPTS) $(noinst_DATA) \
-       sanity.sh rundbench
+             sanity.sh rundbench \
+             acl/run acl/make-tree acl/getfacl-noacl.test acl/cp.test \
+             acl/setfacl.test acl/permissions.test acl/misc.test \
+             acl/inheritance.test
+
 if TESTS
 pkgexample_SCRIPTS = $(pkgexample_scripts)
 noinst_PROGRAMS = openunlink testreq truncate directio openme writeme
index b12b068..2b185f9 100644 (file)
@@ -32,4 +32,4 @@ FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD
 POWER_DOWN=${POWER_DOWN:-"powerman --off"}
 POWER_UP=${POWER_UP:-"powerman --on"}
 
-PDSH=no_dsh
+PDSH=${PDSH:-no_dsh}
index fdd3afb..cff6ffd 100644 (file)
@@ -10,7 +10,7 @@
 set -e
 
 ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 
+# bug number for skipped test:
 ALWAYS_EXCEPT=" $CONF_SANITY_EXCEPT"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
@@ -53,6 +53,7 @@ start_mds() {
        echo "start mds service on `facet_active_host mds`"
        start mds --reformat $MDSLCONFARGS  || return 94
 }
+
 stop_mds() {
        echo "stop mds service on `facet_active_host mds`"
        stop mds $@  || return 97
@@ -267,11 +268,9 @@ test_5d() {
 
        [ -d $MOUNT ] || mkdir -p $MOUNT
        $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
-       llmount -o nettype=$NETTYPE,$MOUNTOPT $mds_HOST://mds_svc/client_facet $MOUNT  || return 1 
+       llmount -o nettype=$NETTYPE,$MOUNTOPT `facet_nid mds`://mds_svc/client_facet $MOUNT  || return 1
 
-       umount $MOUNT || return 2
-       # cleanup client modules
-       $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
+       umount_client $MOUNT || return 2
        
        stop_mds || return 3
 
@@ -327,18 +326,19 @@ test_9() {
         start_ost
         start_mds
         mount_client $MOUNT
-        CHECK_PTLDEBUG="`cat /proc/sys/lnet/debug`"
-        if [ $CHECK_PTLDEBUG = "1" ]; then
+        CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug | sed -e 's/.* = //'`"
+        if [ "$CHECK_PTLDEBUG" ] && [ $CHECK_PTLDEBUG -eq 1 ]; then
            echo "lmc --debug success"
         else
            echo "lmc --debug: want 1, have $CHECK_PTLDEBUG"
            return 1
         fi
-        CHECK_SUBSYSTEM="`cat /proc/sys/lnet/subsystem_debug`"
-        if [ $CHECK_SUBSYSTEM = "2" ]; then
+       # again with the pdsh prefix
+        CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug|cut -d= -f2`"
+        if [ "$CHECK_SUBSYS" ] && [ $CHECK_SUBSYS -eq 2 ]; then
            echo "lmc --subsystem success"
         else
-           echo "lmc --subsystem: want 2, have $CHECK_SUBSYSTEM"
+           echo "lmc --subsystem: want 2, have $CHECK_SUBSYS"
            return 1
         fi
         check_mount || return 41
@@ -352,14 +352,14 @@ test_9() {
         start_ost
         start_mds
         CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug | cut -d= -f2`"
-        if [ $CHECK_PTLDEBUG = "3" ]; then
+        if [ "$CHECK_PTLDEBUG" ] && [ $CHECK_PTLDEBUG -eq 3 ]; then
            echo "lconf --debug success"
         else
            echo "lconf --debug: want 3, have $CHECK_PTLDEBUG"
            return 1
         fi
-        CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug|cut -d= -f2`"
-        if [ $CHECK_SUBSYS = "20" ]; then
+        CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug | cut -d= -f2`"
+        if [ "$CHECK_SUBSYS" ] && [ $CHECK_SUBSYS -eq 20 ]; then
            echo "lconf --subsystem success"
         else
            echo "lconf --subsystem: want 20, have $CHECK_SUBSYS"
@@ -522,6 +522,8 @@ test_13() {
                        | sed "s/ /\n\r/g" | awk -F"'" '/uuid=/{print $2}'`
         FOUNDMDS2UUID=`awk -F"'" '/<mds .*uuid=/' $XMLCONFIG | sed -n '2p' \
                        | sed "s/ /\n\r/g" | awk -F"'" '/uuid=/{print $2}'`
+       [ -z "$FOUNDMDS1UUID" ] && echo "MDS1 UUID empty" && return 1
+       [ -z "$FOUNDMDS2UUID" ] && echo "MDS2 UUID empty" && return 1
         if ([ $EXPECTEDMDS1UUID = $FOUNDMDS1UUID ] && [ $EXPECTEDMDS2UUID = $FOUNDMDS2UUID ]) || \
            ([ $EXPECTEDMDS1UUID = $FOUNDMDS2UUID ] && [ $EXPECTEDMDS2UUID = $FOUNDMDS1UUID ]); then
                 echo "Success:long uuid truncated successfully and being unique."
@@ -574,7 +576,7 @@ test_14() {
 
         FOUNDSTRING=`awk -F"<" '/<mkfsoptions>/{print $2}' $XMLCONFIG`
         EXPECTEDSTRING="mkfsoptions>-Llabel_conf_14"
-        if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then
+        if [ "$EXPECTEDSTRING" != "$FOUNDSTRING" ]; then
                 echo "Error: expected: $EXPECTEDSTRING; found: $FOUNDSTRING"
                 return 1
         fi
@@ -585,7 +587,7 @@ test_14() {
         start_ost
         start_mds
         mount_client $MOUNT || return $?
-        if [ -z "`dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then
+        if [ -z "`do_facet ost dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then
                 echo "Error: the mkoptions not applied to mke2fs of ost."
                 return 1
         fi
@@ -616,11 +618,11 @@ test_15() {
        [ -f "$MOUNTLUSTRE" ] && echo "can't move $MOUNTLUSTRE" && return 40
        trap cleanup_15 EXIT INT
        [ ! `cp $(which llmount) $MOUNTLUSTRE` ] || return $?
-       do_node `hostname` mkdir -p $MOUNT 2> /dev/null
+       do_facet client "mkdir -p $MOUNT 2> /dev/null"
        # load llite module on the client if it isn't in /lib/modules
-       do_node `hostname` lconf --nosetup --node client_facet $XMLCONFIG
-       do_node `hostname` mount -t lustre -o nettype=$NETTYPE,$MOUNTOPT \
-               `facet_active_host mds`:/mds_svc/client_facet $MOUNT ||return $?
+       do_facet client "$LCONF --nosetup --node client_facet $XMLCONFIG"
+       do_facet client "mount -t lustre -o $MOUNTOPT \
+               `facet_nid mds`:/mds_svc/client_facet $MOUNT" ||return $?
        echo "mount lustre on $MOUNT with $MOUNTLUSTRE: success"
        [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
        check_mount || return 41
@@ -629,7 +631,7 @@ test_15() {
        [ -f "$MOUNTLUSTRE" ] && rm -f $MOUNTLUSTRE
        echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....."
        do_node `hostname` mount -t lustre -o nettype=$NETTYPE,$MOUNTOPT \
-               `facet_active_host mds`:/mds_svc/client_facet $MOUNT &&return $?
+               `facet_nid mds`:/mds_svc/client_facet $MOUNT &&return $?
        echo "mount lustre on $MOUNT without $MOUNTLUSTRE failed as expected"
        cleanup || return $?
        cleanup_15
@@ -638,7 +640,7 @@ run_test 15 "zconf-mount without /sbin/mount.lustre (should return error)"
 
 test_16() {
         TMPMTPT="/mnt/conf16"
-                                                                                                                             
+
         if [ ! -f "$MDSDEV" ]; then
             echo "no $MDSDEV existing, so mount Lustre to create one"
             start_ost
@@ -647,45 +649,43 @@ test_16() {
             check_mount || return 41
             cleanup || return $?
         fi
-                                                                                                                             
+
         echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
-        [ -d $TMPMTPT ] || mkdir -p $TMPMTPT
-        mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $?
-        chmod 555 $TMPMTPT/OBJECTS || return $?
-        chmod 555 $TMPMTPT/LOGS || return $?
-        chmod 555 $TMPMTPT/PENDING || return $?
-        umount $TMPMTPT || return $?
-                                                                                                                             
+        do_facet mds "[ -d $TMPMTPT ] || mkdir -p $TMPMTPT;
+                      mount -o loop -t ext3 $MDSDEV $TMPMTPT || return \$?;
+                      chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} || return \$?;
+                      umount $TMPMTPT || return \$?" || return $?
+
         echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre"
         start_ost
         start_mds
         mount_client $MOUNT
         check_mount || return 41
         cleanup || return $?
-                                                                                                                             
+
         echo "read the mode of OBJECTS/LOGS/PENDING and check if they has been changed properly"
-        EXPECTEDOBJECTSMODE=`debugfs -R "stat OBJECTS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
-        EXPECTEDLOGSMODE=`debugfs -R "stat LOGS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
-        EXPECTEDPENDINGMODE=`debugfs -R "stat PENDING" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'`
+        EXPECTEDOBJECTSMODE=`do_facet mds "debugfs -R 'stat OBJECTS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"`
+        EXPECTEDLOGSMODE=`do_facet mds "debugfs -R 'stat LOGS' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"`
+        EXPECTEDPENDINGMODE=`do_facet mds "debugfs -R 'stat PENDING' $MDSDEV 2> /dev/null" | grep 'Mode: ' | sed -e "s/.*Mode: *//" -e "s/ *Flags:.*//"`
 
         if [ "$EXPECTEDOBJECTSMODE" = "0777" ]; then
                 echo "Success:Lustre change the mode of OBJECTS correctly"
         else
-                echo "Error: Lustre does not change the mode of OBJECTS properly"
+                echo "Error: Lustre does not change mode of OBJECTS properly"
                 return 1
         fi
-                                                                                                                             
+
         if [ "$EXPECTEDLOGSMODE" = "0777" ]; then
                 echo "Success:Lustre change the mode of LOGS correctly"
         else
-                echo "Error: Lustre does not change the mode of LOGS properly"
+                echo "Error: Lustre does not change mode of LOGS properly"
                 return 1
         fi
-                                                                                                                             
+
         if [ "$EXPECTEDPENDINGMODE" = "0777" ]; then
                 echo "Success:Lustre change the mode of PENDING correctly"
         else
-                echo "Error: Lustre does not change the mode of PENDING properly"
+                echo "Error: Lustre does not change mode of PENDING properly"
                 return 1
         fi
 }
@@ -704,10 +704,7 @@ test_17() {
         fi
 
         echo "Remove mds config log"
-        [ -d $TMPMTPT ] || mkdir -p $TMPMTPT
-        mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $?
-        rm -f $TMPMTPT/LOGS/mds_svc || return $?
-        umount $TMPMTPT || return $?
+        do_facet mds "debugfs -w -R 'unlink LOGS/mds_svc' $MDSDEV || return \$?" || return $?
 
         start_ost
        start mds $MDSLCONFARGS && return 42
@@ -721,15 +718,15 @@ test_18() {
         OLDMDSSIZE=$MDSSIZE
         MDSSIZE=2000000
         gen_config
-                                                                                                                             
+
         echo "mount lustre system..."
         start_ost
         start_mds
         mount_client $MOUNT
         check_mount || return 41
-                                                                                                                             
+
         echo "check journal size..."
-        FOUNDJOURNALSIZE=`debugfs -R "stat <8>" $MDSDEV | awk '/Size: / { print $6; exit;}'`
+        FOUNDJOURNALSIZE=`do_facet mds "debugfs -R 'stat <8>' $MDSDEV" | awk '/Size: / { print $NF; exit;}'`
         if [ "$FOUNDJOURNALSIZE" = "79691776" ]; then
                 echo "Success:lconf creates large journals"
         else
@@ -737,9 +734,9 @@ test_18() {
                 echo "expected journal size: 79691776(76M), found journal size: $FOUNDJOURNALSIZE"
                 return 1
         fi
-                                                                                                                             
+
         cleanup || return $?
-                                                                                                                             
+
         MDSSIZE=$OLDMDSSIZE
         gen_config
 }
index a0f4c39..6cf43ee 100644 (file)
@@ -531,7 +531,10 @@ check_trunc_hack(void)
 
        ftruncate(fd, (off_t)0);
        ftruncate(fd, (off_t)100000);
-       fstat(fd, &statbuf);
+       if (fstat(fd, &statbuf)) {
+               prterr("trunc_hack: fstat");
+               statbuf.st_size = -1;
+       }
        if (statbuf.st_size != (off_t)100000) {
                prt("no extend on truncate! not posix!\n");
                exit(130);
index 35dbadd..deca747 100755 (executable)
@@ -50,6 +50,7 @@ if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
        echo "$LEAK_LUSTRE" 1>&2
        echo "$LEAK_PORTALS" 1>&2
        mv $TMP/debug $TMP/debug-leak.`date +%s`
+       echo "Memory leaks detected"
        exit 254
 fi
 lsmod | grep lnet && echo "modules still loaded" && exit 1
index 4044610..98b8d12 100644 (file)
@@ -79,6 +79,7 @@ run_test 0 "Prepare fileset"
 test_1() {
     ./chownmany 1000 $DIR/llog-%d $LCOUNT
     sleep 5
+    $CHECKSTAT -u \#1000 $DIR/llog-* || return 4
 }
 run_test 1 "Do chowns"
 
@@ -88,13 +89,14 @@ test_2() {
     fail ost
     ./chownmany 500 $DIR/llog-%d $HALFCOUNT $LCOUNT
     sleep 5
+    $CHECKSTAT -u \#500 $DIR/llog-* || return 5
 }
-#run_test 2 "Fail OST during chown"
+run_test 2 "Fail OST during chown"
 
 test_3() {
     ./unlinkmany $DIR/llog-%d $LCOUNT
     sleep 2
-    $CHECKSTAT -t file $DIR/llog-* && return 1 || true
+    $CHECKSTAT -t file $DIR/llog-* && return 10 || true
 }
 run_test 3 "Remove testset"
 
index c02ef0f..1722afb 100644 (file)
@@ -148,13 +148,15 @@ int main(int argc, char **argv)
        nblocks = size / bsize;
        buf = malloc(bsize);
        if (buf == NULL) {
-               LOG(LOG_CRIT, "malloc(%i) failure: %m\n", bsize);
+               LOG(LOG_CRIT, "malloc(%lu) failure: %s\n", (unsigned long)bsize,
+                   strerror(errno));
                return RR_MALLOC;
        }
 
        fd = open(fname, (preclean ? O_RDWR : O_RDONLY) | O_CREAT, 0700);
        if (fd == -1) {
-               LOG(LOG_CRIT, "malloc(\"%s\") failure: %m\n", fname);
+               LOG(LOG_CRIT, "open(\"%s\") failure: %s\n", fname,
+                   strerror(errno));
                return RR_OPEN;
        }
        if (preclean) {
@@ -167,7 +169,8 @@ int main(int argc, char **argv)
-                       memset(buf, bsize, seed + i++);
+                       memset(buf, seed + i++, bsize); /* (dst, byte, len) */
                        ret = write(fd, buf, count);
                        if (ret < 0) {
-                               LOG(LOG_CRIT, "write() failure: %m\n");
+                               LOG(LOG_CRIT, "write() failure: %s\n",
+                                   strerror(errno));
                                return RR_PRECLEAN;
                        }
                }
@@ -188,8 +191,8 @@ int main(int argc, char **argv)
                        ret = pread(fd, buf, bsize, (block_nr + j) * bsize);
                        if (ret != bsize) {
                                LOG(LOG_CRIT,
-                                   "pread(...%zi, %li) got: %zi, %m\n",
-                                   bsize, block_nr * bsize, ret);
+                                   "pread(...%zi, %li) got: %zi, %s\n", bsize,
+                                   block_nr * bsize, ret, strerror(errno));
                                return RR_READ;
                        }
                }
index 4dade7d..0f1ceb7 100755 (executable)
@@ -136,7 +136,7 @@ run_test 9 "pause bulk on OST (bug 1420)"
 #bug 1521
 test_10() {
     do_facet client mcreate $MOUNT/$tfile        || return 1
-    drop_bl_callback "chmod 0777 $MOUNT/$tfile"  || return 2
+    drop_bl_callback "chmod 0777 $MOUNT/$tfile"  || echo "evicted as expected"
     # wait for the mds to evict the client
     #echo "sleep $(($TIMEOUT*2))"
     #sleep $(($TIMEOUT*2))
@@ -155,8 +155,7 @@ test_11(){
     cancel_lru_locks OSC
 
     do_facet client multiop $MOUNT/$tfile or  || return 3
-    drop_bl_callback multiop $MOUNT/$tfile Ow  || 
-        echo "client evicted, as expected"
+    drop_bl_callback multiop $MOUNT/$tfile Ow || echo "evicted as expected"
 
     do_facet client munlink $MOUNT/$tfile  || return 4
 }
@@ -316,7 +315,7 @@ run_test 18b "eviction and reconnect clears page cache (2766)"
 test_19a() {
     f=$MOUNT/$tfile
     do_facet client mcreate $f        || return 1
-    drop_ldlm_cancel "chmod 0777 $f"  || echo evicted
+    drop_ldlm_cancel "chmod 0777 $f"  || echo "evicted as expected"
 
     do_facet client checkstat -v -p 0777 $f  || echo evicted
     # let the client reconnect
@@ -446,8 +445,8 @@ test_27() {
 run_test 27 "fail LOV while using OSC's"
 
 test_28() {      # bug 6086 - error adding new clients
-       do_facet client mcreate $MOUNT/$tfile        || return 1
-       drop_bl_callback "chmod 0777 $MOUNT/$tfile"  || return 2
+       do_facet client mcreate $MOUNT/$tfile       || return 1
+       drop_bl_callback "chmod 0777 $MOUNT/$tfile" ||echo "evicted as expected"
        #define OBD_FAIL_MDS_ADD_CLIENT 0x12f
        do_facet mds sysctl -w lustre.fail_loc=0x8000012f
        # fail once (evicted), reconnect fail (fail_loc), ok
index fa7b783..5fe9d3a 100755 (executable)
@@ -2,8 +2,8 @@
 
 set -e
 
-# bug number:  6088 
-ALWAYS_EXCEPT="8    $REPLAY_DUAL_EXCEPT"
+# bug number:  6088 10124
+ALWAYS_EXCEPT="8    15c   $REPLAY_DUAL_EXCEPT"
 
 LUSTRE=${LUSTRE:-`dirname $0`/..}
 . $LUSTRE/tests/test-framework.sh
index 6528fee..8bb8f63 100755 (executable)
@@ -15,8 +15,8 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
 
 # Skip these tests
-# bug number: 2766 
-ALWAYS_EXCEPT="0b  $REPLAY_SINGLE_EXCEPT"
+# bug number: 2766 9930
+ALWAYS_EXCEPT="0b  39   $REPLAY_SINGLE_EXCEPT"
 
 build_test_filter
 
index b7201f2..515a347 100644 (file)
@@ -5,7 +5,11 @@ TMP=${TMP:-/tmp}
 MDS=`ls /proc/fs/lustre/mds | grep -v num_refs | head -n 1`
 [ -z "$MDS" ] && echo "no MDS available, skipping llog test" && exit 0
 
-insmod ../obdclass/llog_test.o || exit 1
+case `uname -r` in
+2.4.*) insmod ../obdclass/llog_test.o || exit 1 ;;
+2.6.*) insmod ../obdclass/llog_test.ko || exit 1 ;;
+*) echo "unknown kernel version `uname -r`" && exit 99 ;;
+esac
 lctl modules > $TMP/ogdb-`hostname`
 echo "NOW reload debugging syms.."
 
index 80fc412..101c7b7 100644 (file)
@@ -7,21 +7,18 @@
 set -e
 
 ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 2108 3637 3561 5188/5749
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c  45   68"}
+# bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42b  42c  42d  45   68"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 [ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 64b 71 101"
 
 case `uname -r` in
-2.4*) FSTYPE=${FSTYPE:-ext3} ;;
-2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69";;
+2.4*) FSTYPE=${FSTYPE:-ext3};    ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
+2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69" ;;
 *) error "unsupported kernel" ;;
 esac
 
-[ "$ALWAYS_EXCEPT$EXCEPT$SANITY_EXCEPT" ] && \
-       echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITY_EXCEPT`"
-
 SRCDIR=`dirname $0`
 export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin
 
@@ -71,7 +68,7 @@ SAVE_PWD=$PWD
 
 clean() {
        echo -n "cln.."
-       sh llmountcleanup.sh ${FORCE} > /dev/null || exit 20
+       sh llmountcleanup.sh ${FORCE} > /dev/null || { echo "FAILed to clean up"; exit 20; }
 }
 CLEAN=${CLEAN:-:}
 
@@ -107,16 +104,26 @@ check_kernel_version() {
        return 1
 }
 
+# Helper for basetest: print all arguments.  $* is deliberately left
+# unquoted so that it is word-split with whatever IFS is in effect for
+# this call.
+_basetest() {
+    echo $*
+}
+
+# Print the test name $1 with its lowercase-letter suffix split off.
+# IFS is given as a prefix assignment on the _basetest call and lists
+# a-z, so a name such as 42a is expected to come out as 42.
+basetest() {
+    IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+}
+
 run_one() {
        if ! mount | grep -q $DIR; then
                $START
        fi
+       testnum=$1
+       message=$2
        BEFORE=`date +%s`
-       log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)"
-       export TESTNAME=test_$1
+       log "== test $testnum: $message= `date +%H:%M:%S` ($BEFORE)"
+       export TESTNAME=test_$testnum
        export tfile=f${testnum}
        export tdir=d${base}
-       test_$1 || error "exit with rc=$?"
+       test_${testnum} || error "exit with rc=$?"
        unset TESTNAME
        pass "($((`date +%s` - $BEFORE))s)"
        cd $SAVE_PWD
@@ -124,6 +131,9 @@ run_one() {
 }
 
 build_test_filter() {
+       [ "$ALWAYS_EXCEPT$EXCEPT$SANITY_EXCEPT" ] && \
+           echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITY_EXCEPT`"
+
         for O in $ONLY; do
             eval ONLY_${O}=true
         done
@@ -141,7 +151,7 @@ basetest() {
 }
 
 run_test() {
-         base=`basetest $1`
+         export base=`basetest $1`
          if [ "$ONLY" ]; then
                  testname=ONLY_$1
                  if [ ${!testname}x != x ]; then
@@ -217,7 +227,7 @@ rm -rf $DIR/[Rdfs][1-9]*
 
 build_test_filter
 
-echo preparing for tests involving mounts
+echo "preparing for tests involving mounts"
 EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
 touch $EXT2_DEV
 mke2fs -j -F $EXT2_DEV 8000 > /dev/null
@@ -226,12 +236,12 @@ echo # add a newline after mke2fs.
 umask 077
 
 test_0() {
-       touch $DIR/f
-       $CHECKSTAT -t file $DIR/f || error
-       rm $DIR/f
-       $CHECKSTAT -a $DIR/f || error
+       touch $DIR/$tfile
+       $CHECKSTAT -t file $DIR/$tfile || error
+       rm $DIR/$tfile
+       $CHECKSTAT -a $DIR/$tfile || error
 }
-run_test 0 "touch .../f ; rm .../f ============================="
+run_test 0 "touch .../$tfile ; rm .../$tfile ====================="
 
 test_0b() {
        chmod 0755 $DIR || error
@@ -835,18 +845,18 @@ run_test 26e "unlink multiple component recursive symlink ======"
 
 # recursive symlinks (bug 7022)
 test_26f() {
-       mkdir $DIR/foo         || error "mkdir $DIR/foo failed"
-       cd $DIR/foo            || error "cd $DIR/foo failed"
-       mkdir -p bar/bar1      || error "mkdir bar/bar1 failed"
-       mkdir foo              || error "mkdir foo failed"
-       cd foo                 || error "cd foo failed"
-       ln -s .. dotdot        || error "ln dotdot failed"
-       ln -s dotdot/bar bar   || error "ln bar failed"
-       cd ../..               || error "cd ../.. failed"
-       output=`ls foo/foo/bar/bar1`
+       mkdir $DIR/$tfile        || error "mkdir $DIR/$tfile failed"
+       cd $DIR/$tfile           || error "cd $DIR/$tfile failed"
+       mkdir -p $tdir/bar1      || error "mkdir $tdir/bar1 failed"
+       mkdir $tfile             || error "mkdir $tfile failed"
+       cd $tfile                || error "cd $tfile failed"
+       ln -s .. dotdot          || error "ln dotdot failed"
+       ln -s dotdot/$tdir $tdir || error "ln $tdir failed"
+       cd ../..                 || error "cd ../.. failed"
+       output=`ls $tfile/$tfile/$tdir/bar1`
        [ "$output" = bar1 ] && error "unexpected output"
-       rm -r foo              || error "rm foo failed"
-       $CHECKSTAT -a $DIR/foo || error "foo not gone"
+       rm -r $tfile             || error "rm $tfile failed"
+       $CHECKSTAT -a $DIR/$tfile || error "$tfile not gone"
 }
 run_test 26f "rm -r of a directory which has recursive symlink ="
 
@@ -1408,13 +1418,13 @@ test_32r() {
 run_test 32r "opendir follows mountpoints in Lustre (should return error)"
 
 test_33() {
-       rm -f $DIR/test_33_file
-       touch $DIR/test_33_file
-       chmod 444 $DIR/test_33_file
-       chown $RUNAS_ID $DIR/test_33_file
-        log 33_1
-        $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true
-        log 33_2
+       rm -f $DIR/$tfile
+       touch $DIR/$tfile
+       chmod 444 $DIR/$tfile
+       chown $RUNAS_ID $DIR/$tfile
+       log 33_1
+       $RUNAS $OPENFILE -f O_RDWR $DIR/$tfile && error || true
+       log 33_2
 }
 run_test 33 "write file with mode 444 (should return error) ===="
 
@@ -1549,20 +1559,20 @@ test_38() {
 run_test 38 "open a regular file with O_DIRECTORY =============="
 
 test_39() {
-       touch $DIR/test_39_file
-       touch $DIR/test_39_file2
-#      ls -l  $DIR/test_39_file $DIR/test_39_file2
-#      ls -lu  $DIR/test_39_file $DIR/test_39_file2
-#      ls -lc  $DIR/test_39_file $DIR/test_39_file2
+       touch $DIR/$tfile
+       touch $DIR/${tfile}2
+#      ls -l  $DIR/$tfile $DIR/${tfile}2
+#      ls -lu  $DIR/$tfile $DIR/${tfile}2
+#      ls -lc  $DIR/$tfile $DIR/${tfile}2
        sleep 2
-       $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2
-       if [ ! $DIR/test_39_file2 -nt $DIR/test_39_file ]; then
+       $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/${tfile}2
+       if [ ! $DIR/${tfile}2 -nt $DIR/$tfile ]; then
                echo "mtime"
-               ls -l  $DIR/test_39_file $DIR/test_39_file2
+               ls -l  $DIR/$tfile $DIR/${tfile}2
                echo "atime"
-               ls -lu  $DIR/test_39_file $DIR/test_39_file2
+               ls -lu  $DIR/$tfile $DIR/${tfile}2
                echo "ctime"
-               ls -lc  $DIR/test_39_file $DIR/test_39_file2
+               ls -lc  $DIR/$tfile $DIR/${tfile}2
                error "O_TRUNC didn't change timestamps"
        fi
 }
@@ -2305,9 +2315,9 @@ test_62() {
         f="$DIR/f62"
         echo foo > $f
         cancel_lru_locks osc
-        echo 0x405 > /proc/sys/lustre/fail_loc
+        sysctl -w lustre.fail_loc=0x405
         cat $f && error "cat succeeded, expect -EIO"
-        echo 0 > /proc/sys/lustre/fail_loc
+        sysctl -w lustre.fail_loc=0
 }
 run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)"
 
@@ -2338,12 +2348,12 @@ test_63b() {
        sysctl -w lnet.debug=-1
 
        # ensure we have a grant to do async writes
-       dd if=/dev/zero of=/mnt/lustre/f63b bs=4k count=1
-       rm /mnt/lustre/f63b
+       dd if=/dev/zero of=$DIR/$tfile bs=4k count=1
+       rm $DIR/$tfile
 
        #define OBD_FAIL_OSC_BRW_PREP_REQ        0x406
        sysctl -w lustre.fail_loc=0x80000406
-       multiop /mnt/lustre/f63b Owy && \
+       multiop $DIR/$tfile Owy && \
                $LCTL dk /tmp/test63b.debug && \
                sysctl -w lnet.debug=$DBG_SAVE && \
                error "sync didn't return ENOMEM"
@@ -2407,7 +2417,7 @@ test_65e() {
        mkdir -p $DIR/d65
 
        $LSTRIPE $DIR/d65 0 -1 0 || error "setstripe"
-        $LFS find -v $DIR/d65 | grep "$DIR/d65/ has no stripe info" || error "no stripe info failed"
+        $LFS find -v $DIR/d65 | grep "has no stripe info" || error "no stripe info failed"
        touch $DIR/d65/f6
        $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed"
 }
@@ -2423,7 +2433,7 @@ test_65g() {
         mkdir -p $DIR/d65
         $LSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe"
         $LSTRIPE -d $DIR/d65 || error "setstripe"
-        $LFS find -v $DIR/d65 | grep "$DIR/d65/ has no stripe info" || \
+        $LFS find -v $DIR/d65 | grep "has no stripe info" || \
                error "delete default stripe failed"
 }
 run_test 65g "directory setstripe -d ==========================="
@@ -2536,27 +2546,27 @@ test_69() {
        f="$DIR/f69"
        touch $f
 
-       echo 0x217 > /proc/sys/lustre/fail_loc
+       sysctl -w lustre.fail_loc=0x217
        truncate $f 1 # vmtruncate() will ignore truncate() error.
        $DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT"
 
-       echo 0 > /proc/sys/lustre/fail_loc
+       sysctl -w lustre.fail_loc=0
        $DIRECTIO write $f 0 2 || error "write error"
 
        cancel_lru_locks osc
        $DIRECTIO read $f 0 1 || error "read error"
 
-       echo 0x217 > /proc/sys/lustre/fail_loc
+       sysctl -w lustre.fail_loc=0x217
        $DIRECTIO read $f 1 1 && error "read succeeded, expect -ENOENT"
 
-       echo 0 > /proc/sys/lustre/fail_loc
+       sysctl -w lustre.fail_loc=0
        rm -f $f
 }
 run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======"
 
 test_71() {
        DBENCH_LIB=${DBENCH_LIB:-/usr/lib/dbench}
-       PATH=${PATH}:$DBENCH_LIB
+       PATH=${DBENCH_LIB}:${PATH}
        cp `which dbench` $DIR
 
        [ ! -f $DIR/dbench ] && echo "dbench not installed, skip this test" && return 0
@@ -2569,7 +2579,7 @@ test_71() {
 
        echo "copying necessary lib to $DIR"
        [ -d /lib64 ] && LIB71=/lib64 || LIB71=/lib
-       mkdir $DIR$LIB71 || error "can't create $DIR$LIB71"
+       mkdir -p $DIR$LIB71 || error "can't create $DIR$LIB71"
        cp $LIB71/libc* $DIR$LIB71 || error "can't copy $LIB71/libc*"
        cp $LIB71/ld-* $DIR$LIB71 || error "can't create $LIB71/ld-*"
 
@@ -2577,9 +2587,7 @@ test_71() {
        chroot $DIR /dbench -c client.txt 2
        RC=$?
 
-       rm -f $DIR/dbench
-       rm -f $TGT
-       rm -fr $DIR$LIB71
+       rm -rf $DIR/dbench $TGT $DIR$LIB71
 
        return $RC
 }
@@ -2622,6 +2630,7 @@ test_75() {
        F128k=${F}_128k
        FHEAD=${F}_head
        FTAIL=${F}_tail
+       echo "using F=$F, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL"
        rm -f $F*
 
        dd if=/dev/urandom of=${F}_128k bs=1024 count=128 || error "dd failed"
@@ -2631,16 +2640,14 @@ test_75() {
        cat ${F128k} ${F128k} > ${F}_sim_sim
 
        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       diff ${FHEAD} ${F}_sim_sim
-       diff -u ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ"
+       cmp ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ"
        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join"
 
        cp -p ${F128k} ${FTAIL}
        cat ${F}_sim_sim >> ${F}_join_sim
        cat ${F128k} >> ${F}_join_sim
        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       diff -u ${FHEAD} ${F}_join_sim
-       diff -u ${FHEAD} ${F}_join_sim || \
+       cmp ${FHEAD} ${F}_join_sim || \
                error "${FHEAD} ${F}_join_sim are different"
        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join"
 
@@ -2648,7 +2655,7 @@ test_75() {
        cat ${F128k} >> ${F}_sim_join
        cat ${F}_join_sim >> ${F}_sim_join
        $JOIN ${FTAIL} ${FHEAD} || error "join error"
-       diff -u ${FTAIL} ${F}_sim_join || \
+       cmp ${FTAIL} ${F}_sim_join || \
                error "${FTAIL} ${F}_sim_join are different"
        $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join"
 
@@ -2658,7 +2665,7 @@ test_75() {
        cat ${F}_sim_join >> ${F}_join_join
        $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error"
        $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error"
-       diff -u ${FHEAD} ${F}_join_join ||error "${FHEAD} ${F}_join_join differ"
+       cmp ${FHEAD} ${F}_join_join || error "${FHEAD} ${F}_join_join differ"
        $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join"
        $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)"
 
@@ -2672,12 +2679,10 @@ test_75() {
                        error "join ${F}_join_10 ${FTAIL} error"
                $CHECKSTAT -a ${FTAIL} || error "tail file exist after join"
        done
-       diff -u ${F}_join_10 ${F}_join_10_compare || \
+       cmp ${F}_join_10 ${F}_join_10_compare || \
                error "files ${F}_join_10 ${F}_join_10_compare are different"
        $LFS getstripe ${F}_join_10
        $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open"
-
-       rm -f $F*
 }
 run_test 75 "TEST join file"
 
@@ -2767,8 +2772,13 @@ test_100() {
                RPORT=`echo $REMOTE | cut -d: -f2`
                [ "$RPORT" != "$ACCEPTOR_PORT" ] && continue
                LPORT=`echo $LOCAL | cut -d: -f2`
-               [ $LPORT -ge 1024 ] && error "local port: $LPORT > 1024" || true
+               if [ $LPORT -ge 1024 ]; then
+                       echo "bad: $PROT $SND $RCV $LOCAL $REMOTE $STAT"
+                       netstat -tna
+                       error "local: $LPORT > 1024, remote: $RPORT"
+               fi
        done
+       true
 }
 run_test 100 "check local port using privileged port ==========="
 
@@ -2827,7 +2837,7 @@ test_102() {
         touch $testfile
 
        [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
-       [ -z "grep \<xattr\> $LPROC/mdc/*-mdc-*/connect_flags" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
+       [ -z "`grep \<xattr\> $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
        echo "set/get xattr..."
         setfattr -n trusted.name1 -v value1 $testfile || error
         [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \
@@ -2879,6 +2889,7 @@ test_103 () {
     [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
     [ -z "`mount | grep " $DIR .*\<acl\>"`" ] && echo "skipping $TESTNAME (must have acl)" && return
     [ -z "`grep acl $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return
+    [ -z "$(which setfacl 2>/dev/null)" ] && echo "skipping $TESTNAME (could not find setfacl)" && return
 
     echo "performing cp ..."
     run_acl_subtest cp || error
@@ -2904,6 +2915,23 @@ test_103 () {
 }
 run_test 103 "==============acl test ============="
 
+test_104() {
+       # Run "lfs df", plain and with -ih, against no path, the mount
+       # point and a file inside it.
+       touch $DIR/$tfile
+       $LFS df || error "lfs df failed"
+       $LFS df -ih || error "lfs df -ih failed"
+       $LFS df $DIR || error "lfs df $DIR failed"
+       $LFS df -ih $DIR || error "lfs df -ih $DIR failed"
+       $LFS df $DIR/$tfile || error "lfs df $DIR/$tfile failed"
+       $LFS df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed"
+
+       # Take the first device from "lctl dl" whose line matches OSC.*MNT
+       # and check that "lfs df" still works while it is deactivated and
+       # again after it has been recovered.
+       OSC=`$LCTL dl | awk '/OSC.*MNT/ {print $4}' | head -n 1`
+       if [ -z "$OSC" ]; then
+               # without a device name the lctl calls below would get "%"
+               error "no OSC device found in lctl dl output"
+       fi
+       $LCTL --device %$OSC deactivate
+       $LFS df || error "lfs df with deactivated OSC failed"
+       $LCTL --device %$OSC recover
+       $LFS df || error "lfs df with reactivated OSC failed"
+}
+run_test 104 "lfs>df [-ih] [path] test ============"
+
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
 HOME=$OLDHOME
index 0de58a6..e7b0294 100644 (file)
@@ -7,9 +7,6 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b  14c"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
-[ "$ALWAYS_EXCEPT$EXCEPT$SANITYN_EXCEPT" ] && \
-       echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITYN_EXCEPT`"
-
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
@@ -67,38 +64,68 @@ run_one() {
        if ! mount | grep -q $DIR1; then
                $START
        fi
+       testnum=$1
+       message=$2
        BEFORE=`date +%s`
-       log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)"
-       export TESTNAME=test_$1
+       log "== test $testnum: $message= `date +%H:%M:%S` ($BEFORE)"
+       export TESTNAME=test_$testnum
        export tfile=f${testnum}
        export tdir=d${base}
-       test_$1 || error "test_$1: exit with rc=$?"
+       test_$1 || error "exit with rc=$?"
        unset TESTNAME
        pass "($((`date +%s` - $BEFORE))s)"
        cd $SAVE_PWD
        $CLEAN
 }
 
+# Announce the tests that will be skipped, then record the selections as
+# shell variables: ONLY_<name>=true for each entry of $ONLY and
+# EXCEPT_<name>=true for each excluded test.  run_test looks these up by
+# name.
+build_test_filter() {
+       [ "$ALWAYS_EXCEPT$EXCEPT$SANITYN_EXCEPT" ] && \
+           echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITYN_EXCEPT`"
+
+        for O in $ONLY; do
+            eval ONLY_${O}=true
+        done
+        # Use the same SANITYN_EXCEPT list that is printed above, so every
+        # test announced as skipped really gets an EXCEPT_ marker.
+        for E in $EXCEPT $ALWAYS_EXCEPT $SANITYN_EXCEPT; do
+            eval EXCEPT_${E}=true
+        done
+}
+
+# Helper for basetest: print all arguments.  $* is deliberately left
+# unquoted so that it is word-split with whatever IFS is in effect for
+# this call.
+_basetest() {
+    echo $*
+}
+
+# Print the test name $1 with its lowercase-letter suffix split off.
+# IFS is given as a prefix assignment on the _basetest call and lists
+# a-z, so a name such as 14b is expected to come out as 14.  run_test
+# stores the result in $base.
+basetest() {
+    IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+}
+
 run_test() {
-       for O in $ONLY; do
-               if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then
-                       echo ""
-                       run_one $1 "$2"
-                       return $?
-               else
-                       echo -n "."
-               fi
-       done
-       for X in $EXCEPT $ALWAYS_EXCEPT $SANITYN_EXCEPT; do
-               if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then
-                       echo "skipping excluded test $1"
-                       return 0
-               fi
-       done
-       if [ -z "$ONLY" ]; then
-               run_one $1 "$2"
-               return $?
-       fi
+         export base=`basetest $1`
+         if [ "$ONLY" ]; then
+                 testname=ONLY_$1
+                 if [ ${!testname}x != x ]; then
+                       run_one $1 "$2"
+                       return $?
+                 fi
+                 testname=ONLY_$base
+                 if [ ${!testname}x != x ]; then
+                         run_one $1 "$2"
+                         return $?
+                 fi
+                 echo -n "."
+                 return 0
+       fi
+        testname=EXCEPT_$1
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1"
+                 return 0
+        fi
+        testname=EXCEPT_$base
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1 (base $base)"
+                 return 0
+        fi
+        run_one $1 "$2"
+       return $?
 }
 
 [ "$SANITYLOG" ] && rm -f $SANITYLOG || true
@@ -130,6 +157,8 @@ export DIR2=${DIR2:-$MOUNT2}
 
 rm -rf $DIR1/[df][0-9]* $DIR1/lnk
 
+build_test_filter
+
 test_1a() {
        touch $DIR1/f1
        [ -f $DIR2/f1 ] || error
@@ -347,7 +376,7 @@ test_14c() { # bug 3430
 run_test 14c "open(O_TRUNC) of executing file return -ETXTBSY =="
 
 test_15() {    # bug 974 - ENOSPC
-       echo $PATH
+       echo "PATH=$PATH"
        sh oos2.sh $MOUNT1 $MOUNT2
 }
 run_test 15 "test out-of-space with multiple writers ==========="
@@ -464,6 +493,32 @@ test_22() { # Bug 9926
 }
 run_test 22 " After joining in one dir,  open/close unlink file in anther dir" 
 
+test_23() { # Bug 5972
+       # Create the file through the first mount point.
+       echo "others should see updated atime while another read" > $DIR1/f23
+       
+       # drop the lock (mode: LCK_PW) obtained by the create operation
+       cancel_lru_locks OSC
+       
+       # Reference timestamp, taken before the reader is started.
+       time1=`date +%s`        
+       sleep 2
+       
+       # Background multiop on the first mount point; presumably "or20_c"
+       # opens and reads the file and then waits for a signal before
+       # closing -- TODO confirm against multiop's usage text.
+       multiop $DIR1/f23 or20_c &
+       MULTIPID=$!
+
+       sleep 2
+       # %X: last access time in seconds since the epoch, read through the
+       # second mount point.
+       time2=`stat -c "%X" $DIR2/f23`
+
+       # The atime seen on the second mount must be later than time1;
+       # signal multiop before reporting the failure.
+       if (( $time2 <= $time1 )); then
+               kill -USR1 $MULTIPID
+               error "atime doesn't update among nodes"
+       fi
+
+       # Signal the background multiop, then remove the test file.
+       kill -USR1 $MULTIPID || return 1
+       rm -f $DIR1/f23 || error "rm -f $DIR1/f23 failed"
+       true
+}
+run_test 23 " others should see updated atime while another read===="
+
 log "cleanup: ======================================================"
 rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true
 
index 150e128..741b13d 100644 (file)
@@ -115,7 +115,9 @@ zconf_mount() {
     fi
 
     do_node $client mount -t lustre $OPTIONS \
-       `facet_nid mgs`:/lustre-client $mnt || return 1
+       `facet_nid mgs`:/lustre $mnt || return 1
+
+    do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }"
 
     [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname`
     return 0
@@ -278,12 +280,12 @@ facet_nid() {
    facet=$1
    HOST=`facet_host $facet`
    if [ -z "$HOST" ]; then
-       echo "The env variable ${facet}_HOST must be set."
-       exit 1
+           echo "The env variable ${facet}_HOST must be set."
+           exit 1
    fi
    if [ -z "$NETTYPE" ]; then
-       echo "The env variable NETTYPE must be set."
-       exit 1
+           echo "The env variable NETTYPE must be set."
+           exit 1
    fi
    echo `h2$NETTYPE $HOST`
 }
@@ -333,11 +335,15 @@ change_active() {
 do_node() {
     HOST=$1
     shift
+    local myPDSH=$PDSH
+    if [ "$HOST" = "$(hostname)" ]; then
+        myPDSH="no_dsh"
+    fi
     if $VERBOSE; then
         echo "CMD: $HOST $@"
-        $PDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || :
+        $myPDSH $HOST $LCTL mark "$@" > /dev/null 2>&1 || :
     fi
-    $PDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")"
+    $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; sh -c \"$@\")"
 }
 
 do_facet() {
@@ -512,7 +518,7 @@ pgcache_empty() {
 ##################################
 # Test interface 
 error() {
-       sysctl -w lustre.fail_loc=0
+       sysctl -w lustre.fail_loc=0 || true
        echo "${TESTSUITE}: **** FAIL:" $@
        log "FAIL: $@"
        exit 1
index 27b5e5d..82bdbdc 100755 (executable)
@@ -781,7 +781,7 @@ def if2addr(iface):
     ip = string.split(addr, ':')[1]
     return ip
 
-def def_mount_options(fstype, target):
+def def_mount_options(fstype, target, blkdev):
     """returns deafult mount options for passed fstype and target (mds, ost)"""
     if fstype == 'ext3' or fstype == 'ldiskfs':
         mountfsoptions = "errors=remount-ro"
@@ -795,6 +795,64 @@ def def_mount_options(fstype, target):
                 mountfsoptions = "%s,user_xattr" % (mountfsoptions)
             if config.acl:
                 mountfsoptions = "%s,acl" % (mountfsoptions)
+
+        if blkdev:
+            # grab superblock info
+            dumpe2fs="dumpe2fs -f -h"
+            (ret, sb) = run(dumpe2fs, blkdev)
+            if ret:
+                panic("unable to get superblock for ", blkdev)
+
+            # extract journal UUID
+            journal_UUID=''
+            journal_DEV=''
+            for line in sb:
+                lst = string.split(line, ":")
+                if lst[0] == 'Journal UUID':
+                    if len(lst[1]) < 3:
+                        panic("cannot retrieve journal UUID for ", blkdev)
+                    if string.split(lst[1])[0] != '<none>':
+                        journal_UUID = string.split(lst[1])[0]
+                        debug(blkdev, 'has journal UUID', journal_UUID)
+                if lst[0] == 'Journal device':
+                    if len(lst[1]) < 3:
+                        panic("cannot retrieve journal device for ", blkdev)
+                    if string.split(lst[1])[0] != '0x0000':
+                        journal_DEV = string.split(lst[1])[0]
+                        debug(blkdev, 'has journal device', journal_DEV)
+                    break
+
+            if len(journal_UUID) == 0 or len(journal_DEV) == 0:
+                debug('no external journal found for', blkdev)
+                # use internal journal
+                return mountfsoptions
+        
+            # run blkid
+            blkid = "blkid -o device -t UUID='%s'" % (journal_UUID)
+            (ret, devname) = run(blkid)
+            if ret or len(devname) == 0:
+                panic("cannot find external journal for ", blkdev)
+            debug('found', blkdev, 'journal UUID', journal_UUID, 'on',
+                  string.replace(devname[0], '\n', ''))
+
+            try: # sigh, python 1.5 does not support os.stat().st_rdev
+                jdevpath = my_realpath(string.replace(devname[0], '\n', ''))
+                ret, out = runcmd("ls -l %s" %jdevpath)
+                debug('ls -l:', out)
+                major = int(string.split(string.split(out[0])[4], ',')[0])
+                minor = int(string.split(out[0])[5])
+                debug('major', major, 'minor', minor)
+                rdev = major << 8 | minor
+            except OSError:
+                panic("cannot stat ", devname[0])
+
+            debug('found', blkdev, 'journal UUID', journal_UUID, 'on',
+                  jdevpath, 'rdev', rdev)
+
+            # add mount option
+            if string.atoi(journal_DEV, 0) != rdev:
+                mountfsoptions =  "%s,journal_dev=%#x" % (mountfsoptions,rdev)
+
         return mountfsoptions
     return ""
 
@@ -1153,7 +1211,10 @@ class MDSDEV(Module):
         self.nspath = self.db.get_val('nspath', '')
         self.mkfsoptions = '-i 4096 ' + self.db.get_val('mkfsoptions', '')
         self.mountfsoptions = self.db.get_val('mountfsoptions', '')
-        self.quota = self.db.get_val('quota', '')
+        if config.quota:
+            self.quota = config.quota
+        else:
+            self.quota = self.db.get_val('quota', '')
         # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
         target_uuid = self.db.get_first_ref('target')
         mds = self.db.lookup(target_uuid)
@@ -1241,7 +1302,7 @@ class MDSDEV(Module):
         if not is_prepared('MDT'):
             lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
         try:
-            mountfsoptions = def_mount_options(self.fstype, 'mds')
+            mountfsoptions = def_mount_options(self.fstype, 'mds', blkdev)
 
             if config.mountfsoptions:
                 if mountfsoptions:
@@ -1417,7 +1478,10 @@ class OSD(Module):
         if self.size > 1000000:
                 self.mkfsoptions = '-i 16384 ' + self.mkfsoptions
         self.mountfsoptions = self.db.get_val('mountfsoptions', '')
-        self.quota = self.db.get_val('quota', '')
+        if config.quota:
+            self.quota = config.quota
+        else:
+            self.quota = self.db.get_val('quota', '')
 
         self.fstype = self.db.get_val('fstype', '')
         if sys_get_branch() == '2.4' and self.fstype == 'ldiskfs':
@@ -1480,7 +1544,7 @@ class OSD(Module):
                                config.reformat, self.format, self.journal_size,
                                self.inode_size, self.mkfsoptions)
 
-        mountfsoptions = def_mount_options(self.fstype, 'ost')
+        mountfsoptions = def_mount_options(self.fstype, 'ost', blkdev)
 
         if config.mountfsoptions:
             if mountfsoptions:
@@ -1686,8 +1750,10 @@ class COBD(Module):
 
 # virtual interface for  OSC and LOV
 class VOSC(Module):
-    def __init__(self, db, uuid, fs_name, name_override = None):
+    def __init__(self, db, uuid, fs_name, name_override = None, quota = None):
         Module.__init__(self, 'VOSC', db)
+        if quota:
+            self.add_lustre_module('quota', 'lquota')
         if db.get_class() == 'lov':
             self.osc = LOV(db, uuid, fs_name, name_override)
         else:
@@ -1701,9 +1767,11 @@ class VOSC(Module):
     def cleanup(self):
         self.osc.cleanup()
     def load_module(self):
+        Module.load_module(self)
         self.osc.load_module()
     def cleanup_module(self):
         self.osc.cleanup_module()
+        Module.cleanup_module(self)
 
 
 class ECHO_CLIENT(Module):
@@ -1773,16 +1841,17 @@ class Mountpoint(Module):
         self.fs_uuid = self.db.get_first_ref('filesystem')
         fs = self.db.lookup(self.fs_uuid)
         self.mds_uuid = fs.get_first_ref('mds')
+        mds_db = self.db.lookup(self.mds_uuid)
+        if config.quota:
+            quota = config.quota
+        else:
+            quota = mds_db.get_val('quota', config.quota)
         self.obd_uuid = fs.get_first_ref('obd')
         obd = self.db.lookup(self.obd_uuid)
         client_uuid = generate_client_uuid(self.name)
-        self.vosc = VOSC(obd, client_uuid, self.name)
+        self.vosc = VOSC(obd, client_uuid, self.name, quota=quota)
         self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
 
-        mds_db = self.db.lookup(self.mds_uuid)
-        quota = mds_db.get_val('quota', '')
-        if quota:
-                self.add_lustre_module('quota', 'lquota')
         self.add_lustre_module('mdc', 'mdc')
         self.add_lustre_module('llite', 'llite')
 
@@ -2638,6 +2707,7 @@ lconf_options = [
                 PARAMLIST),
     ('user_xattr', """Enable user_xattr support on MDS""", FLAG, 0),
     ('acl', """Enable ACL support on MDS""", FLAG, 0),
+    ('quota', "Enable quota support for client file system", PARAM), 
     ]
 
 def main():
@@ -2775,7 +2845,7 @@ if __name__ == "__main__":
         rc = e.rc
         if rc == 0:
             rc = 1
-        sys.exit(e.rc)
+        sys.exit(rc)
 
     if first_cleanup_error:
         sys.exit(first_cleanup_error)
index 34d691e..63e015f 100644 (file)
@@ -120,6 +120,8 @@ command_t cmdlist[] = {
          "Omitting the count means indefinitely, 0 means restore, "
          "otherwise fail 'count' messages.\n"
          "usage: fail nid|_all_ [count]"},
+        {"ping", jt_ptl_ping, 0, "Check LNET connectivity\n"
+         "usage: ping nid [timeout] [pid]"},
 
         /* Device selection commands */
         {"=== device selection ===", jt_noop, 0, "device selection"},
index 65cf9bd..49fc5ca 100644 (file)
@@ -55,6 +55,7 @@ static int lfs_setstripe(int argc, char **argv);
 static int lfs_find(int argc, char **argv);
 static int lfs_getstripe(int argc, char **argv);
 static int lfs_osts(int argc, char **argv);
+static int lfs_df(int argc, char **argv);
 static int lfs_check(int argc, char **argv);
 static int lfs_catinfo(int argc, char **argv);
 #ifdef HAVE_QUOTA_SUPPORT
@@ -99,6 +100,10 @@ command_t cmdlist[] = {
          "join two lustre files into one - join A, B, will be like cat B >> A & del B\n"
          "usage: join <filename_A> <filename_B>\n"},
         {"osts", lfs_osts, 0, "osts"},
+        {"df", lfs_df, 0,
+         "report filesystem disk space usage or inodes usage "
+         "of each MDS/OSD.\n"
+         "Usage: df [-i] [-h] [path]"},
 #ifdef HAVE_QUOTA_SUPPORT
         {"quotachown",lfs_quotachown, 0,
          "Change files' owner or group on the specified filesystem.\n"
@@ -312,7 +317,7 @@ static int lfs_osts(int argc, char **argv)
         fp = setmntent(MOUNTED, "r");
 
         if (fp == NULL) {
-                 fprintf(stderr, "setmntent(%s): %s:", MOUNTED,
+                 fprintf(stderr, "%s: setmntent(%s): %s:", argv[0], MOUNTED,
                         strerror (errno));
         } else {
                 mnt = getmntent(fp);
@@ -321,8 +326,323 @@ static int lfs_osts(int argc, char **argv)
                                 rc = llapi_find(mnt->mnt_dir, obduuid, 0, 0, 0);
                                 if (rc)
                                         fprintf(stderr,
-                                               "error: lfs osts failed on %s\n",
-                                               mnt->mnt_dir);
+                                               "error: %s: failed on %s\n",
+                                               argv[0], mnt->mnt_dir);
+                        }
+                        mnt = getmntent(fp);
+                }
+                endmntent(fp);
+        }
+
+        return rc;
+}
+
+/*
+ * COOK(value): repeatedly divide \a value by 1024 while it is greater than
+ * 1024 and evaluate to the number of divisions performed.
+ *
+ * \a value is modified in place, so it must be an assignable lvalue (the
+ * callers below pass a local double).  The macro is a GNU statement
+ * expression and \a value is expanded unparenthesized several times, so do
+ * not pass an expression with side effects.
+ */
+#define COOK(value)                                                     \
+({                                                                      \
+        int radix = 0;                                                  \
+        while (value > 1024) {                                          \
+                value /= 1024;                                          \
+                radix++;                                                \
+        }                                                               \
+        radix;                                                          \
+})
+/*
+ * printf formats for the "lfs df" table columns:
+ *   UUF      - target UUID / row label, left-justified in 20 columns
+ *   CSF      - an already-formatted numeric cell, printed as a string
+ *   CDF      - a raw 64-bit counter rendered into a cell buffer
+ *   HSF      - 8-column string (not referenced by the functions below)
+ *   HDF      - a value scaled by COOK(); callers append a unit "%c"
+ *   RSF, RDF - the use% column as a string and as an integer
+ */
+#define UUF     "%-20s"
+#define CSF     "%9s"
+#define CDF     "%9llu"
+#define HSF     "%8s"
+#define HDF     "%6.1f"
+#define RSF     "%5s"
+#define RDF     "%5d"
+
+/*
+ * Find the Lustre mount point that contains \a path.
+ *
+ * \param path     path given by the user; resolved with realpath() first
+ * \param fp       mount table stream from setmntent(); it is read from its
+ *                 current position to the end
+ * \param mntdir   output buffer receiving the NUL-terminated mount directory
+ * \param dir_len  size of \a mntdir in bytes
+ *
+ * When several Lustre mounts match, the one with the longest mount
+ * directory wins.
+ *
+ * \retval 0        \a mntdir holds the mount point
+ * \retval -errno   realpath() failed on \a path
+ * \retval -EINVAL  \a path is not below any Lustre mount, or bad arguments
+ */
+static int path2mnt(char *path, FILE *fp, char *mntdir, int dir_len)
+{
+        char rpath[PATH_MAX] = {'\0'};
+        struct mntent *mnt;
+        int rc, len, out_len = 0;
+
+        if (mntdir == NULL || dir_len <= 0)
+                return -EINVAL;
+
+        if (!realpath(path, rpath)) {
+                rc = -errno;
+                fprintf(stderr, "error: lfs df: invalid path '%s': %s\n",
+                        path, strerror(-rc));
+                return rc;
+        }
+
+        /* getmntent() returns NULL at end of table or on a read error, so
+         * the entry is never dereferenced unless it is valid. */
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (!llapi_is_lustre_mnttype(mnt->mnt_type))
+                        continue;
+
+                len = strlen(mnt->mnt_dir);
+                if (len <= out_len || strncmp(rpath, mnt->mnt_dir, len) != 0)
+                        continue;
+
+                /* The match must end on a path component boundary, or
+                 * "/mnt/lustre2/f" would be attributed to "/mnt/lustre". */
+                if (mnt->mnt_dir[len - 1] != '/' &&
+                    rpath[len] != '\0' && rpath[len] != '/')
+                        continue;
+
+                out_len = len;
+                /* zero fill + copy of at most dir_len - 1 bytes keeps the
+                 * result NUL-terminated even if it had to be truncated */
+                memset(mntdir, 0, dir_len);
+                strncpy(mntdir, mnt->mnt_dir, dir_len - 1);
+        }
+
+        if (out_len > 0)
+                return 0;
+
+        fprintf(stderr, "error: lfs df: %s isn't mounted on lustre\n", path);
+        return -EINVAL;
+}
+
+/*
+ * Render one numeric "lfs df" column into \a buf.
+ *
+ * \param buf     output buffer, always NUL-terminated (snprintf)
+ * \param len     size of \a buf in bytes
+ * \param val     value to print
+ * \param cooked  non-zero for human-readable output (-h)
+ * \param base    index into the unit table of the unit \a val is already
+ *                expressed in: 0 for plain counts (inodes), 1 for values
+ *                that are already in KB
+ *
+ * In cooked mode the value is scaled with COOK() and printed with a unit
+ * letter; a value that needs no scaling, and every value in raw mode, is
+ * printed as a plain integer.  A 64-bit value needs at most 6 scaling
+ * steps, so the index i - 1 + base stays inside the 8-letter table.
+ */
+static void df_fmt_cell(char *buf, size_t len, __u64 val, int cooked,
+                        int base)
+{
+        static const char suffix[] = "KMGTPEZY";
+        double scaled = (double)val;
+        int i = 0;
+
+        if (cooked)
+                i = COOK(scaled);
+
+        if (i > 0)
+                snprintf(buf, len, HDF"%c", scaled, suffix[i - 1 + base]);
+        else
+                snprintf(buf, len, CDF, (unsigned long long)val);
+}
+
+/*
+ * Print one table row for a single MDT or OST.
+ *
+ * \param mntdir  mount point, printed in the last column
+ * \param stat    statfs data for the target (only used when \a rc is 0)
+ * \param uuid    target UUID, printed in the first column
+ * \param ishow   non-zero: report inodes; zero: report 1K-blocks
+ * \param cooked  non-zero: human-readable numbers
+ * \param type    must start with "MDT" or "OST"
+ * \param index   target index shown as [MDT:n] / [OST:n]
+ * \param rc      result of the statfs call for this target; when non-zero
+ *                an explanatory line is printed instead of numbers
+ *
+ * \retval 0        a row was printed
+ * \retval -EINVAL  NULL argument or unknown \a type
+ */
+static int showdf(char *mntdir, struct obd_statfs *stat,
+                  struct obd_uuid *uuid, int ishow, int cooked,
+                  char *type, int index, int rc)
+{
+        __u64 avail, used, total;
+        double ratio = 0;
+        /* large enough for a full 64-bit decimal; the old 10-byte buffers
+         * overflowed as soon as a value needed 10 digits */
+        char tbuf[32], ubuf[32], abuf[32], rbuf[32];
+
+        if (!uuid || !stat || !type)
+                return -EINVAL;
+        if (strncmp(type, "MDT", 3) && strncmp(type, "OST", 3)) {
+                fprintf(stderr, "error: lfs df: invalid type '%s'\n", type);
+                return -EINVAL;
+        }
+
+        if (rc != 0) {
+                if (rc == -ENODATA)
+                        printf(UUF": inactive OST\n", (char *)uuid);
+                else
+                        printf(UUF": %s\n", (char *)uuid, strerror(-rc));
+                return 0;
+        }
+
+        if (ishow) {
+                avail = stat->os_ffree;
+                used = stat->os_files - stat->os_ffree;
+                total = stat->os_files;
+        } else {
+                /* block counts are converted to KB */
+                avail = stat->os_bavail * stat->os_bsize / 1024;
+                used = stat->os_blocks - stat->os_bavail;
+                used = used * stat->os_bsize / 1024;
+                total = stat->os_blocks * stat->os_bsize / 1024;
+        }
+
+        if (total > 0)
+                ratio = (double)used / (double)total;
+
+        /* block values are already in KB, so their unit letters start one
+         * step further along (base 1) than inode counts (base 0) */
+        df_fmt_cell(tbuf, sizeof(tbuf), total, cooked, !ishow);
+        df_fmt_cell(ubuf, sizeof(ubuf), used, cooked, !ishow);
+        df_fmt_cell(abuf, sizeof(abuf), avail, cooked, !ishow);
+        snprintf(rbuf, sizeof(rbuf), RDF, (int)(ratio * 100));
+
+        /* type was validated above, so its first three characters are
+         * exactly "MDT" or "OST" */
+        printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s[%.3s:%d]\n",
+               (char *)uuid, tbuf, ubuf, abuf, rbuf, mntdir, type, index);
+
+        return 0;
+}
+
+/*
+ * Print the df table for one Lustre mount: a header, one row per MDT, one
+ * row per OST and a "filesystem summary:" row.
+ *
+ * Targets are queried by increasing index until llapi_obd_statfs() returns
+ * -ENODEV.  Targets failing with -ENOTCONN, -ETIMEDOUT, -EIO or -ENODATA
+ * are reported in the table and skipped in the totals; any other error
+ * aborts the listing.  Inode totals are summed over the MDTs, block totals
+ * (in KB) over the OSTs.
+ *
+ * \param mntdir  mount point to query
+ * \param ishow   non-zero: report inodes; zero: report 1K-blocks
+ * \param cooked  non-zero: human-readable numbers
+ *
+ * \retval 0       table printed
+ * \retval -errno  unexpected llapi_obd_statfs() failure
+ */
+static int mntdf(char *mntdir, int ishow, int cooked)
+{
+        struct obd_statfs stat_buf;
+        struct obd_uuid uuid_buf;
+        __u32 index;
+        __u64 avail_sum, used_sum, total_sum;
+        char tbuf[32], ubuf[32], abuf[32], rbuf[32];
+        double ratio_sum = 0;
+        int rc;
+
+        if (ishow)
+                printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
+                       "UUID", "Inodes", "IUsed", "IFree",
+                       "IUse%", "Mounted on");
+        else
+                printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
+                       "UUID", "1K-blocks", "Used", "Available",
+                       "Use%", "Mounted on");
+
+        avail_sum = total_sum = 0;
+        for (index = 0; ; index++) {
+                memset(&stat_buf, 0, sizeof(struct obd_statfs));
+                memset(&uuid_buf, 0, sizeof(struct obd_uuid));
+                rc = llapi_obd_statfs(mntdir, LL_STATFS_MDC, index,
+                                      &stat_buf, &uuid_buf);
+                if (rc == -ENODEV)
+                        break;
+
+                if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
+                    rc == -ENODATA || rc == 0) {
+                        showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked,
+                               "MDT", index, rc);
+                } else {
+                        fprintf(stderr,
+                                "error: llapi_obd_statfs(%s): %s (%d)\n",
+                                uuid_buf.uuid, strerror(-rc), rc);
+                        return rc;
+                }
+                if (!rc && ishow) {
+                        avail_sum += stat_buf.os_ffree;
+                        total_sum += stat_buf.os_files;
+                }
+        }
+
+        for (index = 0; ; index++) {
+                memset(&stat_buf, 0, sizeof(struct obd_statfs));
+                memset(&uuid_buf, 0, sizeof(struct obd_uuid));
+                rc = llapi_obd_statfs(mntdir, LL_STATFS_LOV, index,
+                                      &stat_buf, &uuid_buf);
+                if (rc == -ENODEV)
+                        break;
+
+                if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
+                    rc == -ENODATA || rc == 0) {
+                        showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked,
+                               "OST", index, rc);
+                } else {
+                        fprintf(stderr,
+                                "error: llapi_obd_statfs failed: %s (%d)\n",
+                                strerror(-rc), rc);
+                        return rc;
+                }
+                if (!rc && !ishow) {
+                        __u64 avail, total;
+
+                        avail = stat_buf.os_bavail * stat_buf.os_bsize;
+                        avail /= 1024;
+                        total = stat_buf.os_blocks * stat_buf.os_bsize;
+                        total /= 1024;
+
+                        avail_sum += avail;
+                        total_sum += total;
+                }
+        }
+
+        used_sum = total_sum - avail_sum;
+        /* no reachable target leaves total_sum at 0: report 0% rather
+         * than dividing by zero */
+        if (total_sum > 0)
+                ratio_sum = (double)used_sum / (double)total_sum;
+        snprintf(rbuf, sizeof(rbuf), RDF, (int)(ratio_sum * 100));
+
+        df_fmt_cell(tbuf, sizeof(tbuf), total_sum, cooked, !ishow);
+        df_fmt_cell(ubuf, sizeof(ubuf), used_sum, cooked, !ishow);
+        df_fmt_cell(abuf, sizeof(abuf), avail_sum, cooked, !ishow);
+
+        printf("\n"UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
+               "filesystem summary:", tbuf, ubuf, abuf, rbuf, mntdir);
+
+        return 0;
+}
+
+static int lfs_df(int argc, char **argv)
+{
+        FILE *fp;
+        char *path = NULL;
+        struct mntent *mnt = NULL;
+        char mntdir[PATH_MAX] = {'\0'};
+        int ishow = 0, cooked = 0;
+        int c, rc = 0;
+
+        optind = 0;
+        while ((c = getopt(argc, argv, "ih")) != -1) {
+                switch (c) {
+                case 'i':
+                        ishow = 1;
+                        break;
+                case 'h':
+                        cooked = 1;
+                        break;
+                default:
+                        return CMD_HELP;
+                }
+        }
+        if (optind < argc )
+                path = argv[optind];
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL) {
+                rc = -errno;
+                fprintf(stderr, "error: %s: open %s failed( %s )\n",
+                        argv[0], MOUNTED, strerror(errno));
+                return rc;
+        }
+        if (path) {
+                rc = path2mnt(path, fp, mntdir, sizeof(mntdir));
+                if (rc) {
+                        endmntent(fp);
+                        return rc;
+                }
+
+                rc = mntdf(mntdir, ishow, cooked);
+                printf("\n");
+                endmntent(fp);
+        } else {
+                mnt = getmntent(fp);
+                while (feof(fp) == 0 && ferror(fp) == 0) {
+                        if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+                                rc = mntdf(mnt->mnt_dir, ishow, cooked);
+                                if (rc)
+                                        break;
+                                printf("\n");
                         }
                         mnt = getmntent(fp);
                 }
@@ -520,6 +840,8 @@ static int lfs_quotacheck(int argc, char **argv)
 
         if (check_type)
                 check_type--;
+        else /* check both user & group quota by default */
+                check_type = 0x02;
 
         if (argc == optind)
                 return CMD_HELP;
@@ -530,7 +852,11 @@ static int lfs_quotacheck(int argc, char **argv)
         qctl.qc_cmd = LUSTRE_Q_QUOTAOFF;
         qctl.qc_id = QFMT_LDISKFS;
         qctl.qc_type = check_type;
-        llapi_quotactl(mnt, &qctl);
+        rc = llapi_quotactl(mnt, &qctl);
+        if (rc) {
+                fprintf(stderr, "quota off failed: %s\n", strerror(errno));
+                return rc;
+        }
 
         rc = llapi_quotacheck(mnt, check_type);
         if (rc) {
@@ -903,20 +1229,20 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int ost_only)
                                 printf("%s\n%15s", mnt, "");
                         else
                                 printf("%15s", mnt);
-                        
+
                         if (bover)
                                 diff2str(dqb->dqb_btime, timebuf, now);
-                        
+
                         sprintf(numbuf[0], "%llu", toqb(dqb->dqb_curspace));
                         sprintf(numbuf[1], "%llu", dqb->dqb_bsoftlimit);
                         sprintf(numbuf[2], "%llu", dqb->dqb_bhardlimit);
                         printf(" %7s%c %6s %7s %7s",
                                numbuf[0], bover ? '*' : ' ', numbuf[1],
                                numbuf[2], bover > 1 ? timebuf : "");
-                        
+
                         if (iover)
                                 diff2str(dqb->dqb_itime, timebuf, now);
-                        
+
                         sprintf(numbuf[0], "%llu", dqb->dqb_curinodes);
                         sprintf(numbuf[1], "%llu", dqb->dqb_isoftlimit);
                         sprintf(numbuf[2], "%llu", dqb->dqb_ihardlimit);
@@ -976,6 +1302,8 @@ static void print_lov_quota(char *mnt, struct if_quotactl *qctl)
         for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
                 memcpy(&qctl->obd_uuid, uuidp, sizeof(*uuidp));
 
+                /* XXX clear this flag to get quota from osts */
+                qctl->qc_dqblk.dqb_valid = 0;
                 rc = llapi_quotactl(mnt, qctl);
                 if (rc) {
                         fprintf(stderr, "%s quotactl failed: %s\n",
index 2f68644..5c4c68a 100644 (file)
@@ -99,8 +99,8 @@ int llapi_file_create(char *name, long stripe_size, int stripe_offset,
         }
         if (stripe_size < 0 || (stripe_size & (LOV_MIN_STRIPE_SIZE - 1))) {
                 errno = rc = -EINVAL;
-                err_msg("error: stripe_size must be an even "
-                        "multiple of %d bytes", page_size);
+                err_msg("error: bad stripe_size %ld, must be an even "
+                        "multiple of %d bytes", stripe_size, page_size);
                 goto out;
         }
         if (stripe_offset < -1 || stripe_offset > 2048) {
@@ -174,7 +174,7 @@ static int prepare_find(struct find_param *param)
 {
         param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT);
         if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
-                err_msg("unable to allocate %d bytes of memory for ioctl",
+                err_msg("error: allocation of %d bytes for ioctl",
                         sizeof(lstat_t) + param->lumlen);
                 return ENOMEM;
         }
@@ -209,7 +209,7 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count)
 
         obdgens = malloc(size_round(max_ost_count * sizeof(*obdgens)));
         if (!obdgens) {
-                err_msg("no memory for %d generation #'s", max_ost_count);
+                err_msg("error: %d generation #'s", max_ost_count);
                 return(-ENOMEM);
         }
 
@@ -223,21 +223,22 @@ int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count)
         desc.ld_tgt_count = max_ost_count;
 
         if (obd_ioctl_pack(&data, &buf, OBD_MAX_IOCTL_BUFFER)) {
-                fprintf(stderr, "internal buffer error packing\n");
+                fprintf(stderr, "error: %s: internal packing error\n",
+                        __FUNCTION__);
                 rc = EINVAL;
                 goto out;
         }
 
         rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
         if (rc) {
-                err_msg("error getting LOV config");
+                err_msg("error: %s: getting LOV config", __FUNCTION__);
                 rc = errno;
                 goto out;
         }
 
         if (obd_ioctl_unpack(&data, buf, OBD_MAX_IOCTL_BUFFER)) {
-                fprintf(stderr, "invalid reply from ioctl");
-                rc = EINVAL;
+                rc = errno = EINVAL;
+                err_msg("error: %s: internal ioctl unpack", __FUNCTION__);
                 goto out;
         }
 
@@ -261,8 +262,12 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
         /* Get the lov name */
         rc = ioctl(dirfd(dir), OBD_IOC_GETNAME, (void *)uuid);
         if (rc) {
-                fprintf(stderr, "error: can't get lov name: %s\n",
-                        strerror(rc = errno));
+                rc = errno;     /* errno values are positive */
+                if (rc == ENOTTY)
+                        fprintf(stderr, "error: %s does not appear to be in "
+                                "a Lustre filesystem\n", dname);
+                else
+                        err_msg("error: can't get lov name: %s", dname);
                 return rc;
         }
 
@@ -271,8 +276,7 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
                  uuid);
         fp = fopen(buf, "r");
         if (fp == NULL) {
-                fprintf(stderr, "error: %s opening %s\n",
-                        strerror(rc = errno), buf);
+                rc = errno;     /* capture the fopen() error for the return below */
+                err_msg("error: opening '%s'", buf);
                 return rc;
         }
 
@@ -298,8 +302,9 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
         fclose(fp);
 
         if (param->obduuid && (param->obdindex == OBD_NOT_FOUND)) {
-                printf("unknown obduuid: %s\n", param->obduuid->uuid);
-                rc =  EINVAL;
+                fprintf(stderr, "error: %s: unknown obduuid: %s\n",
+                        __FUNCTION__, param->obduuid->uuid);
+                rc = EINVAL;
         }
 
         return (rc);
@@ -505,17 +510,19 @@ static int find_process_file(DIR *dir, char *dname, char *fname,
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
-                                fprintf(stderr,
-                                        "%s/%s has no stripe info\n",
+                                fprintf(stderr, "%s/%s has no stripe info\n",
                                         dname, fname);
                         rc = 0;
-                } else if (errno == EISDIR) {
-                        fprintf(stderr, "process_file on directory %s/%s!\n",
+                } else if (errno == ENOTTY) {
+                        fprintf(stderr, "error: %s/%s is not a Lustre fs?\n",
                                 dname, fname);
+                } else if (errno == EISDIR) {
+                        err_msg("error: %s: directory %s/%s",
+                                __FUNCTION__, dname, fname);
                         /* add fname to directory list; */
                         rc = errno;
                 } else {
-                        err_msg("IOC_MDC_GETSTRIPE ioctl failed for '%s/%s'",
+                        err_msg("error: IOC_MDC_GETSTRIPE failed for '%s/%s'",
                                 dname, fname);
                         rc = errno;
                 }
@@ -567,10 +574,14 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param)
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && param->verbose)
-                                printf("%s/%s has no stripe info\n", dname, "");
+                                printf("%s has no stripe info\n", dname);
                         rc = 0;
+                } else if (errno == ENOTTY) {
+                        fprintf(stderr, "error: %s: %s not on a Lustre fs?\n",
+                                __FUNCTION__, dname);
                 } else {
-                        err_msg("GETSTRIPE failed for %s", dname);
+                        err_msg("error: %s: LL_IOC_LOV_GETSTRIPE failed for %s",
+                                __FUNCTION__, dname);
                 }
         } else {
                llapi_lov_dump_user_lmm(param, dname, "");
@@ -586,8 +597,8 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param)
 
                 switch (dirp->d_type) {
                 case DT_UNKNOWN:
-                        err_msg("\"%s\" is UNKNOWN type %d", dirp->d_name,
-                                dirp->d_type);
+                        fprintf(stderr, "error: %s: '%s' is UNKNOWN type %d\n",
+                                __FUNCTION__, dirp->d_name, dirp->d_type);
                         /* If we cared we could stat the file to determine
                          * type and continue on here, but we don't since we
                          * know d_type should be valid for lustre and this
@@ -601,7 +612,8 @@ static int process_dir(DIR *dir, char *dname, struct find_param *param)
                         strcat(path, dirp->d_name);
                         subdir = opendir(path);
                         if (subdir == NULL) {
-                                err_msg("\"%.40s\" opendir failed", path);
+                                err_msg("error: %s: opendir '%.40s'",
+                                        __FUNCTION__, path);
                                 return errno;
                         }
                         rc = process_dir(subdir, path, param);
@@ -627,10 +639,12 @@ static int process_path(char *path, struct find_param *param)
         fname = strrchr(path, '/');
         if (fname != NULL && fname[1] == '\0') {
                 /* Trailing '/', it must be a dir */
-                *fname = '\0';
+                if (strlen(path) > 1)
+                        *fname = '\0';
+
                 dir = opendir(path);
                 if (dir == NULL) {
-                        err_msg("\"%.40s\" opendir failed", path);
+                        err_msg("error: %s: '%.40s' opendir",__FUNCTION__,path);
                         rc = errno;
                 } else {
                         rc = process_dir(dir, path, param);
@@ -649,10 +663,13 @@ static int process_path(char *path, struct find_param *param)
                         *fname = '\0';
                         fname++;
                         dname = path;
+                        if (dname[0] == '\0')
+                                dname = "/";
                 }
                 dir = opendir(dname);
                 if (dir == NULL) {
-                        err_msg("\"%.40s\" opendir failed", dname);
+                        err_msg("error: %s: '%.40s' open failed",
+                                __FUNCTION__, dname);
                         rc = errno;
                 } else {
                         if (!param->got_uuids)
@@ -696,6 +713,47 @@ out:
         return ret;
 }
 
+/*
+ * Query statfs data and the UUID of one target of a mounted filesystem
+ * through the LL_IOC_OBD_STATFS ioctl.
+ *
+ * \param path      file or directory inside the mounted filesystem
+ * \param type      target class selector passed through to the ioctl
+ *                  (callers in lfs use LL_STATFS_MDC and LL_STATFS_LOV)
+ * \param index     index of the target within that class
+ * \param stat_buf  buffer handed to the ioctl for the statfs data
+ * \param uuid_buf  buffer handed to the ioctl for the target UUID
+ *
+ * \retval 0        success
+ * \retval -EINVAL  the ioctl request could not be packed
+ * \retval -errno   open() or ioctl() failed
+ */
+int llapi_obd_statfs(char *path, __u32 type, __u32 index,
+                     struct obd_statfs *stat_buf,
+                     struct obd_uuid *uuid_buf)
+{
+        int fd;
+        char raw[OBD_MAX_IOCTL_BUFFER] = {'\0'};
+        char *rawbuf = raw;
+        struct obd_ioctl_data data;
+        int rc = 0;
+
+        /* Only a few fields are set below; clear the rest so the packing
+         * code never reads indeterminate lengths or pointers. */
+        memset(&data, 0, sizeof(data));
+        data.ioc_inlbuf1 = (char *)&type;
+        data.ioc_inllen1 = sizeof(__u32);
+        data.ioc_inlbuf2 = (char *)&index;
+        data.ioc_inllen2 = sizeof(__u32);
+        data.ioc_pbuf1 = (char *)stat_buf;
+        data.ioc_plen1 = sizeof(struct obd_statfs);
+        data.ioc_pbuf2 = (char *)uuid_buf;
+        data.ioc_plen2 = sizeof(struct obd_uuid);
+
+        if (obd_ioctl_pack(&data, &rawbuf, sizeof(raw))) {
+                fprintf(stderr, "llapi_obd_statfs: error packing ioctl data\n");
+                /* must not report success: the output buffers were never
+                 * filled in */
+                return -EINVAL;
+        }
+
+        fd = open(path, O_RDONLY);
+        /* errno is only meaningful if open() actually failed */
+        if (fd < 0 && errno == EISDIR)
+                fd = open(path, O_DIRECTORY | O_RDONLY);
+
+        if (fd < 0) {
+                rc = -errno;
+                err_msg("error: %s: opening '%s'", __FUNCTION__, path);
+                return rc;
+        }
+        rc = ioctl(fd, LL_IOC_OBD_STATFS, (void *)rawbuf);
+        if (rc)
+                rc = -errno;
+
+        close(fd);
+        return rc;
+}
+
 #define MAX_STRING_SIZE 128
 #define DEVICES_LIST "/proc/fs/lustre/devices"
 
@@ -777,8 +835,7 @@ static void do_target_check(char *obd_type_name, char *obd_name,
 
         rc = llapi_ping(obd_type_name, obd_name);
         if (rc) {
-                fprintf(stderr, "error: check %s: %s\n",
-                        obd_name, strerror(rc = errno));
+                err_msg("error: check '%s'", obd_name);
         } else {
                 printf("%s active.\n", obd_name);
         }
@@ -928,8 +985,8 @@ static int quotachown_process_file(DIR *dir, char *dname, char *fname,
          * invoke syscall directly. */
         rc = syscall(SYS_chown, pathname, st->st_uid, st->st_gid);
         if (rc)
-                fprintf(stderr, "chown %s (%u,%u) fail: %s\n",
-                        pathname, st->st_uid, st->st_gid, strerror(errno));
+                err_msg("error: chown %s (%u,%u)",
+                        pathname, st->st_uid, st->st_gid);
         return rc;
 }
 
index ae4beb3..fc75f21 100644 (file)
@@ -131,16 +131,16 @@ init_options(struct lustre_mount_data *lmd)
 }
 
 int
-print_options(struct lustre_mount_data *lmd, const char *options)
+print_options(FILE *out, struct lustre_mount_data *lmd, const char *options)
 {
         int i;
         for (i = 0; i < lmd->lmd_nid_count; i++) {
-                printf("mds nid %d:       %s\n", i, 
-                       libcfs_nid2str(lmd->lmd_nid[i]));
+                fprintf(out, "mds nid %d:       %s\n", i,
+                        libcfs_nid2str(lmd->lmd_nid[i]));
         }
-        printf("mds name:        %s\n", lmd->lmd_mds);
-        printf("profile:         %s\n", lmd->lmd_profile);
-        printf("options:         %s\n", options);
+        fprintf(out, "mds name:        %s\n", lmd->lmd_mds);
+        fprintf(out, "profile:         %s\n", lmd->lmd_profile);
+        fprintf(out, "options:         %s\n", options);
 
         return 0;
 }
@@ -150,17 +150,18 @@ static int parse_nids(struct lustre_mount_data *lmd, char *nids)
         int i = 0;
         char *tmp = 0;
         lnet_nid_t nid;
-        
+
         while ((tmp = strsep(&nids, ",:"))) {
                 nid = libcfs_str2nid(tmp);
                 if (nid == LNET_NID_ANY) {
-                        fprintf(stderr, "%s: Can't parse NID '%s'\n", 
+                        fprintf(stderr, "%s: Can't parse NID '%s'\n",
                                 progname, tmp);
                         continue;
                 }
                 lmd->lmd_nid[lmd->lmd_nid_count++] = nid;
                 if (lmd->lmd_nid_count >= MAX_FAILOVER_NIDS) {
-                        fprintf(stderr, "%s: Too many: ignoring nids after %s\n", 
+                        fprintf(stderr, "%s: Too many target NIDs: "
+                                "ignoring nids after %s\n",
                                 progname, tmp);
                         break;
                 }
@@ -417,7 +418,7 @@ int main(int argc, char *const argv[])
         }
 
         if (verbose)
-                print_options(&lmd, options);
+                print_options(stdout, &lmd, options);
 
         rc = access(target, F_OK);
         if (rc) {
@@ -432,6 +433,7 @@ int main(int argc, char *const argv[])
         if (rc) {
                 fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
                         source, target, strerror(errno));
+                print_options(stderr, &lmd, options);
                 if (errno == ENODEV)
                         fprintf(stderr, "Are the lustre modules loaded?\n"
                              "Check /etc/modules.conf and /proc/filesystems\n");
index 6e29ae8..ae2fa71 100644 (file)
@@ -216,6 +216,8 @@ static void print_1_cfg(struct lustre_cfg *lcfg)
         if (lcfg->lcfg_nid)
                 printf("nid=%s("LPX64")  ", libcfs_nid2str(lcfg->lcfg_nid),
                        lcfg->lcfg_nid);
+        if (lcfg->lcfg_nal)
+                printf("nal=%d ", lcfg->lcfg_nal);
         for (i = 0; i <  lcfg->lcfg_bufcount; i++)
                 printf("%d:%.*s  ", i, lcfg->lcfg_buflens[i], 
                        (char*)lustre_cfg_buf(lcfg, i));
index d584d29..fb80016 100755 (executable)
@@ -201,19 +201,20 @@ lmc_options = [
     ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""),
     ('nspath', "Local mount point of server namespace.", PARAM,""),
     ('format', ""),
-    ('quota', "quotaon:enable quota, only u|g|ug is supported now. \
-               iunit: the unit for slave to acquire/release inode quota from/to masteri.\
-                      Int type (>0), default value in Lustre is 5000 inodes.\
-               bunit: the unit for slave to acquire/release block quota from/to master.\
-                      Mbytes (>0), default value in Lustre is 100(Mbytes).\
-               itune: used to tune the threthold. When inode quota usage reach the threthold,\
-                      slave should acquire/release inode quota from/to master.\
-                      Int type (100 > btune > 0), default value in Lustre is 50 (percentge).\
-                      inode threthold = iunit * itune / 100.\
-               btune: used to tune the threthold. When block quota usage reach the threthold,\
-                      slave should acquire/release block quota from/to master.\
-                      Int type (100 > btune > 0), default value in Lustre is 50 (percentage).\
-                      block threthold = bunit * btune / 100.", PARAM,""),
+    ('quota', """
+    quotaon: enable quota, only u|g|ug is supported now.
+      iunit: the unit for slave to acquire/release inode quota from/to master.
+             Int type (>0), default value in Lustre is 5000 inodes.
+      bunit: the unit for slave to acquire/release block quota from/to master.
+             Mbytes (>0), default value in Lustre is 100(Mbytes).
+      itune: used to tune the threthold. When inode quota usage reach the threthold,
+             slave should acquire/release inode quota from/to master.
+             Int type (100 > btune > 0), default value in Lustre is 50 (percentge).
+             inode threthold = iunit * itune / 100.
+      btune: used to tune the threthold. When block quota usage reach the threthold,
+             slave should acquire/release block quota from/to master.
+             Int type (100 > btune > 0), default value in Lustre is 50 (percentage).
+             block threthold = bunit * btune / 100.""", PARAM,""),
     # clients: mountpoint and echo
     ('echo_client', "", PARAM),
     ('path', "Specify the mountpoint for Lustre.", PARAM),
@@ -693,12 +694,12 @@ def add_net(gen, lustre, options):
     # testing network
     if options.nonet:
         if options.verbose:
-            print "Skip the remote host networking test."
-    elif (node_name != 'client') and (real_net_type == 'tcp'):
+            print "Skipping the remote host networking test."
+    elif (real_net_type == 'tcp') and (nid != '*'):
         if options.verbose:
             print "Testing network on", node_name
         target = string.split(nid,'@')[0]
-        out = runcmd("ping -c 1 -w 10 %s" %target)
+        out = runcmd("ping -c 1 -w 5 %s" %target)
         if out != 0:
             print "Could not connect to", node_name,", Please check network."
             
index 4acb3c6..2a50cf2 100644 (file)
@@ -256,6 +256,9 @@ int do_add_uuid(char * func, char *uuid, lnet_nid_t nid)
 
         lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs);
         lcfg->lcfg_nid = nid;
+        /* Poison NAL -- pre 1.4.6 will LASSERT on 0 NAL, this way it 
+           doesn't work without crashing (bz 10130) */
+        lcfg->lcfg_nal = 0x5a;
 
 #if 0
         fprintf(stderr, "adding\tnid: %d\tuuid: %s\n",