LU-992 kernel: deprecate RHEL5 server support for master
author     yangsheng <ys@whamcloud.com>
           Fri, 21 Dec 2012 04:46:29 +0000 (12:46 +0800)
committer  Oleg Drokin <green@whamcloud.com>
           Tue, 8 Jan 2013 06:20:18 +0000 (01:20 -0500)
Remove patches related to RHEL5 server support.

Signed-off-by: yang sheng <ys@whamcloud.com>
Change-Id: I694c9bbe0b6713119501392540c9cf5c6f8e53f3
Reviewed-on: http://review.whamcloud.com/4865
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
60 files changed:
ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel54.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-disable-delalloc-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-dynlocks-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-dynlocks-common.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-failed-mount-b23368.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-force_over_128tb-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-inode-version-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-kill-dx_root.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-mballoc-group_check-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-misc-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-mmp-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-osd-iam-exports.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-osd-iop-common.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-pdir-fix.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-version-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-vmalloc-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel5.patch [deleted file]
ldiskfs/kernel_patches/patches/ext4_data_in_dirent.patch [deleted file]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series [deleted file]
lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel5.patch [deleted file]
lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/patches/export-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/patches/export_symbol_numa-2.6-fc5.patch [deleted file]
lustre/kernel_patches/patches/export_symbols-2.6.12.patch [deleted file]
lustre/kernel_patches/patches/fix-forever-in-do_get_write_access.patch [deleted file]
lustre/kernel_patches/patches/jbd-jcberr-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch [deleted file]
lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch [deleted file]
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch [deleted file]
lustre/kernel_patches/patches/jbd2_stats_proc_init-wrong-place.patch [deleted file]
lustre/kernel_patches/patches/lustre_iser_max_sectors_tuning_lustre2.0.patch [deleted file]
lustre/kernel_patches/patches/md-avoid-bug_on-when-bmc-overflow.patch [deleted file]
lustre/kernel_patches/patches/md-rebuild-policy.patch [deleted file]
lustre/kernel_patches/patches/mpt-fusion-max-sge.patch [deleted file]
lustre/kernel_patches/patches/prune-icache-use-trylock-rhel5.patch [deleted file]
lustre/kernel_patches/patches/quota-large-limits-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-configurable-cachesize-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-large-io-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-maxsectors-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-merge-ios-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-mmp-unplug-dev.patch [deleted file]
lustre/kernel_patches/patches/raid5-rebuild-corrupt-bug.patch [deleted file]
lustre/kernel_patches/patches/raid5-stats-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-stripe-by-stripe-handling-rhel5.patch [deleted file]
lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch [deleted file]
lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch [deleted file]
lustre/kernel_patches/patches/small-fixes-about-jbd.patch [deleted file]
lustre/kernel_patches/series/2.6-rhel5.series [deleted file]
lustre/kernel_patches/which_patch

diff --git a/ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel5.patch
deleted file mode 100644 (file)
index a89a0aa..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -185,6 +185,8 @@ void ext4_journal_abort_handle(const cha
-       jbd2_journal_abort_handle(handle);
- }
-+EXPORT_SYMBOL(ext4_journal_abort_handle);
-+
- /* Deal with the reporting of failure conditions on a filesystem such as
-  * inconsistencies detected or read IO failures.
-  *
-@@ -2459,6 +2461,8 @@ out_fail:
-       return ret;
- }
-+EXPORT_SYMBOL(ext4_force_commit);
-+
- /*
-  * Setup any per-fs journal parameters now.  We'll do this both on
-  * initial mount, once the journal has been initialised but before we've
-@@ -3504,6 +3508,12 @@ int ext4_map_inode_page(struct inode *in
-                       unsigned long *blocks, int *created, int create);
- EXPORT_SYMBOL(ext4_map_inode_page);
-+EXPORT_SYMBOL(ext4_xattr_get);
-+EXPORT_SYMBOL(ext4_xattr_set_handle);
-+EXPORT_SYMBOL(ext4_bread);
-+EXPORT_SYMBOL(ext4_journal_start_sb);
-+EXPORT_SYMBOL(__ext4_journal_stop);
-+
- MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
- MODULE_DESCRIPTION("Fourth Extended Filesystem with extents");
- MODULE_LICENSE("GPL");
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -1024,6 +1024,8 @@ extern unsigned long ext4_count_free_ino
-                                      ext4_group_t group,
-                                      struct ext4_group_desc *desc);
- extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
-+extern struct buffer_head *ext4_read_inode_bitmap(struct super_block *sb,
-+                                                ext4_group_t block_group);
- /* mballoc.c */
- extern long ext4_mb_stats;
-Index: linux-stage/fs/ext4/ialloc.c
-===================================================================
---- linux-stage.orig/fs/ext4/ialloc.c
-+++ linux-stage/fs/ext4/ialloc.c
-@@ -96,7 +96,7 @@ unsigned ext4_init_inode_bitmap(struct s
-  *
-  * Return buffer_head of bitmap on success or NULL.
-  */
--static struct buffer_head *
-+struct buffer_head *
- ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
- {
-       struct ext4_group_desc *desc;
-@@ -137,6 +137,7 @@ ext4_read_inode_bitmap(struct super_bloc
-       }
-       return bh;
- }
-+EXPORT_SYMBOL(ext4_read_inode_bitmap);
- /*
-  * NOTE! When we get the inode, we're the only people
-Index: linux-stage/fs/ext4/balloc.c
-===================================================================
---- linux-stage.orig/fs/ext4/balloc.c
-+++ linux-stage/fs/ext4/balloc.c
-@@ -236,6 +236,7 @@ struct ext4_group_desc * ext4_get_group_
-               *bh = sbi->s_group_desc[group_desc];
-       return desc;
- }
-+EXPORT_SYMBOL(ext4_get_group_desc);
- static int ext4_valid_block_bitmap(struct super_block *sb,
-                                       struct ext4_group_desc *desc,
diff --git a/ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel54.patch b/ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel54.patch
deleted file mode 100644 (file)
index c3b0ef8..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-diff -up a/fs/ext4/super.c b/s/ext4/super.c
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -706,9 +767,47 @@ static inline struct inode *dquot_to_ino
- static ssize_t ext4_quota_write(struct super_block *sb, int type,
-                               const char *data, size_t len, loff_t off);
-
-+static int ext4_dquot_initialize(struct inode *inode, int type)
-+{
-+      handle_t *handle;
-+      int ret, err;
-+
-+      /* We may create quota structure so we need to reserve enough blocks */
-+      handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+      ret = dquot_initialize(inode, type);
-+      err = ext4_journal_stop(handle);
-+      if (!ret)
-+              ret = err;
-+      return ret;
-+}
-+
-+static int ext4_dquot_drop(struct inode *inode)
-+{
-+      handle_t *handle;
-+      int ret, err;
-+
-+      /* We may delete quota structure so we need to reserve enough blocks */
-+      handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
-+      if (IS_ERR(handle)) {
-+              /*
-+               * We call dquot_drop() anyway to at least release references
-+               * to quota structures so that umount does not hang.
-+               */
-+              dquot_drop(inode);
-+              return PTR_ERR(handle);
-+      }
-+      ret = dquot_drop(inode);
-+      err = ext4_journal_stop(handle);
-+      if (!ret)
-+              ret = err;
-+      return ret;
-+}
-+
- static struct dquot_operations ext4_quota_operations = {
--      .initialize     = dquot_initialize,
--      .drop           = dquot_drop,
-+      .initialize     = ext4_dquot_initialize,
-+      .drop           = ext4_dquot_drop,
-       .alloc_space    = dquot_alloc_space,
-       .alloc_inode    = dquot_alloc_inode,
-       .free_space     = dquot_free_space,
diff --git a/ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel5.patch
deleted file mode 100644 (file)
index 6775a31..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/ext4/super.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/super.c
-+++ linux-2.6.18-128.1.6/fs/ext4/super.c
-@@ -70,6 +70,8 @@ struct page *ext4_zero_page;
- struct proc_dir_entry *proc_root_ext4;
-+static int bigendian_extents;
-+
- ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
-                              struct ext4_group_desc *bg)
- {
-@@ -1222,7 +1224,7 @@ enum {
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-       Opt_inode_readahead_blks, Opt_journal_ioprio,
--      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_bigendian_extents,
- };
- static match_table_t tokens = {
-@@ -1284,6 +1286,7 @@ static match_table_t tokens = {
-       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
-       {Opt_auto_da_alloc, "auto_da_alloc"},
-       {Opt_noauto_da_alloc, "noauto_da_alloc"},
-+      {Opt_bigendian_extents, "bigendian_extents"},
-       {Opt_err, NULL},
- };
-@@ -1682,6 +1685,9 @@ clear_qf_name:
-                               return 0;
-                       sbi->s_stripe = option;
-                       break;
-+              case Opt_bigendian_extents:
-+                      bigendian_extents = 1;
-+                      break;
-               default:
-                       printk(KERN_ERR
-                              "EXT4-fs: Unrecognized mount option \"%s\" "
-@@ -2561,6 +2567,15 @@ static int ext4_fill_super(struct super_
-               goto failed_mount;
-       }
-+#ifdef __BIG_ENDIAN
-+      if (bigendian_extents == 0) {
-+              printk(KERN_ERR "EXT4-fs: extents feature is not guaranteed to "
-+                     "work on big-endian systems. Use \"bigendian_extents\" "
-+                     "mount option to override.\n");
-+              goto failed_mount;
-+      }
-+#endif
-+
-       bgl_lock_init(sbi->s_blockgroup_lock);
-       sbi->s_last_alloc_group = -1;
diff --git a/ldiskfs/kernel_patches/patches/ext4-disable-delalloc-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-disable-delalloc-rhel5.patch
deleted file mode 100644 (file)
index 912b52c..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-Disable the DELALLOC feature because it is not robust in ext4 versions < 2.6.31.
-
---
-diff -rupN linux-2.6.18-164.6.1_1/fs/ext4/super.c linux-2.6.18-164.6.1_2/fs/ext4/super.c
---- linux-2.6.18-164.6.1_1/fs/ext4/super.c     2010-08-05 13:44:07.000000000 +0530
-+++ linux-2.6.18-164.6.1_2/fs/ext4/super.c     2010-08-05 13:46:29.000000000 +0530
-@@ -2091,12 +2091,6 @@ static int ext4_fill_super(struct super_
-       set_opt(sbi->s_mount_opt, BARRIER);
--      /*
--       * enable delayed allocation by default
--       * Use -o nodelalloc to turn it off
--       */
--      set_opt(sbi->s_mount_opt, DELALLOC);
--
-       if (!parse_options((char *) data, sb, &journal_devnum,
-                          &journal_ioprio, NULL, 0))
-               goto failed_mount;
diff --git a/ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel5.patch
deleted file mode 100644 (file)
index 9a0d61a..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-Index: linux-stage/fs/ext4/xattr.c
-===================================================================
---- linux-stage.orig/fs/ext4/xattr.c
-+++ linux-stage/fs/ext4/xattr.c
-@@ -86,7 +86,8 @@
- # define ea_bdebug(f...)
- #endif
--static void ext4_xattr_cache_insert(struct buffer_head *);
-+static void ext4_xattr_cache_insert(struct super_block *,
-+                                  struct buffer_head *);
- static struct buffer_head *ext4_xattr_cache_find(struct inode *,
-                                                struct ext4_xattr_header *,
-                                                struct mb_cache_entry **);
-@@ -233,7 +234,7 @@ bad_block: ext4_error(inode->i_sb, __fun
-               error = -EIO;
-               goto cleanup;
-       }
--      ext4_xattr_cache_insert(bh);
-+      ext4_xattr_cache_insert(inode->i_sb, bh);
-       entry = BFIRST(bh);
-       error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
-       if (error == -EIO)
-@@ -375,7 +376,7 @@ ext4_xattr_block_list(struct inode *inod
-               error = -EIO;
-               goto cleanup;
-       }
--      ext4_xattr_cache_insert(bh);
-+      ext4_xattr_cache_insert(inode->i_sb, bh);
-       error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
- cleanup:
-@@ -472,7 +473,9 @@ ext4_xattr_release_block(handle_t *handl
-       struct mb_cache_entry *ce = NULL;
-       int error = 0;
--      ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
-+      if (!test_opt(inode->i_sb, NO_MBCACHE))
-+              ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev,
-+                                      bh->b_blocknr);
-       error = ext4_journal_get_write_access(handle, bh);
-       if (error)
-               goto out;
-@@ -700,8 +703,10 @@ ext4_xattr_block_set(handle_t *handle, s
-       if (i->value && i->value_len > sb->s_blocksize)
-               return -ENOSPC;
-       if (s->base) {
--              ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
--                                      bs->bh->b_blocknr);
-+              if (!test_opt(inode->i_sb, NO_MBCACHE))
-+                      ce = mb_cache_entry_get(ext4_xattr_cache,
-+                                              bs->bh->b_bdev,
-+                                              bs->bh->b_blocknr);
-               error = ext4_journal_get_write_access(handle, bs->bh);
-               if (error)
-                       goto cleanup;
-@@ -718,7 +723,7 @@ ext4_xattr_block_set(handle_t *handle, s
-                               if (!IS_LAST_ENTRY(s->first))
-                                       ext4_xattr_rehash(header(s->base),
-                                                         s->here);
--                              ext4_xattr_cache_insert(bs->bh);
-+                              ext4_xattr_cache_insert(sb, bs->bh);
-                       }
-                       unlock_buffer(bs->bh);
-                       if (error == -EIO)
-@@ -801,7 +806,8 @@ inserted:
-                               if (error)
-                                       goto cleanup_dquot;
-                       }
--                      mb_cache_entry_release(ce);
-+                      if (ce)
-+                              mb_cache_entry_release(ce);
-                       ce = NULL;
-               } else if (bs->bh && s->base == bs->bh->b_data) {
-                       /* We were modifying this block in-place. */
-@@ -845,7 +851,7 @@ getblk_failed:
-                       memcpy(new_bh->b_data, s->base, new_bh->b_size);
-                       set_buffer_uptodate(new_bh);
-                       unlock_buffer(new_bh);
--                      ext4_xattr_cache_insert(new_bh);
-+                      ext4_xattr_cache_insert(sb, new_bh);
-                       error = ext4_handle_dirty_metadata(handle,
-                                                          inode, new_bh);
-                       if (error)
-@@ -1404,12 +1410,15 @@ ext4_xattr_put_super(struct super_block 
-  * Returns 0, or a negative error number on failure.
-  */
- static void
--ext4_xattr_cache_insert(struct buffer_head *bh)
-+ext4_xattr_cache_insert(struct super_block *sb, struct buffer_head *bh)
- {
-       __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
-       struct mb_cache_entry *ce;
-       int error;
-+      if (test_opt(sb, NO_MBCACHE))
-+              return;
-+
-       ce = mb_cache_entry_alloc(ext4_xattr_cache);
-       if (!ce) {
-               ea_bdebug(bh, "out of memory");
-@@ -1483,6 +1492,8 @@ ext4_xattr_cache_find(struct inode *inod
-       __u32 hash = le32_to_cpu(header->h_hash);
-       struct mb_cache_entry *ce;
-+      if (test_opt(inode->i_sb, NO_MBCACHE))
-+              return NULL;
-       if (!header->h_hash)
-               return NULL;  /* never share */
-       ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -1481,6 +1481,7 @@ enum {
-
-       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_bigendian_extents,
-       Opt_force_over_128tb,
-+      Opt_no_mbcache,
- };
- static match_table_t tokens = {
-@@ -1554,6 +1555,7 @@ static match_table_t tokens = {
-       {Opt_noauto_da_alloc, "noauto_da_alloc"},
-       {Opt_bigendian_extents, "bigendian_extents"},
-       {Opt_force_over_128tb, "force_over_128tb"},
-+      {Opt_no_mbcache, "no_mbcache"},
-       {Opt_err, NULL},
- };
-@@ -2030,6 +2032,9 @@ set_qf_format:
-                       }
-                       clear_opt(sbi->s_mount_opt, EXTENTS);
-                       break;
-+              case Opt_no_mbcache:
-+                      set_opt(sbi->s_mount_opt, NO_MBCACHE);
-+                      break;
-               default:
-                       ext4_msg(sb, KERN_ERR,
-                              "Unrecognized mount option \"%s\" "
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -715,7 +715,8 @@ struct ext4_inode_info {
- /*
-  * Mount flags
-  */
--#define EXT4_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
-+#define EXT4_MOUNT_NO_MBCACHE           0x00001 /* Disable mbcache */
-+#define EXT4_MOUNT_OLDALLOC           0x00002 /* Don't use the new Orlov allocator */
- #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
- #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
- #define EXT4_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
diff --git a/ldiskfs/kernel_patches/patches/ext4-dynlocks-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-dynlocks-2.6-rhel5.patch
deleted file mode 100644 (file)
index cecbbb1..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-Index: linux-stage/fs/ext4/Makefile
-===================================================================
---- linux-stage.orig/fs/ext4/Makefile
-+++ linux-stage/fs/ext4/Makefile
-@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
- ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-               ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
--              mmp.o
-+              mmp.o dynlocks.o
- ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
- ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -4159,6 +4159,7 @@ static int __init init_ext4_fs(void)
-       err = init_inodecache();
-       if (err)
-               goto out1;
-+      dynlock_cache_init();
-       err = register_filesystem(&ext4_fs_type);
-       if (err)
-               goto out;
-@@ -4195,6 +4196,7 @@ static void __exit exit_ext4_fs(void)
-       unregister_filesystem(&ext4dev_fs_type);
- #endif
-       destroy_inodecache();
-+      dynlock_cache_exit();
-       exit_ext4_xattr();
-       exit_ext4_mballoc();
-       __free_page(ext4_zero_page);
diff --git a/ldiskfs/kernel_patches/patches/ext4-dynlocks-common.patch b/ldiskfs/kernel_patches/patches/ext4-dynlocks-common.patch
deleted file mode 100644 (file)
index b9dcbd9..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-diff -rupN linux-2.6.18-128.1.6_1/fs/ext4/dynlocks.c linux-2.6.18-128.1.6_2/fs/ext4/dynlocks.c
---- linux-2.6.18-128.1.6_1/fs/ext4/dynlocks.c  1970-01-01 05:30:00.000000000 +0530
-+++ linux-2.6.18-128.1.6_2/fs/ext4/dynlocks.c  2009-08-13 20:42:59.000000000 +0530
-@@ -0,0 +1,236 @@
-+/*
-+ * Dynamic Locks
-+ *
-+ * struct dynlock is lockspace
-+ * one may request lock (exclusive or shared) for some value
-+ * in that lockspace
-+ *
-+ */
-+
-+#include <linux/dynlocks.h>
-+#include <linux/module.h>
-+#include <linux/slab.h>
-+#include <linux/sched.h>
-+
-+#define DYNLOCK_HANDLE_MAGIC  0xd19a10c
-+#define DYNLOCK_HANDLE_DEAD   0xd1956ee
-+#define DYNLOCK_LIST_MAGIC    0x11ee91e6
-+
-+static kmem_cache_t * dynlock_cachep = NULL;
-+
-+struct dynlock_handle {
-+      unsigned                dh_magic;
-+      struct list_head        dh_list;
-+      unsigned long           dh_value;       /* lock value */
-+      int                     dh_refcount;    /* number of users */
-+      int                     dh_readers;
-+      int                     dh_writers;
-+      int                     dh_pid;         /* holder of the lock */
-+      wait_queue_head_t       dh_wait;
-+};
-+
-+int __init dynlock_cache_init(void)
-+{
-+      int rc = 0;
-+
-+      /* printk(KERN_INFO "init dynlocks cache\n"); */
-+      dynlock_cachep = kmem_cache_create("dynlock_cache",
-+                                       sizeof(struct dynlock_handle),
-+                                       0,
-+                                       SLAB_HWCACHE_ALIGN,
-+                                       NULL, NULL);
-+      if (dynlock_cachep == NULL) {
-+              printk(KERN_ERR "Not able to create dynlock cache");
-+              rc = -ENOMEM;
-+      }
-+      return rc;
-+}
-+
-+void dynlock_cache_exit(void)
-+{
-+      /* printk(KERN_INFO "exit dynlocks cache\n"); */
-+      kmem_cache_destroy(dynlock_cachep);
-+}
-+
-+/*
-+ * dynlock_init
-+ *
-+ * initialize lockspace
-+ *
-+ */
-+void dynlock_init(struct dynlock *dl)
-+{
-+      spin_lock_init(&dl->dl_list_lock);
-+      INIT_LIST_HEAD(&dl->dl_list);
-+      dl->dl_magic = DYNLOCK_LIST_MAGIC;
-+}
-+EXPORT_SYMBOL(dynlock_init);
-+
-+/*
-+ * dynlock_lock
-+ *
-+ * acquires lock (exclusive or shared) in specified lockspace
-+ * each lock in lockspace is allocated separately, so user have
-+ * to specify GFP flags.
-+ * routine returns pointer to lock. this pointer is intended to
-+ * be passed to dynlock_unlock
-+ *
-+ */
-+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value,
-+                                  enum dynlock_type lt, gfp_t gfp)
-+{
-+      struct dynlock_handle *nhl = NULL;
-+      struct dynlock_handle *hl;
-+
-+      BUG_ON(dl == NULL);
-+      BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC);
-+
-+repeat:
-+      /* find requested lock in lockspace */
-+      spin_lock(&dl->dl_list_lock);
-+      BUG_ON(dl->dl_list.next == NULL);
-+      BUG_ON(dl->dl_list.prev == NULL);
-+      list_for_each_entry(hl, &dl->dl_list, dh_list) {
-+              BUG_ON(hl->dh_list.next == NULL);
-+              BUG_ON(hl->dh_list.prev == NULL);
-+              BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
-+              if (hl->dh_value == value) {
-+                      /* lock is found */
-+                      if (nhl) {
-+                              /* someone else just allocated
-+                               * lock we didn't find and just created
-+                               * so, we drop our lock
-+                               */
-+                              kmem_cache_free(dynlock_cachep, nhl);
-+                              nhl = NULL;
-+                      }
-+                      hl->dh_refcount++;
-+                      goto found;
-+              }
-+      }
-+      /* lock not found */
-+      if (nhl) {
-+              /* we already have allocated lock. use it */
-+              hl = nhl;
-+              nhl = NULL;
-+              list_add(&hl->dh_list, &dl->dl_list);
-+              goto found;
-+      }
-+      spin_unlock(&dl->dl_list_lock);
-+      
-+      /* lock not found and we haven't allocated lock yet. allocate it */
-+      nhl = kmem_cache_alloc(dynlock_cachep, gfp);
-+      if (nhl == NULL)
-+              return NULL;
-+      nhl->dh_refcount = 1;
-+      nhl->dh_value = value;
-+      nhl->dh_readers = 0;
-+      nhl->dh_writers = 0;
-+      nhl->dh_magic = DYNLOCK_HANDLE_MAGIC;
-+      init_waitqueue_head(&nhl->dh_wait);
-+
-+      /* while lock is being allocated, someone else may allocate it
-+       * and put onto to list. check this situation
-+       */
-+      goto repeat;
-+
-+found:
-+      if (lt == DLT_WRITE) {
-+              /* exclusive lock: user don't want to share lock at all
-+               * NOTE: one process may take the same lock several times
-+               * this functionaly is useful for rename operations */
-+              while ((hl->dh_writers && hl->dh_pid != current->pid) ||
-+                              hl->dh_readers) {
-+                      spin_unlock(&dl->dl_list_lock);
-+                      wait_event(hl->dh_wait,
-+                              hl->dh_writers == 0 && hl->dh_readers == 0);
-+                      spin_lock(&dl->dl_list_lock);
-+              }
-+              hl->dh_writers++;
-+      } else {
-+              /* shared lock: user do not want to share lock with writer */
-+              while (hl->dh_writers) {
-+                      spin_unlock(&dl->dl_list_lock);
-+                      wait_event(hl->dh_wait, hl->dh_writers == 0);
-+                      spin_lock(&dl->dl_list_lock);
-+              }
-+              hl->dh_readers++;
-+      }
-+      hl->dh_pid = current->pid;
-+      spin_unlock(&dl->dl_list_lock);
-+
-+      return hl;
-+}
-+EXPORT_SYMBOL(dynlock_lock);
-+
-+
-+/*
-+ * dynlock_unlock
-+ *
-+ * user have to specify lockspace (dl) and pointer to lock structure
-+ * returned by dynlock_lock()
-+ *
-+ */
-+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl)
-+{
-+      int wakeup = 0;
-+      
-+      BUG_ON(dl == NULL);
-+      BUG_ON(hl == NULL);
-+      BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC);
-+
-+      if (hl->dh_magic != DYNLOCK_HANDLE_MAGIC)
-+              printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dh_magic);
-+
-+      BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
-+      BUG_ON(hl->dh_writers != 0 && current->pid != hl->dh_pid);
-+
-+      spin_lock(&dl->dl_list_lock);
-+      if (hl->dh_writers) {
-+              BUG_ON(hl->dh_readers != 0);
-+              hl->dh_writers--;
-+              if (hl->dh_writers == 0)
-+                      wakeup = 1;
-+      } else if (hl->dh_readers) {
-+              hl->dh_readers--;
-+              if (hl->dh_readers == 0)
-+                      wakeup = 1;
-+      } else {
-+              BUG();
-+      }
-+      if (wakeup) {
-+              hl->dh_pid = 0;
-+              wake_up(&hl->dh_wait);
-+      }
-+      if (--(hl->dh_refcount) == 0) {
-+              hl->dh_magic = DYNLOCK_HANDLE_DEAD;
-+              list_del(&hl->dh_list);
-+              kmem_cache_free(dynlock_cachep, hl);
-+      }
-+      spin_unlock(&dl->dl_list_lock);
-+}
-+EXPORT_SYMBOL(dynlock_unlock);
-+
-+int dynlock_is_locked(struct dynlock *dl, unsigned long value)
-+{
-+      struct dynlock_handle *hl;
-+      int result = 0;
-+
-+      /* find requested lock in lockspace */
-+      spin_lock(&dl->dl_list_lock);
-+      BUG_ON(dl->dl_list.next == NULL);
-+      BUG_ON(dl->dl_list.prev == NULL);
-+      list_for_each_entry(hl, &dl->dl_list, dh_list) {
-+              BUG_ON(hl->dh_list.next == NULL);
-+              BUG_ON(hl->dh_list.prev == NULL);
-+              BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
-+              if (hl->dh_value == value && hl->dh_pid == current->pid) {
-+                      /* lock is found */
-+                      result = 1;
-+                      break;
-+              }
-+      }
-+      spin_unlock(&dl->dl_list_lock);
-+      return result;
-+}
-+EXPORT_SYMBOL(dynlock_is_locked);
-diff -rupN linux-2.6.18-128.1.6_1/include/linux/dynlocks.h linux-2.6.18-128.1.6_2/include/linux/dynlocks.h
---- linux-2.6.18-128.1.6_1/include/linux/dynlocks.h    1970-01-01 05:30:00.000000000 +0530
-+++ linux-2.6.18-128.1.6_2/include/linux/dynlocks.h    2009-08-13 20:43:18.000000000 +0530
-@@ -0,0 +1,34 @@
-+#ifndef _LINUX_DYNLOCKS_H
-+#define _LINUX_DYNLOCKS_H
-+
-+#include <linux/list.h>
-+#include <linux/wait.h>
-+
-+struct dynlock_handle;
-+
-+/*
-+ * lock's namespace:
-+ *   - list of locks
-+ *   - lock to protect this list
-+ */
-+struct dynlock {
-+      unsigned                dl_magic;
-+      struct list_head        dl_list;
-+      spinlock_t              dl_list_lock;
-+};
-+
-+enum dynlock_type {
-+      DLT_WRITE,
-+      DLT_READ
-+};
-+
-+int dynlock_cache_init(void);
-+void dynlock_cache_exit(void);
-+void dynlock_init(struct dynlock *dl);
-+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value,
-+                                  enum dynlock_type lt, gfp_t gfp);
-+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock);
-+int dynlock_is_locked(struct dynlock *dl, unsigned long value);
-+
-+#endif
-+
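The linux/dynlocks.h header above declares the whole dynlock API that the removed patch added. As an illustration only (not taken from the removed patch, and compilable only against a kernel carrying it), a caller inside the patched kernel would use it roughly as follows; the lockspace and key names here are hypothetical:

#include <linux/dynlocks.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* one lockspace covering all keys of some object (e.g. directory hashes) */
static struct dynlock example_lockspace;

static void example_setup(void)
{
        /* initialise the lockspace once, before any dynlock_lock() call */
        dynlock_init(&example_lockspace);
}

static int example_locked_update(unsigned long key)
{
        struct dynlock_handle *dh;

        /* take an exclusive (DLT_WRITE) lock on a single value in the space */
        dh = dynlock_lock(&example_lockspace, key, DLT_WRITE, GFP_NOFS);
        if (dh == NULL)
                return -ENOMEM;

        /* ... modify the object identified by 'key' under the lock ... */

        /* release the lock; the handle is freed when its refcount drops */
        dynlock_unlock(&example_lockspace, dh);
        return 0;
}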
diff --git a/ldiskfs/kernel_patches/patches/ext4-failed-mount-b23368.patch b/ldiskfs/kernel_patches/patches/ext4-failed-mount-b23368.patch
deleted file mode 100644 (file)
index e38f7c7..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -3427,7 +3427,6 @@ failed_mount:
-       brelse(bh);
- out_fail:
-       sb->s_fs_info = NULL;
--      kfree(sbi->s_blockgroup_lock);
-       kfree(sbi);
-       lock_kernel();
-       return ret;
diff --git a/ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel5.patch
deleted file mode 100644 (file)
index 47269d8..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-diff -rupN linux-2.6.18-164.6.1_1/fs/ext4/ext4.h linux-2.6.18-164.6.1_2/fs/ext4/ext4.h
---- linux-2.6.18-164.6.1_1/fs/ext4/ext4.h      2009-12-22 13:07:27.000000000 +0530
-+++ linux-2.6.18-164.6.1_2/fs/ext4/ext4.h      2009-12-22 13:10:18.000000000 +0530
-@@ -305,6 +305,7 @@ struct ext4_new_group_data {
- #define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
- #define EXT4_IOC_GROUP_ADD            _IOW('f', 8, struct ext4_new_group_input)
- #define EXT4_IOC_MIGRATE              _IO('f', 9)
-+#define EXT4_IOC_FIEMAP               _IOWR('f', 11, struct fiemap)
-  /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
-  /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
-diff -rupN linux-2.6.18-164.6.1_1/fs/ext4/ioctl.c linux-2.6.18-164.6.1_2/fs/ext4/ioctl.c
---- linux-2.6.18-164.6.1_1/fs/ext4/ioctl.c     2009-12-22 13:06:51.000000000 +0530
-+++ linux-2.6.18-164.6.1_2/fs/ext4/ioctl.c     2009-12-22 13:09:45.000000000 +0530
-@@ -17,6 +17,71 @@
- #include "ext4_jbd2.h"
- #include "ext4.h"
-+/* So that the fiemap access checks can't overflow on 32 bit machines. */
-+#define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
-+
-+static int fiemap_check_ranges(struct super_block *sb,
-+                             u64 start, u64 len, u64 *new_len)
-+{
-+      *new_len = len;
-+
-+      if (len == 0)
-+              return -EINVAL;
-+
-+      if (start > sb->s_maxbytes)
-+              return -EFBIG;
-+
-+      /*
-+       * Shrink request scope to what the fs can actually handle.
-+       */
-+      if ((len > sb->s_maxbytes) ||
-+          (sb->s_maxbytes - len) < start)
-+              *new_len = sb->s_maxbytes - start;
-+
-+      return 0;
-+}
-+
-+int ioctl_fiemap(struct inode *inode, struct file *filp, unsigned long arg)
-+{
-+      struct fiemap fiemap;
-+      u64 len;
-+      struct fiemap_extent_info fieinfo = {0, };
-+      struct super_block *sb = inode->i_sb;
-+      int error = 0;
-+
-+      if (copy_from_user(&fiemap, (struct fiemap __user *) arg,
-+                         sizeof(struct fiemap)))
-+               return -EFAULT;
-+
-+      if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
-+              return -EINVAL;
-+
-+      error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
-+                                  &len);
-+      if (error)
-+              return error;
-+
-+      fieinfo.fi_flags = fiemap.fm_flags;
-+      fieinfo.fi_extents_max = fiemap.fm_extent_count;
-+      fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
-+
-+      if (fiemap.fm_extent_count != 0 &&
-+          !access_ok(VERIFY_WRITE, (void *)arg,
-+                     offsetof(typeof(fiemap), fm_extents[fiemap.fm_extent_count])))
-+              return -EFAULT;
-+
-+      if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
-+              filemap_write_and_wait(inode->i_mapping);
-+
-+      error = ext4_fiemap(inode, &fieinfo, fiemap.fm_start, len);
-+      fiemap.fm_flags = fieinfo.fi_flags;
-+      fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
-+      if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
-+              error = -EFAULT;
-+
-+      return error;
-+}
-+
- long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
- {
-       struct inode *inode = filp->f_dentry->d_inode;
-@@ -249,6 +314,9 @@ flags_out:
-               mutex_unlock(&(inode->i_mutex));
-               return err;
-       }
-+      case EXT4_IOC_FIEMAP: {
-+              return ioctl_fiemap(inode, filp, arg);
-+      }
-       default:
-               return -ENOTTY;
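The hunk above wires a FIEMAP request into ext4's ioctl handler for the RHEL5 kernel. As a minimal userspace sketch (not part of the removed patch), the call would be issued as below; it uses the generic FS_IOC_FIEMAP number from <linux/fs.h>, which has the same _IOWR('f', 11, struct fiemap) encoding as the EXT4_IOC_FIEMAP value defined in the patch:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
        struct fiemap *fm;
        unsigned int i, max = 32;              /* ask for up to 32 extents */
        int fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s <file>\n", argv[0]);
                return 1;
        }
        fd = open(argv[1], O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* struct fiemap ends in a flexible extent array, so size it by hand */
        fm = calloc(1, sizeof(*fm) + max * sizeof(struct fiemap_extent));
        if (fm == NULL)
                return 1;
        fm->fm_start = 0;
        fm->fm_length = FIEMAP_MAX_OFFSET;     /* map the whole file */
        fm->fm_flags = FIEMAP_FLAG_SYNC;       /* flush dirty data first */
        fm->fm_extent_count = max;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
                perror("FS_IOC_FIEMAP");
                return 1;
        }
        for (i = 0; i < fm->fm_mapped_extents; i++)
                printf("extent %u: logical %llu physical %llu length %llu\n", i,
                       (unsigned long long)fm->fm_extents[i].fe_logical,
                       (unsigned long long)fm->fm_extents[i].fe_physical,
                       (unsigned long long)fm->fm_extents[i].fe_length);
        free(fm);
        close(fd);
        return 0;
}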
diff --git a/ldiskfs/kernel_patches/patches/ext4-force_over_128tb-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-force_over_128tb-rhel5.patch
deleted file mode 100644 (file)
index 487b2cc..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-Index: linux-2.6.18-164.6.1/fs/ext4/super.c
-===================================================================
---- linux-2.6.18-164.6.1.orig/fs/ext4/super.c
-+++ linux-2.6.18-164.6.1/fs/ext4/super.c
-@@ -51,6 +51,8 @@
- struct proc_dir_entry *ext4_proc_root;
-+static int force_over_128tb;
-+
- static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
-                            unsigned long journal_devnum);
- static int ext4_commit_super(struct super_block *sb,
-@@ -1343,6 +1345,7 @@ enum {
-       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_inode_readahead_blks, Opt_journal_ioprio,
-       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_bigendian_extents,
-+      Opt_force_over_128tb,
- };
- static match_table_t tokens = {
-@@ -1410,6 +1413,7 @@ static match_table_t tokens = {
-       {Opt_auto_da_alloc, "auto_da_alloc"},
-       {Opt_noauto_da_alloc, "noauto_da_alloc"},
-       {Opt_bigendian_extents, "bigendian_extents"},
-+      {Opt_force_over_128tb, "force_over_128tb"},
-       {Opt_err, NULL},
- };
-@@ -1837,6 +1841,9 @@ set_qf_format:
-                       break;
-               case Opt_mballoc:
-                       break;
-+              case Opt_force_over_128tb:
-+                      force_over_128tb = 1;
-+                      break;
-               default:
-                       printk(KERN_ERR
-                              "EXT4-fs: Unrecognized mount option \"%s\" "
-@@ -2692,6 +2699,16 @@ static int ext4_fill_super(struct super_
-               goto failed_mount;
-       }
-+      if (ext4_blocks_count(es) > (8ULL << 32)) {
-+              if (force_over_128tb == 0) {
-+                      printk(KERN_ERR "EXT4-fs does not support filesystems "
-+                             "greater than 128TB and can cause data corruption."
-+                             "Use \"force_over_128tb\" mount option to override."
-+                             "\n");
-+                      goto failed_mount;
-+              }
-+      }
-+
-       if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
-               goto cantfind_ext4;
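For context on the threshold in the hunk above: 8ULL << 32 is 2^35 blocks, and with the 4 KiB block size ldiskfs normally uses that works out to 2^35 blocks x 4096 bytes/block = 2^47 bytes = 128 TiB, which is where the patch name and the error message get the 128TB limit (the figure assumes 4 KiB blocks).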
diff --git a/ldiskfs/kernel_patches/patches/ext4-inode-version-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-inode-version-rhel5.patch
deleted file mode 100644 (file)
index bc583d2..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/ext4/inode.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/inode.c
-+++ linux-2.6.18-128.1.6/fs/ext4/inode.c
-@@ -2850,11 +2850,11 @@ struct inode *ext4_iget(struct super_blo
-       EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
-       EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
--      inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
-+      ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version);
-       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
-               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
--                      inode->i_version |=
--                      (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
-+                      ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi))
-+                                                                       << 32;
-       }
-       if (S_ISREG(inode->i_mode)) {
-@@ -3043,16 +3043,11 @@ static int ext4_do_update_inode(handle_t
-       } else for (block = 0; block < EXT4_N_BLOCKS; block++)
-               raw_inode->i_block[block] = ei->i_data[block];
--      raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
-+      raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version);
-       if (ei->i_extra_isize) {
-               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
--                      /* in RHEL5 i_version is an unsigned long */
--#if BITS_PER_LONG == 64
--                      raw_inode->i_version_hi =
--                      cpu_to_le32(inode->i_version >> 32);
--#else
--                      raw_inode->i_version_hi = 0;
--#endif
-+                      raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version
-+                                                            >> 32);
-               raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
-       }
-Index: linux-2.6.18-128.1.6/fs/ext4/ext4.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h
-+++ linux-2.6.18-128.1.6/fs/ext4/ext4.h
-@@ -21,6 +21,8 @@
- #include <linux/seqlock.h>
- #include <linux/mutex.h>
-+#define HAVE_DISK_INODE_VERSION
-+
- /* data type for block offset of block group */
- typedef int ext4_grpblk_t;
-@@ -164,6 +166,8 @@ struct ext4_inode_info {
-        */
-       tid_t i_sync_tid;
-       tid_t i_datasync_tid;
-+
-+      __u64 i_fs_version;
- };
- /*
-Index: linux-2.6.18-128.1.6/fs/ext4/ialloc.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/ialloc.c
-+++ linux-2.6.18-128.1.6/fs/ext4/ialloc.c
-@@ -878,6 +878,7 @@ got:
-       ei->i_block_alloc_info = NULL;
-       ei->i_block_group = group;
-       ei->i_last_alloc_group = ~0;
-+      ei->i_fs_version = 0;
-       ext4_set_inode_flags(inode);
-       if (IS_DIRSYNC(inode))
diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch
deleted file mode 100644 (file)
index 4c08c9e..0000000
+++ /dev/null
@@ -1,448 +0,0 @@
-Index: linux-stage/fs/ext4/ext4_jbd2.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4_jbd2.h
-+++ linux-stage/fs/ext4/ext4_jbd2.h
-@@ -106,6 +106,80 @@
- #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
- #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-+/**
-+ *   struct ext4_journal_cb_entry - Base structure for callback information.
-+ *
-+ *   This struct is a 'seed' structure for a using with your own callback
-+ *   structs. If you are using callbacks you must allocate one of these
-+ *   or another struct of your own definition which has this struct
-+ *   as it's first element and pass it to ext4_journal_callback_add().
-+ */
-+struct ext4_journal_cb_entry {
-+      /* list information for other callbacks attached to the same handle */
-+      struct list_head jce_list;
-+
-+      /*  Function to call with this callback structure */
-+      void (*jce_func)(struct super_block *sb,
-+                       struct ext4_journal_cb_entry *jce, int error);
-+
-+      /* user data goes here */
-+};
-+
-+/**
-+ * ext4_journal_callback_add: add a function to call after transaction commit
-+ * @handle: active journal transaction handle to register callback on
-+ * @func: callback function to call after the transaction has committed:
-+ *        @sb: superblock of current filesystem for transaction
-+ *        @jce: returned journal callback data
-+ *        @rc: journal state at commit (0 = transaction committed properly)
-+ * @jce: journal callback data (internal and function private data struct)
-+ *
-+ * The registered function will be called in the context of the journal thread
-+ * after the transaction for which the handle was created has completed.
-+ *
-+ * No locks are held when the callback function is called, so it is safe to
-+ * call blocking functions from within the callback, but the callback should
-+ * not block or run for too long, or the filesystem will be blocked waiting for
-+ * the next transaction to commit. No journaling functions can be used, or
-+ * there is a risk of deadlock.
-+ *
-+ * There is no guaranteed calling order of multiple registered callbacks on
-+ * the same transaction.
-+ */
-+static inline void ext4_journal_callback_add(handle_t *handle,
-+                      void (*func)(struct super_block *sb,
-+                                   struct ext4_journal_cb_entry *jce,
-+                                   int rc),
-+                      struct ext4_journal_cb_entry *jce)
-+{
-+      struct ext4_sb_info *sbi =
-+                      EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+      /* Add the jce to transaction's private list */
-+      jce->jce_func = func;
-+      spin_lock(&sbi->s_md_lock);
-+      list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
-+      spin_unlock(&sbi->s_md_lock);
-+}
-+
-+/**
-+ * ext4_journal_callback_del: delete a registered callback
-+ * @handle: active journal transaction handle on which callback was registered
-+ * @jce: registered journal callback entry to unregister
-+ */
-+static inline void ext4_journal_callback_del(handle_t *handle,
-+                                           struct ext4_journal_cb_entry *jce)
-+{
-+      struct ext4_sb_info *sbi =
-+                      EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+      spin_lock(&sbi->s_md_lock);
-+      list_del_init(&jce->jce_list);
-+      spin_unlock(&sbi->s_md_lock);
-+}
-+
-+#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
-+
- int
- ext4_mark_iloc_dirty(handle_t *handle,
-                    struct inode *inode,
-Index: linux-stage/fs/ext4/mballoc.c
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.c
-+++ linux-stage/fs/ext4/mballoc.c
-@@ -21,6 +21,7 @@
-  * mballoc.c contains the multiblocks allocation routines
-  */
-+#include "ext4_jbd2.h"
- #include "mballoc.h"
- #include <linux/debugfs.h>
-@@ -335,14 +336,12 @@
-  */
- static struct kmem_cache *ext4_pspace_cachep;
- static struct kmem_cache *ext4_ac_cachep;
--static struct kmem_cache *ext4_free_ext_cachep;
-+static struct kmem_cache *ext4_free_data_cachep;
- static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-                                       ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
-                                               ext4_group_t group);
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
--
--
-+static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
- static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
- {
-@@ -2942,8 +2941,6 @@ int ext4_mb_init(struct super_block *sb,
-       ext4_mb_history_init(sb);
--      if (sbi->s_journal)
--              sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-       return 0;
- }
-@@ -3032,46 +3029,42 @@ int ext4_mb_release(struct super_block *
-  * This function is called by the jbd2 layer once the commit has finished,
-  * so we know we can free the blocks that were released with that commit.
-  */
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
-+static void ext4_free_data_callback(struct super_block *sb,
-+                                  struct ext4_journal_cb_entry *jce,
-+                                  int rc)
- {
--      struct super_block *sb = journal->j_private;
-+      struct ext4_free_data *entry = (struct ext4_free_data *)jce;
-       struct ext4_buddy e4b;
-       struct ext4_group_info *db;
-       int err, count = 0, count2 = 0;
--      struct ext4_free_data *entry;
--      struct list_head *l, *ltmp;
--      list_for_each_safe(l, ltmp, &txn->t_private_list) {
--              entry = list_entry(l, struct ext4_free_data, list);
--
--              mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
--                       entry->count, entry->group, entry);
--
--              err = ext4_mb_load_buddy(sb, entry->group, &e4b);
--              /* we expect to find existing buddy because it's pinned */
--              BUG_ON(err != 0);
--
--              db = e4b.bd_info;
--              /* there are blocks to put in buddy to make them really free */
--              count += entry->count;
--              count2++;
--              ext4_lock_group(sb, entry->group);
--              /* Take it out of per group rb tree */
--              rb_erase(&entry->node, &(db->bb_free_root));
--              mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
--
--              if (!db->bb_free_root.rb_node) {
--                      /* No more items in the per group rb tree
--                       * balance refcounts from ext4_mb_free_metadata()
--                       */
--                      page_cache_release(e4b.bd_buddy_page);
--                      page_cache_release(e4b.bd_bitmap_page);
--              }
--              ext4_unlock_group(sb, entry->group);
-+      mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-+               entry->efd_count, entry->efd_group, entry);
--              kmem_cache_free(ext4_free_ext_cachep, entry);
--              ext4_mb_unload_buddy(&e4b);
-+      err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
-+      /* we expect to find existing buddy because it's pinned */
-+      BUG_ON(err != 0);
-+
-+      db = e4b.bd_info;
-+      /* there are blocks to put in buddy to make them really free */
-+      count += entry->efd_count;
-+      count2++;
-+      ext4_lock_group(sb, entry->efd_group);
-+      /* Take it out of per group rb tree */
-+      rb_erase(&entry->efd_node, &(db->bb_free_root));
-+      mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
-+
-+      if (!db->bb_free_root.rb_node) {
-+              /* No more items in the per group rb tree
-+               * balance refcounts from ext4_mb_free_metadata()
-+               */
-+              page_cache_release(e4b.bd_buddy_page);
-+              page_cache_release(e4b.bd_bitmap_page);
-       }
-+      ext4_unlock_group(sb, entry->efd_group);
-+
-+      kmem_cache_free(ext4_free_data_cachep, entry);
-+      ext4_mb_unload_buddy(&e4b);
-       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
- }
-@@ -3123,22 +3116,24 @@ int __init init_ext4_mballoc(void)
-               kmem_cache_create("ext4_alloc_context",
-                                    sizeof(struct ext4_allocation_context),
-                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
--      if (ext4_ac_cachep == NULL) {
--              kmem_cache_destroy(ext4_pspace_cachep);
--              return -ENOMEM;
--      }
-+      if (ext4_ac_cachep == NULL)
-+              goto out_err;
--      ext4_free_ext_cachep =
--              kmem_cache_create("ext4_free_block_extents",
-+      ext4_free_data_cachep =
-+              kmem_cache_create("ext4_free_data",
-                                    sizeof(struct ext4_free_data),
-                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
--      if (ext4_free_ext_cachep == NULL) {
--              kmem_cache_destroy(ext4_pspace_cachep);
--              kmem_cache_destroy(ext4_ac_cachep);
--              return -ENOMEM;
--      }
-+      if (ext4_free_data_cachep == NULL)
-+              goto out1_err;
-+
-       ext4_create_debugfs_entry();
-       return 0;
-+
-+out1_err:
-+      kmem_cache_destroy(ext4_ac_cachep);
-+out_err:
-+      kmem_cache_destroy(ext4_pspace_cachep);
-+      return -ENOMEM;
- }
- void exit_ext4_mballoc(void)
-@@ -3150,7 +3145,7 @@ void exit_ext4_mballoc(void)
-       rcu_barrier();
-       kmem_cache_destroy(ext4_pspace_cachep);
-       kmem_cache_destroy(ext4_ac_cachep);
--      kmem_cache_destroy(ext4_free_ext_cachep);
-+      kmem_cache_destroy(ext4_free_data_cachep);
-       ext4_remove_debugfs_entry();
- }
-@@ -3688,8 +3683,8 @@ static void ext4_mb_generate_from_freeli
-       n = rb_first(&(grp->bb_free_root));
-       while (n) {
--              entry = rb_entry(n, struct ext4_free_data, node);
--              mb_set_bits(bitmap, entry->start_blk, entry->count);
-+              entry = rb_entry(n, struct ext4_free_data, efd_node);
-+              mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
-               n = rb_next(n);
-       }
-       return;
-@@ -4974,11 +4969,11 @@ out3:
-  * AND the blocks are associated with the same group.
-  */
- static int can_merge(struct ext4_free_data *entry1,
--                      struct ext4_free_data *entry2)
-+                   struct ext4_free_data *entry2)
- {
--      if ((entry1->t_tid == entry2->t_tid) &&
--          (entry1->group == entry2->group) &&
--          ((entry1->start_blk + entry1->count) == entry2->start_blk))
-+      if ((entry1->efd_tid == entry2->efd_tid) &&
-+          (entry1->efd_group == entry2->efd_group) &&
-+          ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
-               return 1;
-       return 0;
- }
-@@ -4991,7 +4986,6 @@ ext4_mb_free_metadata(handle_t *handle, 
-       struct ext4_free_data *entry;
-       struct ext4_group_info *db = e4b->bd_info;
-       struct super_block *sb = e4b->bd_sb;
--      struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct rb_node **n = &db->bb_free_root.rb_node, *node;
-       struct rb_node *parent = NULL, *new_node;
-@@ -4999,8 +4993,8 @@ ext4_mb_free_metadata(handle_t *handle, 
-       BUG_ON(e4b->bd_bitmap_page == NULL);
-       BUG_ON(e4b->bd_buddy_page == NULL);
--      new_node = &new_entry->node;
--      block = new_entry->start_blk;
-+      new_node = &new_entry->efd_node;
-+      block = new_entry->efd_start_blk;
-       if (!*n) {
-               /* first free block exent. We need to
-@@ -5013,15 +5007,15 @@ ext4_mb_free_metadata(handle_t *handle, 
-       }
-       while (*n) {
-               parent = *n;
--              entry = rb_entry(parent, struct ext4_free_data, node);
--              if (block < entry->start_blk)
-+              entry = rb_entry(parent, struct ext4_free_data, efd_node);
-+              if (block < entry->efd_start_blk)
-                       n = &(*n)->rb_left;
--              else if (block >= (entry->start_blk + entry->count))
-+              else if (block >= (entry->efd_start_blk + entry->efd_count))
-                       n = &(*n)->rb_right;
-               else {
-                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
-                                       "Double free of blocks %d (%d %d)",
--                                      block, entry->start_blk, entry->count);
-+                                      block, entry->efd_start_blk, entry->efd_count);
-                       return 0;
-               }
-       }
-@@ -5032,34 +5026,29 @@ ext4_mb_free_metadata(handle_t *handle, 
-       /* Now try to see the extent can be merged to left and right */
-       node = rb_prev(new_node);
-       if (node) {
--              entry = rb_entry(node, struct ext4_free_data, node);
-+              entry = rb_entry(node, struct ext4_free_data, efd_node);
-               if (can_merge(entry, new_entry)) {
--                      new_entry->start_blk = entry->start_blk;
--                      new_entry->count += entry->count;
-+                      new_entry->efd_start_blk = entry->efd_start_blk;
-+                      new_entry->efd_count += entry->efd_count;
-                       rb_erase(node, &(db->bb_free_root));
--                      spin_lock(&sbi->s_md_lock);
--                      list_del(&entry->list);
--                      spin_unlock(&sbi->s_md_lock);
--                      kmem_cache_free(ext4_free_ext_cachep, entry);
-+                      ext4_journal_callback_del(handle, &entry->efd_jce);
-+                      kmem_cache_free(ext4_free_data_cachep, entry);
-               }
-       }
-       node = rb_next(new_node);
-       if (node) {
--              entry = rb_entry(node, struct ext4_free_data, node);
-+              entry = rb_entry(node, struct ext4_free_data, efd_node);
-               if (can_merge(new_entry, entry)) {
--                      new_entry->count += entry->count;
-+                      new_entry->efd_count += entry->efd_count;
-                       rb_erase(node, &(db->bb_free_root));
--                      spin_lock(&sbi->s_md_lock);
--                      list_del(&entry->list);
--                      spin_unlock(&sbi->s_md_lock);
--                      kmem_cache_free(ext4_free_ext_cachep, entry);
-+                      ext4_journal_callback_del(handle, &entry->efd_jce);
-+                      kmem_cache_free(ext4_free_data_cachep, entry);
-               }
-       }
-       /* Add the extent to transaction's private list */
--      spin_lock(&sbi->s_md_lock);
--      list_add(&new_entry->list, &handle->h_transaction->t_private_list);
--      spin_unlock(&sbi->s_md_lock);
-+      ext4_journal_callback_add(handle, ext4_free_data_callback,
-+                                &new_entry->efd_jce);
-       return 0;
- }
-@@ -5191,11 +5180,11 @@ do_more:
-                * blocks being freed are metadata. these blocks shouldn't
-                * be used until this transaction is committed
-                */
--              new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
--              new_entry->start_blk = bit;
--              new_entry->group  = block_group;
--              new_entry->count = count;
--              new_entry->t_tid = handle->h_transaction->t_tid;
-+              new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
-+              new_entry->efd_start_blk = bit;
-+              new_entry->efd_group  = block_group;
-+              new_entry->efd_count = count;
-+              new_entry->efd_tid = handle->h_transaction->t_tid;
-               ext4_lock_group(sb, block_group);
-               mb_clear_bits(bitmap_bh->b_data, bit, count);
-Index: linux-stage/fs/ext4/mballoc.h
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.h
-+++ linux-stage/fs/ext4/mballoc.h
-@@ -107,23 +107,24 @@ extern u8 mb_enable_debug;
-  */
- #define MB_DEFAULT_GROUP_PREALLOC     512
--
- struct ext4_free_data {
--      /* this links the free block information from group_info */
--      struct rb_node node;
-+      /* MUST be the first member */
-+      struct ext4_journal_cb_entry    efd_jce;
--      /* this links the free block information from ext4_sb_info */
--      struct list_head list;
-+      /* ext4_free_data private data starts from here */
-+
-+      /* this links the free block information from group_info */
-+      struct rb_node          efd_node;
-       /* group which free block extent belongs */
--      ext4_group_t group;
-+      ext4_group_t            efd_group;
-       /* free block extent */
--      ext4_grpblk_t start_blk;
--      ext4_grpblk_t count;
-+      ext4_grpblk_t           efd_start_blk;
-+      ext4_grpblk_t           efd_count;
-       /* transaction which freed this extent */
--      tid_t   t_tid;
-+      tid_t                   efd_tid;
- };
- struct ext4_prealloc_space {
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -304,6 +304,23 @@ void ext4_journal_abort_handle(const cha
- EXPORT_SYMBOL(ext4_journal_abort_handle);
-+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
-+{
-+      struct super_block              *sb = journal->j_private;
-+      struct ext4_sb_info             *sbi = EXT4_SB(sb);
-+      int                             error = is_journal_aborted(journal);
-+      struct ext4_journal_cb_entry    *jce, *tmp;
-+
-+      spin_lock(&sbi->s_md_lock);
-+      list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
-+              list_del_init(&jce->jce_list);
-+              spin_unlock(&sbi->s_md_lock);
-+              jce->jce_func(sb, jce, error);
-+              spin_lock(&sbi->s_md_lock);
-+      }
-+      spin_unlock(&sbi->s_md_lock);
-+}
-+
- /* Deal with the reporting of failure conditions on a filesystem such as
-  * inconsistencies detected or read IO failures.
-  *
-@@ -2997,6 +3014,8 @@ static int ext4_fill_super(struct super_
-       }
-       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-+      sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-+
- no_journal:
-       if (test_opt(sb, NOBH)) {
diff --git a/ldiskfs/kernel_patches/patches/ext4-kill-dx_root.patch b/ldiskfs/kernel_patches/patches/ext4-kill-dx_root.patch
deleted file mode 100644 (file)
index c8f2d1a..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-Removes the static definition of the dx_root struct so that the "." and ".."
-dirents can have extra data. This patch does not change any functionality, but
-is required for the ext4_data_in_dirent patch.
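In essence, the removed patch replaces the fixed dx_root layout with a helper that
walks past the "." and ".." entries to find the hash root info. A minimal sketch of
that lookup, condensed from the hunks below and shown here only for illustration
(names and macros as in the patched namei.c):

struct dx_root_info *sketch_dx_get_dx_info(struct ext4_dir_entry_2 *de)
{
	/* "." is the first entry in the htree root block */
	de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
	/* ".." follows; dx_root_info sits immediately after it */
	de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
	return (struct dx_root_info *)de;
}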
-Index: b/fs/ext4/namei.c
-===================================================================
---- a/fs/ext4/namei.c
-+++ b/fs/ext4/namei.c
-@@ -121,22 +121,13 @@ struct dx_entry
-  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
-  */
--struct dx_root
-+struct dx_root_info
- {
--      struct fake_dirent dot;
--      char dot_name[4];
--      struct fake_dirent dotdot;
--      char dotdot_name[4];
--      struct dx_root_info
--      {
--              __le32 reserved_zero;
--              u8 hash_version;
--              u8 info_length; /* 8 */
--              u8 indirect_levels;
--              u8 unused_flags;
--      }
--      info;
--      struct dx_entry entries[0];
-+      __le32 reserved_zero;
-+      u8 hash_version;
-+      u8 info_length; /* 8 */
-+      u8 indirect_levels;
-+      u8 unused_flags;
- };
- struct dx_node
-@@ -225,6 +216,16 @@ ext4_next_entry(struct ext4_dir_entry_2 
-  * Future: use high four bits of block for coalesce-on-delete flags
-  * Mask them off for now.
-  */
-+struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
-+{
-+       /* get dotdot first */
-+       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
-+
-+       /* dx root info is after dotdot entry */
-+       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
-+
-+       return (struct dx_root_info *) de;
-+}
- static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
- {
-@@ -378,7 +379,7 @@ dx_probe(struct dentry *dentry, struct i
- {
-       unsigned count, indirect;
-       struct dx_entry *at, *entries, *p, *q, *m;
--      struct dx_root *root;
-+      struct dx_root_info * info;
-       struct buffer_head *bh;
-       struct dx_frame *frame = frame_in;
-       u32 hash;
-@@ -388,18 +389,19 @@ dx_probe(struct dentry *dentry, struct i
-               dir = dentry->d_parent->d_inode;
-       if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
-               goto fail;
--      root = (struct dx_root *) bh->b_data;
--      if (root->info.hash_version != DX_HASH_TEA &&
--          root->info.hash_version != DX_HASH_HALF_MD4 &&
--          root->info.hash_version != DX_HASH_LEGACY) {
-+
-+      info = dx_get_dx_info((struct ext4_dir_entry_2*)bh->b_data);
-+      if (info->hash_version != DX_HASH_TEA &&
-+          info->hash_version != DX_HASH_HALF_MD4 &&
-+          info->hash_version != DX_HASH_LEGACY) {
-               ext4_warning(dir->i_sb, "Unrecognised inode hash code %d"
-                            "for directory #%lu",
--                           root->info.hash_version, dir->i_ino);
-+                           info->hash_version, dir->i_ino);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
-               goto fail;
-       }
--      hinfo->hash_version = root->info.hash_version;
-+      hinfo->hash_version = info->hash_version;
-       if (hinfo->hash_version <= DX_HASH_TEA)
-               hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
-       hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-@@ -398,27 +399,26 @@ dx_probe(struct dentry *dentry, struct i
-               ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
-       hash = hinfo->hash;
--      if (root->info.unused_flags & 1) {
-+      if (info->unused_flags & 1) {
-               ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
--                           root->info.unused_flags);
-+                           info->unused_flags);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
-               goto fail;
-       }
--      if ((indirect = root->info.indirect_levels) > 1) {
-+      if ((indirect = info->indirect_levels) > 1) {
-               ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
--                           root->info.indirect_levels);
-+                           info->indirect_levels);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
-               goto fail;
-       }
--      entries = (struct dx_entry *) (((char *)&root->info) +
--                                     root->info.info_length);
-+      entries = (struct dx_entry *) (((char *)info) + info->info_length);
-       if (dx_get_limit(entries) != dx_root_limit(dir,
--                                                 root->info.info_length)) {
-+                                                 info->info_length)) {
-               ext4_warning(dir->i_sb, "dx entry: limit != root limit");
-               brelse(bh);
-
-@@ -509,10 +510,12 @@ fail:
- static void dx_release (struct dx_frame *frames)
- {
-+      struct dx_root_info *info;
-       if (frames[0].bh == NULL)
-               return;
--      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
-+      info = dx_get_dx_info((struct ext4_dir_entry_2*)frames[0].bh->b_data);
-+      if (info->indirect_levels)
-               brelse(frames[1].bh);
-       brelse(frames[0].bh);
- }
-@@ -1430,17 +1433,16 @@ static int make_indexed_dir(handle_t *ha
-       const char      *name = dentry->d_name.name;
-       int             namelen = dentry->d_name.len;
-       struct buffer_head *bh2;
--      struct dx_root  *root;
-       struct dx_frame frames[2], *frame;
-       struct dx_entry *entries;
--      struct ext4_dir_entry_2 *de, *de2;
-+      struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
-       char            *data1, *top;
-       unsigned        len;
-       int             retval;
-       unsigned        blocksize;
-       struct dx_hash_info hinfo;
-       ext4_lblk_t  block;
--      struct fake_dirent *fde;
-+      struct dx_root_info *dx_info;
-       blocksize =  dir->i_sb->s_blocksize;
-       dxtrace(printk("Creating index\n"));
-@@ -1450,7 +1452,6 @@ static int make_indexed_dir(handle_t *ha
-               brelse(bh);
-               return retval;
-       }
--      root = (struct dx_root *) bh->b_data;
-       bh2 = ext4_append (handle, dir, &block, &retval);
-       if (!(bh2)) {
-@@ -1460,18 +1461,20 @@ static int make_indexed_dir(handle_t *ha
-       }
-       root = (struct dx_root *) bh->b_data;
-+      dot_de = (struct ext4_dir_entry_2 *) bh->b_data;
-+      dotdot_de = ext4_next_entry(dot_de, blocksize);
-+
-       /* The 0th block becomes the root, move the dirents out */
--      fde = &root->dotdot;
--      de = (struct ext4_dir_entry_2 *)((char *)fde +
--              ext4_rec_len_from_disk(fde->rec_len, blocksize));
-+      de = (struct ext4_dir_entry_2 *)((char *)dotdot_de +
-+              ext4_rec_len_from_disk(dotdot_de->rec_len, blocksize));
--      if ((char *) de >= (((char *) root) + blocksize)) {
-+      if ((char *) de >= (((char *) dot_de) + blocksize)) {
-               ext4_error(dir->i_sb,
-                          "invalid rec_len for '..' in inode %lu",
-                          dir->i_ino);
-               brelse(bh);
-               return -EIO;
-       }
--      len = ((char *) root) + blocksize - (char *) de;
-+      len = ((char *) dot_de) + blocksize - (char *) de;
-       /* Allocate new block for the 0th block's dirents */
-       bh2 = ext4_append(handle, dir, &block, &retval);
-@@ -1472,19 +1475,23 @@ static int make_indexed_dir(handle_t *ha
-       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
-                                          blocksize);
-       /* Initialize the root; the dot dirents already exist */
--      de = (struct ext4_dir_entry_2 *) (&root->dotdot);
--      de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
--                                         blocksize);
--      memset (&root->info, 0, sizeof(root->info));
--      root->info.info_length = sizeof(root->info);
--      root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
--      entries = root->entries;
--      dx_set_block(entries, 1);
--      dx_set_count(entries, 1);
--      dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
-+      dotdot_de->rec_len = ext4_rec_len_to_disk(blocksize -
-+                      le16_to_cpu(dot_de->rec_len), blocksize);
-+
-+      /* initialize hashing info */
-+      dx_info = dx_get_dx_info(dot_de);
-+      memset (dx_info, 0, sizeof(*dx_info));
-+      dx_info->info_length = sizeof(*dx_info);
-+      dx_info->hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
-+
-+      entries = (void *)dx_info + sizeof(*dx_info);
-+
-+      dx_set_block(entries, 1);
-+      dx_set_count(entries, 1);
-+      dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
-       /* Initialize as for dx_probe */
--      hinfo.hash_version = root->info.hash_version;
-+      hinfo.hash_version = dx_info->hash_version;
-       if (hinfo.hash_version <= DX_HASH_TEA)
-               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
-       hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-@@ -1724,6 +1733,7 @@ static int ext4_dx_add_entry(handle_t *h
-                               goto journal_error;
-                       brelse (bh2);
-               } else {
-+                      struct dx_root_info * info;
-                       dxtrace(printk(KERN_DEBUG
-                                      "Creating second level index...\n"));
-                       memcpy((char *) entries2, (char *) entries,
-@@ -1732,7 +1742,9 @@ static int ext4_dx_add_entry(handle_t *h
-                       /* Set up root */
-                       dx_set_count(entries, 1);
-                       dx_set_block(entries + 0, newblock);
--                      ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
-+                      info = dx_get_dx_info((struct ext4_dir_entry_2*)
-+                                      frames[0].bh->b_data);
-+                      info->indirect_levels = 1;
-                       /* Add new access path frame */
-                       frame = frames + 1;
diff --git a/ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel5.patch
deleted file mode 100644 (file)
index 7c3933c..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-diff -rupN linux-2.6.18-164.6.1_1//fs/ext4/ialloc.c linux-2.6.18-164.6.1_2//fs/ext4/ialloc.c
---- linux-2.6.18-164.6.1_1//fs/ext4/ialloc.c   2010-03-31 17:42:50.000000000 +0530
-+++ linux-2.6.18-164.6.1_2//fs/ext4/ialloc.c   2010-03-31 17:43:22.000000000 +0530
-@@ -622,11 +622,14 @@ struct inode *ext4_new_inode_goal(handle
-       sb = dir->i_sb;
-       trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
-                  dir->i_ino, mode);
-+      sbi = EXT4_SB(sb);
-+      if (sbi->s_max_dir_size > 0 && i_size_read(dir) >= sbi->s_max_dir_size)
-+              return ERR_PTR(-EFBIG);
-+
-       inode = new_inode(sb);
-       if (!inode)
-               return ERR_PTR(-ENOMEM);
-       ei = EXT4_I(inode);
--      sbi = EXT4_SB(sb);
-       if (goal)
-               goal = sbi->s_inode_goal;
-diff -rupN linux-2.6.18-164.6.1_1//fs/ext4/super.c linux-2.6.18-164.6.1_2//fs/ext4/super.c
---- linux-2.6.18-164.6.1_1//fs/ext4/super.c    2010-03-31 17:42:50.000000000 +0530
-+++ linux-2.6.18-164.6.1_2//fs/ext4/super.c    2010-03-31 17:45:32.000000000 +0530
-@@ -40,6 +40,7 @@
- EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
- EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
- EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-+EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size);
- static struct attribute *ext4_attrs[] = {
-       ATTR_LIST(delayed_allocation_blocks),
-@@ -48,6 +48,7 @@
-       ATTR_LIST(mb_order2_req),
-       ATTR_LIST(mb_stream_req),
-       ATTR_LIST(mb_group_prealloc),
-+      ATTR_LIST(max_dir_size),
-       NULL,
- };
-diff -rupN linux-2.6.18-164.6.1_1//fs/ext4/ext4_sb.h linux-2.6.18-164.6.1_2//fs/ext4/ext4_sb.h
---- linux-2.6.18-164.6.1_1//fs/ext4/ext4.h     2010-03-31 17:42:50.000000000 +0530
-+++ linux-2.6.18-164.6.1_2//fs/ext4/ext4.h     2010-03-31 17:43:22.000000000 +0530
-@@ -119,6 +119,7 @@ struct ext4_sb_info {
-       /* where last allocation was done - for stream allocation */
-       unsigned long s_mb_last_group;
-       unsigned long s_mb_last_start;
-+      unsigned long s_max_dir_size;
-       /* history to debug policy */
-       struct ext4_mb_history *s_mb_history;
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel5.patch
deleted file mode 100644 (file)
index 9a9466f..0000000
+++ /dev/null
@@ -1,352 +0,0 @@
-diff -rupN linux-2.6.18-128.1.6_1//fs/ext4/mballoc.c linux-2.6.18-128.1.6_2//fs/ext4/mballoc.c
---- linux-2.6.18-128.1.6_1//fs/ext4/mballoc.c
-+++ linux-2.6.18-128.1.6_2//fs/ext4/mballoc.c
-@@ -360,8 +360,8 @@ static void ext4_mb_mark_free_simple(str
- static struct kmem_cache *ext4_pspace_cachep;
- static struct kmem_cache *ext4_ac_cachep;
- static struct kmem_cache *ext4_free_ext_cachep;
--static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
--                                      ext4_group_t group);
-+static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+                                      ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
-                                                ext4_group_t group);
-@@ -660,7 +660,7 @@ static void ext4_mb_mark_free_simple(str
- }
- static noinline_for_stack
--void ext4_mb_generate_buddy(struct super_block *sb,
-+int ext4_mb_generate_buddy(struct super_block *sb,
-                               void *buddy, void *bitmap, ext4_group_t group)
- {
-       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-@@ -692,14 +692,14 @@ static void ext4_mb_generate_buddy(struc
-       grp->bb_fragments = fragments;
-       if (free != grp->bb_free) {
--              ext4_grp_locked_error(sb, group,  __func__,
--                      "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
--                      group, free, grp->bb_free);
--              /*
--               * If we intent to continue, we consider group descritor
--               * corrupt and update bb_free using bitmap value
--               */
--              grp->bb_free = free;
-+              struct ext4_group_desc *gdp;
-+              gdp = ext4_get_group_desc (sb, group, NULL);
-+              ext4_error(sb,
-+                      "group %lu: %u blocks in bitmap, %u in bb, "
-+                      "%u in gd, %lu pa's\n", (long unsigned int)group,
-+                      free, grp->bb_free, ext4_free_blks_count(sb, gdp),
-+                      grp->bb_prealloc_nr);
-+              return -EIO;
-       }
-       clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-@@ -709,6 +709,8 @@ static void ext4_mb_generate_buddy(struc
-       EXT4_SB(sb)->s_mb_buddies_generated++;
-       EXT4_SB(sb)->s_mb_generation_time += period;
-       spin_unlock(&EXT4_SB(sb)->s_bal_lock);
-+
-+      return 0;
- }
- /* The buddy information is attached the buddy cache inode
-@@ -814,7 +816,7 @@ static int ext4_mb_init_cache(struct pag
-       first_block = page->index * blocks_per_page;
-       /* init the page  */
-       memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
--      for (i = 0; i < blocks_per_page; i++) {
-+      for (i = 0; i < blocks_per_page && err == 0; i++) {
-               int group;
-               struct ext4_group_info *grinfo;
-@@ -848,7 +850,7 @@ static int ext4_mb_init_cache(struct pag
-                        * incore got set to the group block bitmap below
-                        */
-                       ext4_lock_group(sb, group);
--                      ext4_mb_generate_buddy(sb, data, incore, group);
-+                      err = ext4_mb_generate_buddy(sb, data, incore, group);
-                       ext4_unlock_group(sb, group);
-                       incore = NULL;
-               } else {
-@@ -861,7 +863,7 @@ static int ext4_mb_init_cache(struct pag
-                       memcpy(data, bitmap, blocksize);
-                       /* mark all preallocated blks used in in-core bitmap */
--                      ext4_mb_generate_from_pa(sb, data, group);
-+                      err = ext4_mb_generate_from_pa(sb, data, group);
-                       ext4_mb_generate_from_freelist(sb, data, group);
-                       ext4_unlock_group(sb, group);
-@@ -870,6 +872,7 @@ static int ext4_mb_init_cache(struct pag
-                       incore = data;
-               }
-       }
-+      if (likely(err == 0))
-       SetPageUptodate(page);
- out:
-@@ -1964,7 +1967,10 @@ static int ext4_mb_seq_history_show(stru
-                       hs->result.fe_start, hs->result.fe_len);
-               seq_printf(seq, "%-5u %-8u %-23s free\n",
-                               hs->pid, hs->ino, buf2);
-+      } else {
-+              seq_printf(seq, "unknown op %d\n", hs->op);
-       }
-+
-       return 0;
- }
-@@ -2092,9 +2098,11 @@ static void *ext4_mb_seq_groups_next(str
- static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
- {
-       struct super_block *sb = seq->private;
-+      struct ext4_group_desc *gdp;
-       ext4_group_t group = (ext4_group_t) ((unsigned long) v);
-       int i;
-       int err;
-+      int free = 0;
-       struct ext4_buddy e4b;
-       struct sg {
-               struct ext4_group_info info;
-@@ -2103,10 +2111,10 @@ static int ext4_mb_seq_groups_show(struc
-       group--;
-       if (group == 0)
--              seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
-+              seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
-                               "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
-                                 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
--                         "group", "free", "frags", "first",
-+                         "group", "free", "frags", "first", "first", "pa",
-                          "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
-                          "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
-@@ -2117,13 +2125,20 @@ static int ext4_mb_seq_groups_show(struc
-               seq_printf(seq, "#%-5lu: I/O error\n", group);
-               return 0;
-       }
-+
-+      gdp = ext4_get_group_desc(sb, group, NULL);
-+      if (gdp != NULL)
-+              free = ext4_free_blks_count(sb, gdp);
-+
-       ext4_lock_group(sb, group);
-       memcpy(&sg, ext4_get_group_info(sb, group), i);
-       ext4_unlock_group(sb, group);
-       ext4_mb_release_desc(&e4b);
--      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
--                      sg.info.bb_fragments, sg.info.bb_first_free);
-+      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
-+                      (long unsigned int)group, sg.info.bb_free, free,
-+                      sg.info.bb_fragments, sg.info.bb_first_free,
-+                      sg.info.bb_prealloc_nr);
-       for (i = 0; i <= 13; i++)
-               seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
-                               sg.info.bb_counters[i] : 0);
-@@ -2226,6 +2241,7 @@ ext4_mb_store_history(struct ext4_alloca
-       h.tail = ac->ac_tail;
-       h.buddy = ac->ac_buddy;
-       h.merged = 0;
-+      h.cr = ac->ac_criteria;
-       if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
-               if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
-                               ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
-@@ -3539,23 +3555,68 @@ ext4_mb_use_preallocated(struct ext4_all
- }
- /*
-+ * check free blocks in bitmap match free block in group descriptor
-+ * do this before taking preallocated blocks into account to be able
-+ * to detect on-disk corruptions. The group lock should be hold by the
-+ * caller.
-+ */
-+int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
-+                              struct ext4_group_desc *gdp, int group)
-+{
-+      unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
-+      unsigned short i, first, free = 0;
-+
-+      i = mb_find_next_zero_bit(bitmap, max, 0);
-+
-+      while (i < max) {
-+              first = i;
-+              i = mb_find_next_bit(bitmap, max, i);
-+              if (i > max)
-+                      i = max;
-+              free += i - first;
-+              if (i < max)
-+                      i = mb_find_next_zero_bit(bitmap, max, i);
-+      }
-+
-+      if (free != ext4_free_blks_count(sb, gdp)) {
-+              ext4_error(sb, "on-disk bitmap for group %d"
-+                      "corrupted: %u blocks free in bitmap, %u - in gd\n",
-+                      group, free, ext4_free_blks_count(sb, gdp));
-+              return -EIO;
-+      }
-+      return 0;
-+}
-+
-+/*
-  * the function goes through all preallocation in this group and marks them
-  * used in in-core bitmap. buddy must be generated from this bitmap
-  * Need to be called with ext4 group lock held
-  */
- static noinline_for_stack
--void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-                                       ext4_group_t group)
- {
-       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-       struct ext4_prealloc_space *pa;
-+      struct ext4_group_desc *gdp;
-       struct list_head *cur;
-       ext4_group_t groupnr;
-       ext4_grpblk_t start;
-       int preallocated = 0;
-       int count = 0;
-+      int skip = 0;
-+      int err;
-       int len;
-+      gdp = ext4_get_group_desc (sb, group, NULL);
-+      if (gdp == NULL)
-+              return -EIO;
-+
-+      /* before applying preallocations, check bitmap consistency */
-+      err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
-+      if (err)
-+              return err;
-+
-       /* all form of preallocation discards first load group,
-        * so the only competing code is preallocation use.
-        * we don't need any locking here
-@@ -3570,14 +3631,23 @@ static void ext4_mb_generate_from_pa(str
-                                            &groupnr, &start);
-               len = pa->pa_len;
-               spin_unlock(&pa->pa_lock);
--              if (unlikely(len == 0))
-+              if (unlikely(len == 0)) {
-+                      skip++;
-                       continue;
-+              }
-               BUG_ON(groupnr != group);
-               mb_set_bits(bitmap, start, len);
-               preallocated += len;
-               count++;
-       }
-+      if (count + skip != grp->bb_prealloc_nr) {
-+              ext4_error(sb, "lost preallocations: "
-+                         "count %d, bb_prealloc_nr %lu, skip %d\n",
-+                         count, grp->bb_prealloc_nr, skip);
-+              return -EIO;
-+      }
-       mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
-+      return 0;
- }
- static void ext4_mb_pa_callback(struct rcu_head *head)
-@@ -3629,6 +3699,7 @@ static void ext4_mb_put_pa(struct ext4_a
-        */
-       ext4_lock_group(sb, grp);
-       list_del(&pa->pa_group_list);
-+      ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
-       ext4_unlock_group(sb, grp);
-       spin_lock(pa->pa_obj_lock);
-@@ -3717,6 +3788,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
-       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+      grp->bb_prealloc_nr++;
-       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-       spin_lock(pa->pa_obj_lock);
-@@ -3776,6 +3848,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
-       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+      grp->bb_prealloc_nr++;
-       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-       /*
-@@ -3828,6 +3901,7 @@ ext4_mb_release_inode_pa(struct ext4_bud
-               ac->ac_sb = sb;
-               ac->ac_inode = pa->pa_inode;
-               ac->ac_op = EXT4_MB_HISTORY_DISCARD;
-+              ac->ac_o_ex.fe_len = 1;
-       }
-       while (bit < end) {
-@@ -3972,6 +4046,8 @@ repeat:
-               spin_unlock(&pa->pa_lock);
-+              BUG_ON(grp->bb_prealloc_nr == 0);
-+              grp->bb_prealloc_nr--;
-               list_del(&pa->pa_group_list);
-               list_add(&pa->u.pa_tmp_list, &list);
-       }
-@@ -4107,7 +4183,7 @@ repeat:
-               if (err) {
-                       ext4_error(sb, "Error loading buddy information for %u",
-                                       group);
--                      continue;
-+                      return;
-               }
-               bitmap_bh = ext4_read_block_bitmap(sb, group);
-@@ -4119,6 +4195,8 @@ repeat:
-               }
-               ext4_lock_group(sb, group);
-+              BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
-+              e4b.bd_info->bb_prealloc_nr--;
-               list_del(&pa->pa_group_list);
-               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
-               ext4_unlock_group(sb, group);
-@@ -4394,6 +4472,7 @@ ext4_mb_discard_lg_preallocations(struct
-               }
-               ext4_lock_group(sb, group);
-               list_del(&pa->pa_group_list);
-+              ext4_get_group_info(sb, group)->bb_prealloc_nr--;
-               ext4_mb_release_group_pa(&e4b, pa, ac);
-               ext4_unlock_group(sb, group);
-diff -rupN linux-2.6.18-128.1.6/fs/ext4/ext4.h
---- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h
-+++ linux-2.6.18-128.1.6/fs/ext4/ext4.h
-@@ -119,6 +119,7 @@ struct ext4_group_info {
-       unsigned short  bb_free;
-       unsigned short  bb_fragments;
-       struct          list_head bb_prealloc_list;
-+      unsigned long   bb_prealloc_nr;
- #ifdef DOUBLE_CHECK
-       void            *bb_bitmap;
- #endif
-Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/mballoc.h
-+++ linux-2.6.18-128.1.6/fs/ext4/mballoc.h
-@@ -92,7 +92,7 @@
- /*
-  * for which requests use 2^N search using buddies
-  */
--#define MB_DEFAULT_ORDER2_REQS                2
-+#define MB_DEFAULT_ORDER2_REQS                8
-
- /*
-  * default group prealloc size 512 blocks
-@@ -228,7 +229,7 @@ struct ext4_mb_history {
-       __u16 tail;     /* what tail broke some buddy */
-       __u16 buddy;    /* buddy the tail ^^^ broke */
-       __u16 flags;
--      __u8 cr:3;      /* which phase the result extent was found at */
-+      __u8 cr:8;      /* which phase the result extent was found at */
-       __u8 op:4;
-       __u8 merged:1;
- };
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-group_check-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-group_check-rhel5.patch
deleted file mode 100644 (file)
index 3b9de5c..0000000
+++ /dev/null
@@ -1,320 +0,0 @@
-commit 8a57d9d61a6e361c7bb159dda797672c1df1a691
-Author: Curt Wohlgemuth <curtw@google.com>
-Date:   Sun May 16 15:00:00 2010 -0400
-
-    ext4: check for a good block group before loading buddy pages
-    
-    This adds a new field in ext4_group_info to cache the largest available
-    block range in a block group; and don't load the buddy pages until *after*
-    we've done a sanity check on the block group.
-    
-    With large allocation requests (e.g., fallocate(), 8MiB) and relatively full
-    partitions, it's easy to have no block groups with a block extent large
-    enough to satisfy the input request length.  This currently causes the loop
-    during cr == 0 in ext4_mb_regular_allocator() to load the buddy bitmap pages
-    for EVERY block group.  That can be a lot of pages.  The patch below allows
-    us to call ext4_mb_good_group() BEFORE we load the buddy pages (although we
-    have check again after we lock the block group).
-    
-    Addresses-Google-Bug: #2578108
-    Addresses-Google-Bug: #2704453
-    
-    Signed-off-by: Curt Wohlgemuth <curtw@google.com>
-    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-
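The effect of this change on the allocator's group scan can be sketched as follows
(illustrative only, condensed from the mballoc.c hunks below; it is not part of the
removed patch itself):

	for (i = 0; i < ngroups; group++, i++) {
		if (group == ngroups)
			group = 0;

		/* cheap suitability check, no buddy page needed */
		if (!ext4_mb_good_group(ac, group, cr))
			continue;

		err = ext4_mb_load_buddy(sb, group, &e4b);
		if (err)
			goto out;

		ext4_lock_group(sb, group);
		/* re-check under the group lock before scanning */
		if (!ext4_mb_good_group(ac, group, cr)) {
			ext4_unlock_group(sb, group);
			ext4_mb_unload_buddy(&e4b);
			continue;
		}
		/* ... scan the group for a suitable extent ... */
	}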
-Index: linux-2.6.32/fs/ext4/ext4.h
-===================================================================
---- linux-2.6.32.orig/fs/ext4/ext4.h   2009-12-02 20:51:21.000000000 -0700
-+++ linux-2.6.32/fs/ext4/ext4.h        2011-02-17 23:54:52.708097710 -0700
-@@ -1625,6 +1625,7 @@ struct ext4_group_info {
-       ext4_grpblk_t   bb_first_free;  /* first free block */
-       ext4_grpblk_t   bb_free;        /* total free blocks */
-       ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
-+      ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
-       struct          list_head bb_prealloc_list;
- #ifdef DOUBLE_CHECK
-       void            *bb_bitmap;
-Index: linux-2.6.32/fs/ext4/mballoc.c
-===================================================================
---- linux-2.6.32.orig/fs/ext4/mballoc.c        2009-12-02 20:51:21.000000000 -0700
-+++ linux-2.6.32/fs/ext4/mballoc.c     2011-02-18 00:41:06.872097644 -0700
-@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(str
-       }
- }
-+/*
-+ * Cache the order of the largest free extent we have available in this block
-+ * group.
-+ */
-+static void
-+mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
-+{
-+      int i;
-+      int bits;
-+
-+      grp->bb_largest_free_order = -1; /* uninit */
-+
-+      bits = sb->s_blocksize_bits + 1;
-+      for (i = bits; i >= 0; i--) {
-+              if (grp->bb_counters[i] > 0) {
-+                      grp->bb_largest_free_order = i;
-+                      break;
-+              }
-+      }
-+}
-+
- static noinline_for_stack
- void ext4_mb_generate_buddy(struct super_block *sb,
-                               void *buddy, void *bitmap, ext4_group_t group)
-@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super
-                */
-               grp->bb_free = free;
-       }
-+      mb_set_largest_free_order(sb, grp);
-       clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super
-  * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
-  * So it can have information regarding groups_per_page which
-  * is blocks_per_page/2
-+ *
-+ * Locking note:  This routine takes the block group lock of all groups
-+ * for this page; do not hold this lock when calling this routine!
-  */
- static int ext4_mb_init_cache(struct page *page, char *incore)
-@@ -910,6 +935,11 @@ out:
-       return err;
- }
-+/*
-+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
-+ * block group lock of all groups for this page; do not hold the BG lock when
-+ * calling this routine!
-+ */
- static noinline_for_stack
- int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
- {
-@@ -1004,6 +1034,11 @@ err:
-       return ret;
- }
-+/*
-+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
-+ * block group lock of all groups for this page; do not hold the BG lock when
-+ * calling this routine!
-+ */
- static noinline_for_stack int
- ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-                                       struct ext4_buddy *e4b)
-@@ -1150,7 +1185,7 @@ err:
-       return ret;
- }
--static void ext4_mb_release_desc(struct ext4_buddy *e4b)
-+static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
- {
-       if (e4b->bd_bitmap_page)
-               page_cache_release(e4b->bd_bitmap_page);
-@@ -1300,6 +1335,7 @@ static void mb_free_blocks(struct inode
-                       buddy = buddy2;
-               } while (1);
-       }
-+      mb_set_largest_free_order(sb, e4b->bd_info);
-       mb_check_buddy(e4b);
- }
-@@ -1428,6 +1464,7 @@ static int mb_mark_used(struct ext4_budd
-               e4b->bd_info->bb_counters[ord]++;
-               e4b->bd_info->bb_counters[ord]++;
-       }
-+      mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
-       mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
-       mb_check_buddy(e4b);
-@@ -1618,7 +1655,7 @@ int ext4_mb_try_best_found(struct ext4_a
-       }
-       ext4_unlock_group(ac->ac_sb, group);
--      ext4_mb_release_desc(e4b);
-+      ext4_mb_unload_buddy(e4b);
-       return 0;
- }
-@@ -1674,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_all
-               ext4_mb_use_best_found(ac, e4b);
-       }
-       ext4_unlock_group(ac->ac_sb, group);
--      ext4_mb_release_desc(e4b);
-+      ext4_mb_unload_buddy(e4b);
-       return 0;
- }
-@@ -1823,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_al
-       }
- }
-+/* This is now called BEFORE we load the buddy bitmap. */
- static int ext4_mb_good_group(struct ext4_allocation_context *ac,
-                               ext4_group_t group, int cr)
- {
-       unsigned free, fragments;
--      unsigned i, bits;
-       int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
-       struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
-       BUG_ON(cr < 0 || cr >= 4);
--      BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
-+
-+      /* We only do this if the grp has never been initialized */
-+      if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-+              int ret = ext4_mb_init_group(ac->ac_sb, group);
-+              if (ret)
-+                      return 0;
-+      }
-       free = grp->bb_free;
-       fragments = grp->bb_fragments;
-@@ -1845,17 +1888,16 @@ static int ext4_mb_good_group(struct ext
-       case 0:
-               BUG_ON(ac->ac_2order == 0);
-+              if (grp->bb_largest_free_order < ac->ac_2order)
-+                      return 0;
-+
-               /* Avoid using the first bg of a flexgroup for data files */
-               if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
-                   (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
-                   ((group % flex_size) == 0))
-                       return 0;
--              bits = ac->ac_sb->s_blocksize_bits + 1;
--              for (i = ac->ac_2order; i <= bits; i++)
--                      if (grp->bb_counters[i] > 0)
--                              return 1;
--              break;
-+              return 1;
-       case 1:
-               if ((free / fragments) >= ac->ac_g_ex.fe_len)
-                       return 1;
-@@ -2026,15 +2068,11 @@ repeat:
-               group = ac->ac_g_ex.fe_group;
-               for (i = 0; i < ngroups; group++, i++) {
--                      struct ext4_group_info *grp;
--                      struct ext4_group_desc *desc;
--
-                       if (group == ngroups)
-                               group = 0;
--                      /* quick check to skip empty groups */
--                      grp = ext4_get_group_info(sb, group);
--                      if (grp->bb_free == 0)
-+                      /* This now checks without needing the buddy page */
-+                      if (!ext4_mb_good_group(ac, group, cr))
-                               continue;
-                       err = ext4_mb_load_buddy(sb, group, &e4b);
-@@ -2042,15 +2080,18 @@ repeat:
-                               goto out;
-                       ext4_lock_group(sb, group);
-+
-+                      /*
-+                       * We need to check again after locking the
-+                       * block group
-+                       */
-                       if (!ext4_mb_good_group(ac, group, cr)) {
--                              /* someone did allocation from this group */
-                               ext4_unlock_group(sb, group);
--                              ext4_mb_release_desc(&e4b);
-+                              ext4_mb_unload_buddy(&e4b);
-                               continue;
-                       }
-                       ac->ac_groups_scanned++;
--                      desc = ext4_get_group_desc(sb, group, NULL);
-                       if (cr == 0)
-                               ext4_mb_simple_scan_group(ac, &e4b);
-                       else if (cr == 1 &&
-@@ -2060,7 +2101,7 @@ repeat:
-                               ext4_mb_complex_scan_group(ac, &e4b);
-                       ext4_unlock_group(sb, group);
--                      ext4_mb_release_desc(&e4b);
-+                      ext4_mb_unload_buddy(&e4b);
-                       if (ac->ac_status != AC_STATUS_CONTINUE)
-                               break;
-@@ -2150,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struc
-       ext4_lock_group(sb, group);
-       memcpy(&sg, ext4_get_group_info(sb, group), i);
-       ext4_unlock_group(sb, group);
--      ext4_mb_release_desc(&e4b);
-+      ext4_mb_unload_buddy(&e4b);
-       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
-                       sg.info.bb_fragments, sg.info.bb_first_free);
-@@ -2257,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_b
-       INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
-       init_rwsem(&meta_group_info[i]->alloc_sem);
-       meta_group_info[i]->bb_free_root = RB_ROOT;
-+      meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
- #ifdef DOUBLE_CHECK
-       {
-@@ -2567,7 +2609,7 @@ static void release_blocks_on_commit(jou
-               sb_issue_discard(sb, discard_block, entry->count);
-               kmem_cache_free(ext4_free_ext_cachep, entry);
--              ext4_mb_release_desc(&e4b);
-+              ext4_mb_unload_buddy(&e4b);
-       }
-       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
-@@ -3692,7 +3734,7 @@ out:
-       ext4_unlock_group(sb, group);
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
--      ext4_mb_release_desc(&e4b);
-+      ext4_mb_unload_buddy(&e4b);
-       put_bh(bitmap_bh);
-       return free;
- }
-@@ -3796,7 +3838,7 @@ repeat:
-               if (bitmap_bh == NULL) {
-                       ext4_error(sb, "Error reading block bitmap for %u",
-                                       group);
--                      ext4_mb_release_desc(&e4b);
-+                      ext4_mb_unload_buddy(&e4b);
-                       continue;
-               }
-@@ -3805,7 +3847,7 @@ repeat:
-               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
-               ext4_unlock_group(sb, group);
--              ext4_mb_release_desc(&e4b);
-+              ext4_mb_unload_buddy(&e4b);
-               put_bh(bitmap_bh);
-               list_del(&pa->u.pa_tmp_list);
-@@ -4069,7 +4111,7 @@ ext4_mb_discard_lg_preallocations(struct
-               ext4_mb_release_group_pa(&e4b, pa, ac);
-               ext4_unlock_group(sb, group);
--              ext4_mb_release_desc(&e4b);
-+              ext4_mb_unload_buddy(&e4b);
-               list_del(&pa->u.pa_tmp_list);
-               call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
-       }
-@@ -4570,7 +4612,7 @@ do_more:
-               atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
-       }
--      ext4_mb_release_desc(&e4b);
-+      ext4_mb_unload_buddy(&e4b);
-       *freed += count;
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch.patch
deleted file mode 100644 (file)
index ff6ef5f..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-Index: linux-stage/fs/ext4/mballoc.c
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.c 2010-01-26 22:50:37.000000000 +0800
-+++ linux-stage/fs/ext4/mballoc.c      2010-01-26 22:57:24.000000000 +0800
-@@ -3892,6 +3892,7 @@
-       INIT_LIST_HEAD(&pa->pa_group_list);
-       pa->pa_deleted = 0;
-       pa->pa_type = MB_INODE_PA;
-+      pa->pa_error = 0;
-       mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
-                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-@@ -3956,6 +3957,7 @@
-       INIT_LIST_HEAD(&pa->pa_group_list);
-       pa->pa_deleted = 0;
-       pa->pa_type = MB_GROUP_PA;
-+      pa->pa_error = 0;
-       mb_debug("new group pa %p: %llu/%u for %u\n", pa,
-                pa->pa_pstart, pa->pa_len, pa->pa_lstart);
-@@ -4019,7 +4021,9 @@
-       int err = 0;
-       int free = 0;
-+      assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
-       BUG_ON(pa->pa_deleted == 0);
-+      BUG_ON(pa->pa_inode == NULL);
-       ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-       grp_blk_start = pa->pa_pstart - bit;
-       BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
-@@ -4059,11 +4064,18 @@
-               mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
-               bit = next + 1;
-       }
--      if (free != pa->pa_free) {
--              printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
--                      pa, (unsigned long) pa->pa_lstart,
--                      (unsigned long) pa->pa_pstart,
--                      (unsigned long) pa->pa_len);
-+
-+        /* "free < pa->pa_free" means we maybe double alloc the same blocks,
-+         * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
-+        if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
-+                ext4_error(sb,"pa free mismatch: [pa %p] "
-+                              "[phy %lu] [logic %lu] [len %u] [free %u] "
-+                              "[error %u] [inode %lu] [freed %u]", pa,
-+                              (unsigned long)pa->pa_pstart,
-+                              (unsigned long)pa->pa_lstart,
-+                              (unsigned)pa->pa_len, (unsigned)pa->pa_free,
-+                              (unsigned)pa->pa_error, pa->pa_inode->i_ino,
-+                              free);
-               ext4_grp_locked_error(sb, group,
-                                       __func__, "free %u, pa_free %u",
-                                       free, pa->pa_free);
-@@ -4072,6 +4084,7 @@
-                * from the bitmap and continue.
-                */
-       }
-+        BUG_ON(pa->pa_free != free);
-       atomic_add(free, &sbi->s_mb_discarded);
-       return err;
-@@ -4832,6 +4863,25 @@
-                       ac->ac_b_ex.fe_len = 0;
-                       ar->len = 0;
-                       ext4_mb_show_ac(ac);
-+                        if (ac->ac_pa) {
-+                              struct ext4_prealloc_space *pa = ac->ac_pa;
-+
-+                                      /* We can not make sure whether the bitmap has
-+                                       * been updated or not when fail case. So can
-+                                 * not revert pa_free back, just mark pa_error*/
-+                                pa->pa_error++;
-+                                ext4_error(sb,
-+                                            "Updating bitmap error: [err %d] "
-+                                            "[pa %p] [phy %lu] [logic %lu] "
-+                                            "[len %u] [free %u] [error %u] "
-+                                            "[inode %lu]", *errp, pa,
-+                                            (unsigned long)pa->pa_pstart,
-+                                            (unsigned long)pa->pa_lstart,
-+                                            (unsigned)pa->pa_len,
-+                                            (unsigned)pa->pa_free,
-+                                            (unsigned)pa->pa_error,
-+                                      pa->pa_inode ? pa->pa_inode->i_ino : 0);
-+                        }
-               } else {
-                       block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-                       ar->len = ac->ac_b_ex.fe_len;
-Index: linux-stage/fs/ext4/mballoc.h
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.h 2010-01-26 22:50:36.000000000 +0800
-+++ linux-stage/fs/ext4/mballoc.h      2010-01-26 22:52:58.000000000 +0800
-@@ -21,6 +21,7 @@
- #include <linux/blkdev.h>
- #include <linux/marker.h>
- #include <linux/mutex.h>
-+#include <linux/genhd.h>
- #include "ext4_jbd2.h"
- #include "ext4.h"
- #include "group.h"
-@@ -134,6 +135,7 @@
-       ext4_grpblk_t           pa_len;         /* len of preallocated chunk */
-       ext4_grpblk_t           pa_free;        /* how many blocks are free */
-       unsigned short          pa_type;        /* pa type. inode or group */
-+      unsigned short          pa_error;
-       spinlock_t              *pa_obj_lock;
-       struct inode            *pa_inode;      /* hack, for history only */
- };
diff --git a/ldiskfs/kernel_patches/patches/ext4-misc-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-misc-rhel5.patch
deleted file mode 100644 (file)
index e77314e..0000000
+++ /dev/null
@@ -1,330 +0,0 @@
-Index: linux-stage/fs/ext4/ext4_jbd2.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4_jbd2.h
-+++ linux-stage/fs/ext4/ext4_jbd2.h
-@@ -35,6 +35,8 @@
-       (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
-        ? 27U : 8U)
-+#define ext4_journal_dirty_metadata(handle, bh)  \
-+                ext4_handle_dirty_metadata(handle, NULL, bh)
- /* Extended attribute operations touch at most two data buffers,
-  * two bitmap buffers, and two group summaries, in addition to the inode
-  * and the superblock, which are already accounted for. */
-Index: linux-stage/fs/ext4/extents.c
-===================================================================
---- linux-stage.orig/fs/ext4/extents.c
-+++ linux-stage/fs/ext4/extents.c
-@@ -59,6 +59,17 @@ ext4_fsblk_t ext_pblock(struct ext4_exte
- }
- /*
-+ * ext4_ext_store_pblock:
-+ * stores a large physical block number into an extent struct,
-+ * breaking it into parts
-+ */
-+void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-+{
-+      ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-+      ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-+}
-+
-+/*
-  * idx_pblock:
-  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
-  */
-@@ -72,17 +83,6 @@ ext4_fsblk_t idx_pblock(struct ext4_exte
- }
- /*
-- * ext4_ext_store_pblock:
-- * stores a large physical block number into an extent struct,
-- * breaking it into parts
-- */
--void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
--{
--      ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
--      ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
--}
--
--/*
-  * ext4_idx_store_pblock:
-  * stores a large physical block number into an index struct,
-  * breaking it into parts
-@@ -2097,6 +2097,56 @@ static int ext4_ext_rm_idx(handle_t *han
- }
- /*
-+ * This routine returns max. credits extent tree can consume.
-+ * It should be OK for low-performance paths like ->writepage()
-+ * To allow many writing process to fit a single transaction,
-+ * caller should calculate credits under truncate_mutex and
-+ * pass actual path.
-+ */
-+int ext4_ext_calc_credits_for_insert(struct inode *inode,
-+                                  struct ext4_ext_path *path)
-+{
-+      int depth, needed;
-+
-+      if (path) {
-+              /* probably there is space in leaf? */
-+              depth = ext_depth(inode);
-+              if (le16_to_cpu(path[depth].p_hdr->eh_entries)
-+                              < le16_to_cpu(path[depth].p_hdr->eh_max))
-+                      return 1;
-+      }
-+
-+      /*
-+       * given 32bit logical block (4294967296 blocks), max. tree
-+       * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
-+       * let's also add one more level for imbalance.
-+       */
-+      depth = 5;
-+
-+      /* allocation of new data block(s) */
-+      needed = 2;
-+
-+      /*
-+       * tree can be full, so it'd need to grow in depth:
-+       * we need one credit to modify old root, credits for
-+       * new root will be added in split accounting
-+       */
-+      needed += 1;
-+
-+      /*
-+       * Index split can happen, we'd need:
-+       *    allocate intermediate indexes (bitmap + group)
-+       *  + change two blocks at each level, but root (already included)
-+       */
-+      needed += (depth * 2) + (depth * 2);
-+
-+      /* any allocation modifies superblock */
-+      needed += 1;
-+
-+      return needed;
-+}
-+
-+/*
-  * ext4_ext_calc_credits_for_single_extent:
-  * This routine returns max. credits that needed to insert an extent
-  * to the extent tree.
-@@ -3941,3 +3991,15 @@ int ext4_fiemap(struct inode *inode, str
-       return error;
- }
-+EXPORT_SYMBOL(ext4_ext_store_pblock);
-+EXPORT_SYMBOL(ext4_ext_search_right);
-+EXPORT_SYMBOL(ext4_ext_search_left);
-+EXPORT_SYMBOL(ext_pblock);
-+EXPORT_SYMBOL(ext4_ext_insert_extent);
-+EXPORT_SYMBOL(ext4_mb_new_blocks);
-+EXPORT_SYMBOL(ext4_ext_walk_space);
-+EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
-+EXPORT_SYMBOL(ext4_mark_inode_dirty);
-+EXPORT_SYMBOL(ext4_ext_find_extent);
-+EXPORT_SYMBOL(ext4_ext_drop_refs);
-+
-Index: linux-stage/fs/ext4/ext4_extents.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4_extents.h
-+++ linux-stage/fs/ext4/ext4_extents.h
-@@ -58,6 +58,12 @@
-  */
- #define EXT_STATS_
-+/*
-+ * define EXT4_ALLOC_NEEDED to 0 since block bitmap, group desc. and sb
-+ * are now accounted in ext4_ext_calc_credits_for_insert()
-+ */
-+#define EXT4_ALLOC_NEEDED 0
-+#define HAVE_EXT_PREPARE_CB_EXTENT
- /*
-  * ext4_inode has i_block array (60 bytes total).
-@@ -231,6 +237,8 @@ extern ext4_fsblk_t ext_pblock(struct ex
- extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
- extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
- extern int ext4_extent_tree_init(handle_t *, struct inode *);
-+extern int ext4_ext_calc_credits_for_insert(struct inode *,
-+                                          struct ext4_ext_path *);
- extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
-                                                  int num,
-                                                  struct ext4_ext_path *path);
-Index: linux-stage/fs/ext4/mballoc.c
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.c
-+++ linux-stage/fs/ext4/mballoc.c
-@@ -4313,6 +4313,7 @@ repeat:
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
- }
-+EXPORT_SYMBOL(ext4_discard_preallocations); 
- /*
-  * finds all preallocated spaces and return blocks being freed to them
-@@ -5127,3 +5128,6 @@ error_return:
-               kmem_cache_free(ext4_ac_cachep, ac);
-       return;
- }
-+
-+EXPORT_SYMBOL(ext4_free_blocks);
-+
-Index: linux-stage/fs/ext4/ext4_jbd2.c
-===================================================================
---- linux-stage.orig/fs/ext4/ext4_jbd2.c
-+++ linux-stage/fs/ext4/ext4_jbd2.c
-@@ -31,6 +31,7 @@ int __ext4_journal_get_write_access(cons
-       }
-       return err;
- }
-+EXPORT_SYMBOL(__ext4_journal_get_write_access);
- int __ext4_journal_forget(const char *where, handle_t *handle,
-                               struct buffer_head *bh)
-@@ -107,3 +108,4 @@ int __ext4_handle_dirty_metadata(const c
-       }
-       return err;
- }
-+EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -1528,6 +1528,8 @@ extern int ext4_mb_add_groupinfo(struct 
- extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
- extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
-                                               ext4_group_t, int);
-+extern void ext4_mb_discard_inode_preallocations(struct inode *);
-+
- /* inode.c */
- int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
-               struct buffer_head *bh, ext4_fsblk_t blocknr);
-Index: linux-stage/fs/ext4/inode.c
-===================================================================
---- linux-stage.orig/fs/ext4/inode.c
-+++ linux-stage/fs/ext4/inode.c
-@@ -5078,6 +5078,7 @@ bad_inode:
-       iget_failed(inode);
-       return ERR_PTR(ret);
- }
-+EXPORT_SYMBOL(ext4_iget);
- static int ext4_inode_blocks_set(handle_t *handle,
-                               struct ext4_inode *raw_inode,
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -90,6 +90,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct su
-               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
- }
-+EXPORT_SYMBOL(ext4_inode_bitmap);
- ext4_fsblk_t ext4_inode_table(struct super_block *sb,
-                             struct ext4_group_desc *bg)
-@@ -114,6 +115,7 @@ __u32 ext4_free_inodes_count(struct supe
-               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-                (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
- }
-+EXPORT_SYMBOL(ext4_itable_unused_count);
- __u32 ext4_used_dirs_count(struct super_block *sb,
-                             struct ext4_group_desc *bg)
-@@ -1489,9 +1491,11 @@ enum {
-       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
-       Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
-       Opt_usrquota, Opt_grpquota, Opt_i_version,
-+      Opt_mballoc, Opt_extents,
-       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_block_validity, Opt_noblock_validity,
--      Opt_inode_readahead_blks, Opt_journal_ioprio
-+      Opt_inode_readahead_blks, Opt_journal_ioprio,
-+      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
- };
- static match_table_t tokens = {
-@@ -1547,6 +1551,11 @@ static match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_i_version, "i_version"},
-+      {Opt_mballoc, "mballoc"},
-+      {Opt_extents, "extents"},
-+      {Opt_iopen, "iopen"},
-+      {Opt_noiopen, "noiopen"},
-+      {Opt_iopen_nopriv, "iopen_nopriv"},
-       {Opt_stripe, "stripe=%u"},
-       {Opt_resize, "resize"},
-       {Opt_delalloc, "delalloc"},
-@@ -1993,6 +2002,12 @@ set_qf_format:
-                       else
-                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
-                       break;
-+              case Opt_mballoc:
-+              case Opt_extents:
-+              case Opt_iopen:
-+              case Opt_noiopen:
-+              case Opt_iopen_nopriv:
-+                      break;
-               default:
-                       ext4_msg(sb, KERN_ERR,
-                              "Unrecognized mount option \"%s\" "
-@@ -2543,7 +2558,7 @@ static ssize_t delayed_allocation_blocks
-                                             char *buf)
- {
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
--                      (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
-+                      (unsigned long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
- }
- static ssize_t session_write_kbytes_show(struct ext4_attr *a,
-@@ -2564,11 +2579,11 @@ static ssize_t lifetime_write_kbytes_sho
-       struct super_block *sb = sbi->s_buddy_cache->i_sb;
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
--                      sbi->s_kbytes_written + 
-+                      (unsigned long long)(sbi->s_kbytes_written + 
-                       (sb->s_bdev->bd_part ?
-                       (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
-                         EXT4_SB(sb)->s_sectors_written_start) >> 1
--                      : 0));
-+                      : 0)));
- }
- static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
-@@ -3042,7 +3057,7 @@ static int ext4_fill_super(struct super_
-       if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-               ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
-                      "exceeds size of device (%llu blocks)",
--                     ext4_blocks_count(es), blocks_count);
-+                     ext4_blocks_count(es), (unsigned long long)blocks_count);
-               goto failed_mount;
-       }
-Index: linux-stage/fs/ext4/fsync.c
-===================================================================
---- linux-stage.orig/fs/ext4/fsync.c
-+++ linux-stage/fs/ext4/fsync.c
-@@ -61,7 +61,7 @@ int ext4_sync_file(struct file *file, st
-       trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-                  inode->i_sb->s_id, datasync, inode->i_ino,
--                 dentry->d_parent->d_inode->i_ino);
-+                 0L);
-       ret = flush_aio_dio_completed_IO(inode);
-       if (ret < 0)
-Index: linux-stage/fs/ext4/move_extent.c
-===================================================================
---- linux-stage.orig/fs/ext4/move_extent.c
-+++ linux-stage/fs/ext4/move_extent.c
-@@ -1358,7 +1358,8 @@ ext4_move_extents(struct file *o_filp, s
-                               ext4_error(orig_inode->i_sb,
-                                       "We replaced blocks too much! "
-                                       "sum of replaced: %llu requested: %llu",
--                                      *moved_len, len);
-+                                      (unsigned long long)(*moved_len),
-+                                      (unsigned long long)(len));
-                               ret1 = -EIO;
-                               break;
-                       }
diff --git a/ldiskfs/kernel_patches/patches/ext4-mmp-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-mmp-rhel5.patch
deleted file mode 100644 (file)
index d028930..0000000
+++ /dev/null
@@ -1,578 +0,0 @@
-Prevent an ext4 filesystem from being mounted multiple times.
-A sequence number is stored on disk and is periodically updated (every 5
-seconds by default) by a mounted filesystem.
-At mount time, we now wait for s_mmp_update_interval seconds to make sure
-that the MMP sequence does not change.
-In case of failure, the nodename, bdevname, and the time at which the MMP
-block was last updated are displayed.
-Move all mmp code to a dedicated file (mmp.c).
-
-Signed-off-by: Andreas Dilger <adilger <at> whamcloud.com>
-Signed-off-by: Johann Lombardi <johann <at> whamcloud.com>
----
- fs/ext4/Makefile |    3 +-
- fs/ext4/ext4.h   |   76 ++++++++++++-
- fs/ext4/mmp.c    |  351 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
- fs/ext4/super.c  |   18 +++-
- 4 files changed, 444 insertions(+), 4 deletions(-)
- create mode 100644 fs/ext4/mmp.c
-
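
The following is a minimal userspace sketch of the mount-time check described above; it is not
part of the deleted patch. The idea is: sample the on-disk sequence, wait longer than one update
interval, and refuse to claim the device if the sequence moved. The read_mmp_seq callback is a
hypothetical stand-in for reading mmp_seq out of the MMP block; the real ext4_multi_mount_protect()
in the hunks below additionally honours mmp_check_interval, the CLEAN and FSCK sentinel values,
and re-verifies the sequence after writing a fresh random one.

#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical helper: returns the current on-disk mmp_seq value. */
typedef uint32_t (*read_mmp_seq_fn)(void);

/* Return 0 when the device looks idle and may be claimed, -1 otherwise. */
static int mmp_mount_check(read_mmp_seq_fn read_mmp_seq, unsigned int update_interval)
{
        uint32_t seq = read_mmp_seq();

        /* Wait longer than one update interval so that a live mounter,
         * which bumps mmp_seq every update_interval seconds, is observed. */
        sleep(2 * update_interval + 1);

        if (read_mmp_seq() != seq) {
                fprintf(stderr, "MMP: device appears active on another node\n");
                return -1;
        }
        return 0;
}

/* Demo with a constant (idle) sequence source. */
static uint32_t idle_seq(void) { return 42; }

int main(void)
{
        return mmp_mount_check(idle_seq, 5) ? 1 : 0;
}
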
-Index: linux-stage/fs/ext4/Makefile
-===================================================================
---- linux-stage.orig/fs/ext4/Makefile
-+++ linux-stage/fs/ext4/Makefile
-@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
- ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
--              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
-+              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-+              mmp.o
- ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
- ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -878,7 +878,7 @@ struct ext4_super_block {
-       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
-       __le32  s_flags;                /* Miscellaneous flags */
-       __le16  s_raid_stride;          /* RAID stride */
--      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
-+      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
-       __le64  s_mmp_block;            /* Block for multi-mount protection */
-       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
-       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
-@@ -1032,6 +1032,9 @@ struct ext4_sb_info {
-       /* workqueue for dio unwritten */
-       struct workqueue_struct *dio_unwritten_wq;
-+
-+      /* Kernel thread for multiple mount protection */
-+      struct task_struct *s_mmp_tsk;
- };
- static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
-@@ -1169,7 +1172,8 @@ static inline void ext4_clear_inode_stat
-                                        EXT4_FEATURE_INCOMPAT_META_BG| \
-                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
-                                        EXT4_FEATURE_INCOMPAT_64BIT| \
--                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
-+                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-+                                       EXT4_FEATURE_INCOMPAT_MMP)
- #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
-                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
-@@ -1376,6 +1380,67 @@ void ext4_get_group_no_and_offset(struct
- extern struct proc_dir_entry *ext4_proc_root;
- /*
-+ * This structure will be used for multiple mount protection. It will be
-+ * written into the block number saved in the s_mmp_block field in the
-+ * superblock. Programs that check MMP should assume that if
-+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
-+ * to use the filesystem, regardless of how old the timestamp is.
-+ */
-+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
-+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
-+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
-+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
-+
-+struct mmp_struct {
-+       __le32  mmp_magic;              /* Magic number for MMP */
-+       __le32  mmp_seq;                /* Sequence no. updated periodically */
-+
-+       /*
-+        * mmp_time, mmp_nodename & mmp_bdevname are only used for information
-+        * purposes and do not affect the correctness of the algorithm
-+        */
-+       __le64  mmp_time;               /* Time last updated */
-+       char    mmp_nodename[64];       /* Node which last updated MMP block */
-+       char    mmp_bdevname[32];       /* Bdev which last updated MMP block */
-+
-+       /*
-+        * mmp_check_interval is used to verify if the MMP block has been
-+        * updated on the block device. The value is updated based on the
-+        * maximum time to write the MMP block during an update cycle.
-+        */
-+       __le16  mmp_check_interval;
-+
-+       __le16  mmp_pad1;
-+       __le32  mmp_pad2[227];
-+};
-+
-+/* arguments passed to the mmp thread */
-+struct mmpd_data {
-+       struct buffer_head *bh; /* bh from initial read_mmp_block() */
-+       struct super_block *sb;  /* super block of the fs */
-+};
-+
-+/*
-+ * Check interval multiplier
-+ * The MMP block is written every update interval and initially checked every
-+ * update interval x the multiplier (the value is then adapted based on the
-+ * write latency). The reason is that writes can be delayed under load and we
-+ * don't want readers to incorrectly assume that the filesystem is no longer
-+ * in use.
-+ */
-+#define EXT4_MMP_CHECK_MULT            2UL
-+
-+/*
-+ * Minimum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MIN_CHECK_INTERVAL    5UL
-+
-+/*
-+ * Maximum interval for MMP checking in seconds.
-+ */
-+#define EXT4_MMP_MAX_CHECK_INTERVAL    300UL
-+
-+/*
-  * Function prototypes
-  */
-@@ -1547,6 +1612,10 @@ extern void __ext4_warning(struct super_
- #define ext4_warning(sb, message...)   __ext4_warning(sb, __func__, ## message)
- extern void ext4_msg(struct super_block *, const char *, const char *, ...)
-       __attribute__ ((format (printf, 3, 4)));
-+extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
-+                         const char *, const char *);
-+#define dump_mmp_msg(sb, mmp, msg)     __dump_mmp_msg(sb, mmp, __func__, msg)
-+
- extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
-                               const char *, const char *, ...)
-       __attribute__ ((format (printf, 4, 5)));
-@@ -1784,6 +1853,9 @@ static inline void ext4_unlock_group(str
-       spin_unlock(ext4_group_lock_ptr(sb, group));
- }
-+/* mmp.c */
-+extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
-+
- /*
-  * Inodes and files operations
-  */
-Index: linux-stage/fs/ext4/mmp.c
-===================================================================
---- /dev/null
-+++ linux-stage/fs/ext4/mmp.c
-@@ -0,0 +1,351 @@
-+#include <linux/fs.h>
-+#include <linux/random.h>
-+#include <linux/buffer_head.h>
-+#include <linux/utsname.h>
-+#include <linux/kthread.h>
-+
-+#include "ext4.h"
-+
-+/*
-+ * Write the MMP block using WRITE_SYNC to try to get the block on-disk
-+ * faster.
-+ */
-+static int write_mmp_block(struct buffer_head *bh)
-+{
-+       mark_buffer_dirty(bh);
-+       lock_buffer(bh);
-+       bh->b_end_io = end_buffer_write_sync;
-+       get_bh(bh);
-+       submit_bh(WRITE_SYNC, bh);
-+       wait_on_buffer(bh);
-+       if (unlikely(!buffer_uptodate(bh)))
-+               return 1;
-+
-+       return 0;
-+}
-+
-+/*
-+ * Read the MMP block. It _must_ be read from disk and hence we clear the
-+ * uptodate flag on the buffer.
-+ */
-+static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
-+                         ext4_fsblk_t mmp_block)
-+{
-+       struct mmp_struct *mmp;
-+
-+       if (*bh)
-+               clear_buffer_uptodate(*bh);
-+
-+       /* This would be sb_bread(sb, mmp_block), except we need to be sure
-+        * that the MD RAID device cache has been bypassed, and that the read
-+        * is not blocked in the elevator. */
-+       if (!*bh)
-+               *bh = sb_getblk(sb, mmp_block);
-+       if (*bh) {
-+               get_bh(*bh);
-+               lock_buffer(*bh);
-+               (*bh)->b_end_io = end_buffer_read_sync;
-+               submit_bh(READ_SYNC, *bh);
-+               wait_on_buffer(*bh);
-+               if (!buffer_uptodate(*bh)) {
-+                       brelse(*bh);
-+                       *bh = NULL;
-+               }
-+       }
-+       if (!*bh) {
-+               ext4_warning(sb, "Error while reading MMP block %llu",
-+                            mmp_block);
-+               return -EIO;
-+       }
-+
-+       mmp = (struct mmp_struct *)((*bh)->b_data);
-+       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
-+               return -EINVAL;
-+
-+       return 0;
-+}
-+
-+/*
-+ * Dump as much information as possible to help the admin.
-+ */
-+void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
-+                   const char *function, const char *msg)
-+{
-+       __ext4_warning(sb, function, "%s", msg);
-+       __ext4_warning(sb, function,
-+                      "MMP failure info: last update time: %llu, last update "
-+                      "node: %s, last update device: %s\n",
-+                      (long long unsigned int) le64_to_cpu(mmp->mmp_time),
-+                      mmp->mmp_nodename, mmp->mmp_bdevname);
-+}
-+
-+/*
-+ * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
-+ */
-+static int kmmpd(void *data)
-+{
-+       struct super_block *sb = ((struct mmpd_data *) data)->sb;
-+       struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
-+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+       struct mmp_struct *mmp;
-+       ext4_fsblk_t mmp_block;
-+       u32 seq = 0;
-+       unsigned long failed_writes = 0;
-+       int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
-+       unsigned mmp_check_interval;
-+       unsigned long last_update_time;
-+       unsigned long diff;
-+       int retval;
-+
-+       mmp_block = le64_to_cpu(es->s_mmp_block);
-+       mmp = (struct mmp_struct *)(bh->b_data);
-+       mmp->mmp_time = cpu_to_le64(get_seconds());
-+       /*
-+        * Start with the higher mmp_check_interval and reduce it if
-+        * the MMP block is being updated on time.
-+        */
-+       mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
-+                                EXT4_MMP_MIN_CHECK_INTERVAL);
-+       mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+       bdevname(bh->b_bdev, mmp->mmp_bdevname);
-+
-+       memcpy(mmp->mmp_nodename, init_utsname()->sysname,
-+              sizeof(mmp->mmp_nodename));
-+
-+       while (!kthread_should_stop()) {
-+               if (++seq > EXT4_MMP_SEQ_MAX)
-+                       seq = 1;
-+
-+               mmp->mmp_seq = cpu_to_le32(seq);
-+               mmp->mmp_time = cpu_to_le64(get_seconds());
-+               last_update_time = jiffies;
-+
-+               retval = write_mmp_block(bh);
-+               /*
-+                * Don't spew too many error messages. Print one every
-+                * (s_mmp_update_interval * 60) seconds.
-+                */
-+               if (retval) {
-+                       if ((failed_writes % 60) == 0)
-+                               ext4_error(sb, "Error writing to MMP block");
-+                       failed_writes++;
-+               }
-+
-+               if (!(le32_to_cpu(es->s_feature_incompat) &
-+                   EXT4_FEATURE_INCOMPAT_MMP)) {
-+                       ext4_warning(sb, "kmmpd being stopped since MMP feature"
-+                                    " has been disabled.");
-+                       EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                       goto failed;
-+               }
-+
-+               if (sb->s_flags & MS_RDONLY) {
-+                       ext4_warning(sb, "kmmpd being stopped since filesystem "
-+                                    "has been remounted as readonly.");
-+                       EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                       goto failed;
-+               }
-+
-+               diff = jiffies - last_update_time;
-+               if (diff < mmp_update_interval * HZ)
-+                       schedule_timeout_interruptible(mmp_update_interval *
-+                                                      HZ - diff);
-+
-+               /*
-+                * We need to make sure that more than mmp_check_interval
-+                * seconds have not passed since writing. If that has happened
-+                * we need to check if the MMP block is as we left it.
-+                */
-+               diff = jiffies - last_update_time;
-+               if (diff > mmp_check_interval * HZ) {
-+                       struct buffer_head *bh_check = NULL;
-+                       struct mmp_struct *mmp_check;
-+
-+                       retval = read_mmp_block(sb, &bh_check, mmp_block);
-+                       if (retval) {
-+                               ext4_error(sb, "error reading MMP data: %d",
-+                                          retval);
-+
-+                               EXT4_SB(sb)->s_mmp_tsk = NULL;
-+                               goto failed;
-+                       }
-+
-+                       mmp_check = (struct mmp_struct *)(bh_check->b_data);
-+                       if (mmp->mmp_seq != mmp_check->mmp_seq ||
-+                           memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
-+                                  sizeof(mmp->mmp_nodename))) {
-+                               dump_mmp_msg(sb, mmp_check,
-+                                            "Error while updating MMP info. "
-+                                            "The filesystem seems to have been"
-+                                            " multiply mounted.");
-+                               ext4_error(sb, "abort");
-+                               goto failed;
-+                       }
-+                       put_bh(bh_check);
-+               }
-+
-+                /*
-+                * Adjust the mmp_check_interval depending on how much time
-+                * it took for the MMP block to be written.
-+                */
-+               mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
-+                                            EXT4_MMP_MAX_CHECK_INTERVAL),
-+                                        EXT4_MMP_MIN_CHECK_INTERVAL);
-+               mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
-+       }
-+
-+       /*
-+        * Unmount seems to be clean.
-+        */
-+       mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
-+       mmp->mmp_time = cpu_to_le64(get_seconds());
-+
-+       retval = write_mmp_block(bh);
-+
-+failed:
-+       kfree(data);
-+       brelse(bh);
-+       return retval;
-+}
-+
-+/*
-+ * Get a random new sequence number but make sure it is not greater than
-+ * EXT4_MMP_SEQ_MAX.
-+ */
-+static unsigned int mmp_new_seq(void)
-+{
-+       u32 new_seq;
-+
-+       do {
-+               get_random_bytes(&new_seq, sizeof(u32));
-+       } while (new_seq > EXT4_MMP_SEQ_MAX);
-+
-+       return new_seq;
-+}
-+
-+/*
-+ * Protect the filesystem from being mounted more than once.
-+ */
-+int ext4_multi_mount_protect(struct super_block *sb,
-+                                   ext4_fsblk_t mmp_block)
-+{
-+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-+       struct buffer_head *bh = NULL;
-+       struct mmp_struct *mmp = NULL;
-+       struct mmpd_data *mmpd_data;
-+       u32 seq;
-+       unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
-+       unsigned int wait_time = 0;
-+       int retval;
-+
-+       if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
-+           mmp_block >= ext4_blocks_count(es)) {
-+               ext4_warning(sb, "Invalid MMP block in superblock");
-+               goto failed;
-+       }
-+
-+       retval = read_mmp_block(sb, &bh, mmp_block);
-+       if (retval)
-+               goto failed;
-+
-+       mmp = (struct mmp_struct *)(bh->b_data);
-+
-+       if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
-+               mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
-+
-+       /*
-+        * If check_interval in MMP block is larger, use that instead of
-+        * update_interval from the superblock.
-+        */
-+       if (mmp->mmp_check_interval > mmp_check_interval)
-+               mmp_check_interval = mmp->mmp_check_interval;
-+
-+       seq = le32_to_cpu(mmp->mmp_seq);
-+       if (seq == EXT4_MMP_SEQ_CLEAN)
-+               goto skip;
-+
-+       if (seq == EXT4_MMP_SEQ_FSCK) {
-+               dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
-+               goto failed;
-+       }
-+
-+       wait_time = min(mmp_check_interval * 2 + 1,
-+                       mmp_check_interval + 60);
-+
-+       /* Print MMP interval if more than 20 secs. */
-+       if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
-+               ext4_warning(sb, "MMP interval %u higher than expected, please"
-+                            " wait.\n", wait_time * 2);
-+
-+       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
-+               goto failed;
-+       }
-+
-+       retval = read_mmp_block(sb, &bh, mmp_block);
-+       if (retval)
-+               goto failed;
-+       mmp = (struct mmp_struct *)(bh->b_data);
-+       if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+               dump_mmp_msg(sb, mmp,
-+                            "Device is already active on another node.");
-+               goto failed;
-+       }
-+
-+skip:
-+       /*
-+        * write a new random sequence number.
-+        */
-+       mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());
-+
-+       retval = write_mmp_block(bh);
-+       if (retval)
-+               goto failed;
-+
-+       /*
-+        * wait for MMP interval and check mmp_seq.
-+        */
-+       if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
-+               ext4_warning(sb, "MMP startup interrupted, failing mount\n");
-+               goto failed;
-+       }
-+
-+       retval = read_mmp_block(sb, &bh, mmp_block);
-+       if (retval)
-+               goto failed;
-+       mmp = (struct mmp_struct *)(bh->b_data);
-+       if (seq != le32_to_cpu(mmp->mmp_seq)) {
-+               dump_mmp_msg(sb, mmp,
-+                            "Device is already active on another node.");
-+               goto failed;
-+       }
-+
-+       mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
-+       if (!mmpd_data) {
-+               ext4_warning(sb, "not enough memory for mmpd_data");
-+               goto failed;
-+       }
-+       mmpd_data->sb = sb;
-+       mmpd_data->bh = bh;
-+
-+       /*
-+        * Start a kernel thread to update the MMP block periodically.
-+        */
-+       EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
-+                                            bdevname(bh->b_bdev,
-+                                                     mmp->mmp_bdevname));
-+       if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
-+               EXT4_SB(sb)->s_mmp_tsk = NULL;
-+               kfree(mmpd_data);
-+               ext4_warning(sb, "Unable to create kmmpd thread for %s.",
-+                            sb->s_id);
-+               goto failed;
-+       }
-+
-+       return 0;
-+
-+failed:
-+       brelse(bh);
-+       return 1;
-+}
-+
-+
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -40,6 +40,8 @@
- #include <linux/log2.h>
- #include <linux/crc16.h>
- #include <asm/uaccess.h>
-+#include <linux/kthread.h>
-+#include <linux/utsname.h>
- #include "ext4.h"
- #include "ext4_jbd2.h"
-@@ -698,6 +700,8 @@ static void ext4_put_super(struct super_
-               invalidate_bdev(sbi->journal_bdev, 0);
-               ext4_blkdev_remove(sbi);
-       }
-+      if (sbi->s_mmp_tsk)
-+              kthread_stop(sbi->s_mmp_tsk);
-       sb->s_fs_info = NULL;
-       /*
-        * Now that we are completely done shutting down the
-@@ -2810,6 +2814,11 @@ static int ext4_fill_super(struct super_
-                         EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                   EXT4_FEATURE_INCOMPAT_RECOVER));
-+      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
-+          !(sb->s_flags & MS_RDONLY))
-+              if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
-+                      goto failed_mount3;
-+
-       /*
-        * The first inode we look at is the journal inode.  Don't try
-        * root first: it may be modified in the journal!
-@@ -3048,6 +3057,8 @@ failed_mount3:
-       percpu_counter_destroy(&sbi->s_freeinodes_counter);
-       percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
-+      if (sbi->s_mmp_tsk)
-+              kthread_stop(sbi->s_mmp_tsk);
- failed_mount2:
-       for (i = 0; i < db_count; i++)
-               brelse(sbi->s_group_desc[i]);
-@@ -3557,7 +3568,7 @@ static int ext4_remount(struct super_blo
-       struct ext4_mount_options old_opts;
-       ext4_group_t g;
-       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
--      int err;
-+      int err = 0;
- #ifdef CONFIG_QUOTA
-       int i;
- #endif
-@@ -3676,6 +3687,13 @@ static int ext4_remount(struct super_blo
-                               goto restore_opts;
-                       if (!ext4_setup_super(sb, es, 0))
-                               sb->s_flags &= ~MS_RDONLY;
-+                      if (EXT4_HAS_INCOMPAT_FEATURE(sb,
-+                                              EXT4_FEATURE_INCOMPAT_MMP))
-+                              if (ext4_multi_mount_protect(sb,
-+                                      le64_to_cpu(es->s_mmp_block))) {
-+                                      err = -EROFS;
-+                                      goto restore_opts;
-+                              }
-               }
-       }
-       ext4_setup_system_zone(sb);
diff --git a/ldiskfs/kernel_patches/patches/ext4-osd-iam-exports.patch b/ldiskfs/kernel_patches/patches/ext4-osd-iam-exports.patch
deleted file mode 100644 (file)
index 6b65eb0..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-diff -rupN 2.6.27.21_2/fs/ext4/ext4.h 2.6.27.21_3/fs/ext4/ext4.h
---- 2.6.27.21_2/fs/ext4/ext4.h 2009-07-17 12:19:59.000000000 +0530
-+++ 2.6.27.21_3/fs/ext4/ext4.h 2009-07-17 12:38:59.000000000 +0530
-@@ -1181,6 +1181,9 @@ extern int ext4_orphan_add(handle_t *, s
- extern int ext4_orphan_del(handle_t *, struct inode *);
- extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-                               __u32 start_minor_hash, __u32 *next_hash);
-+extern struct buffer_head *ext4_append(handle_t *handle,
-+                                     struct inode *inode,
-+                                     ext4_lblk_t *block, int *err);
- /* resize.c */
- extern int ext4_group_add(struct super_block *sb,
-diff -rupN 2.6.27.21_2/fs/ext4/hash.c 2.6.27.21_3/fs/ext4/hash.c
---- 2.6.27.21_2/fs/ext4/hash.c 2009-07-17 12:12:56.000000000 +0530
-+++ 2.6.27.21_3/fs/ext4/hash.c 2009-07-17 12:40:22.000000000 +0530
-@@ -9,6 +9,7 @@
-  * License.
-  */
-+#include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/jbd2.h>
- #include <linux/cryptohash.h>
-@@ -206,3 +207,4 @@ int ext4fs_dirhash(const char *name, int
-       hinfo->minor_hash = minor_hash;
-       return 0;
- }
-+EXPORT_SYMBOL(ext4fs_dirhash);
-diff -rupN 2.6.27.21_2/fs/ext4/namei.c 2.6.27.21_3/fs/ext4/namei.c
---- 2.6.27.21_2/fs/ext4/namei.c        2009-07-17 12:23:51.000000000 +0530
-+++ 2.6.27.21_3/fs/ext4/namei.c        2009-07-17 12:37:59.000000000 +0530
-@@ -51,9 +51,9 @@
- #define NAMEI_RA_SIZE      (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
--static struct buffer_head *ext4_append(handle_t *handle,
--                                      struct inode *inode,
--                                      ext4_lblk_t *block, int *err)
-+struct buffer_head *ext4_append(handle_t *handle,
-+                              struct inode *inode,
-+                              ext4_lblk_t *block, int *err)
- {
-       struct buffer_head *bh;
-       struct ext4_inode_info *ei = EXT4_I(inode);
-@@ -72,6 +72,7 @@ static struct buffer_head *ext4_append(h
-       up(&ei->i_append_sem);
-       return bh;
- }
-+EXPORT_SYMBOL(ext4_append);
- #ifndef assert
- #define assert(test) J_ASSERT(test)
-diff -rupN 2.6.27.21_2/fs/ext4/super.c 2.6.27.21_3/fs/ext4/super.c
---- 2.6.27.21_2/fs/ext4/super.c        2009-07-17 12:12:57.000000000 +0530
-+++ 2.6.27.21_3/fs/ext4/super.c        2009-07-17 12:40:52.000000000 +0530
-@@ -377,6 +377,7 @@ void __ext4_std_error(struct super_block
-       ext4_handle_error(sb);
- }
-+EXPORT_SYMBOL(__ext4_std_error);
- /*
-  * ext4_abort is a much stronger failure handler than ext4_error.  The
diff --git a/ldiskfs/kernel_patches/patches/ext4-osd-iop-common.patch b/ldiskfs/kernel_patches/patches/ext4-osd-iop-common.patch
deleted file mode 100644 (file)
index bc2a345..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/ext4.h linux-2.6.27.21-0.1_2//fs/ext4/ext4.h
---- linux-2.6.27.21-0.1_1//fs/ext4/ext4.h      2009-08-24 15:32:00.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/ext4.h      2009-08-24 15:32:55.000000000 +0530
-@@ -1171,6 +1171,19 @@ extern int ext4_fiemap(struct inode *, s
- /* migrate.c */
- extern int ext4_ext_migrate(struct inode *);
- /* namei.c */
-+extern struct inode *ext4_create_inode(handle_t *handle,
-+                                        struct inode * dir, int mode);
-+extern int ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+                           struct inode *inode);
-+extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
-+                              struct ext4_dir_entry_2 * de_del,
-+                              struct buffer_head * bh);
-+extern struct buffer_head * ext4_find_entry(struct inode *dir,
-+                                          const struct qstr *d_name,
-+                                          struct ext4_dir_entry_2 ** res_dir);
-+#define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
-+extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
-+                                struct inode *inode);
- extern int ext4_orphan_add(handle_t *, struct inode *);
- extern int ext4_orphan_del(handle_t *, struct inode *);
- extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/namei.c linux-2.6.27.21-0.1_2//fs/ext4/namei.c
---- linux-2.6.27.21-0.1_1//fs/ext4/namei.c     2009-08-24 15:32:00.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/namei.c     2009-08-24 15:43:56.000000000 +0530
-@@ -24,6 +24,7 @@
-  *    Theodore Ts'o, 2002
-  */
-+#include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/pagemap.h>
- #include <linux/jbd2.h>
-@@ -882,9 +883,9 @@ static inline int search_dirblock(struct
-  * The returned buffer_head has ->b_count elevated.  The caller is expected
-  * to brelse() it when appropriate.
-  */
--static struct buffer_head * ext4_find_entry (struct inode *dir,
--                                      const struct qstr *d_name,
--                                      struct ext4_dir_entry_2 ** res_dir)
-+struct buffer_head * ext4_find_entry(struct inode *dir,
-+                                    const struct qstr *d_name,
-+                                    struct ext4_dir_entry_2 ** res_dir)
- {
-       struct super_block *sb;
-       struct buffer_head *bh_use[NAMEI_RA_SIZE];
-@@ -991,6 +992,7 @@ cleanup_and_exit:
-               brelse(bh_use[ra_ptr]);
-       return ret;
- }
-+EXPORT_SYMBOL(ext4_find_entry);
- static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
-                      struct ext4_dir_entry_2 **res_dir, int *err)
-@@ -1511,8 +1513,8 @@ static int make_indexed_dir(handle_t *ha
-  * may not sleep between calling this and putting something into
-  * the entry, as someone else might have used it while you slept.
-  */
--static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
--                        struct inode *inode)
-+int ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+                 struct inode *inode)
- {
-       struct inode *dir = dentry->d_parent->d_inode;
-       struct buffer_head *bh;
-@@ -1557,6 +1559,7 @@ static int ext4_add_entry(handle_t *hand
-       de->rec_len = ext4_rec_len_to_disk(blocksize);
-       return add_dirent_to_buf(handle, dentry, inode, de, bh);
- }
-+EXPORT_SYMBOL(ext4_add_entry);
- /*
-  * Returns 0 for success, or a negative error value
-@@ -1699,10 +1702,10 @@ cleanup:
-  * ext4_delete_entry deletes a directory entry by merging it with the
-  * previous entry
-  */
--static int ext4_delete_entry(handle_t *handle,
--                           struct inode *dir,
--                           struct ext4_dir_entry_2 *de_del,
--                           struct buffer_head *bh)
-+int ext4_delete_entry(handle_t *handle,
-+                    struct inode *dir,
-+                    struct ext4_dir_entry_2 *de_del,
-+                    struct buffer_head *bh)
- {
-       struct ext4_dir_entry_2 *de, *pde;
-       int i;
-@@ -1733,7 +1736,7 @@ static int ext4_delete_entry(handle_t *h
-       }
-       return -ENOENT;
- }
--
-+EXPORT_SYMBOL(ext4_delete_entry);
- /*
-  * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
-  * since this indicates that nlinks count was previously 1.
-@@ -1796,6 +1799,26 @@ static unsigned ext4_dentry_goal(struct
-       return inum;
- }
-+struct inode * ext4_create_inode(handle_t *handle, struct inode * dir, int mode)
-+{
-+      struct inode *inode;
-+
-+      inode = ext4_new_inode(handle, dir, mode, NULL, 0);
-+      if (!IS_ERR(inode)) {
-+              if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) {
-+#ifdef CONFIG_LDISKFS_FS_XATTR
-+                      inode->i_op = &ext4_special_inode_operations;
-+#endif
-+              } else {
-+                      inode->i_op = &ext4_file_inode_operations;
-+                      inode->i_fop = &ext4_file_operations;
-+                      ext4_set_aops(inode);
-+              }
-+      }
-+      return inode;
-+}
-+EXPORT_SYMBOL(ext4_create_inode);
-+
- /*
-  * By the time this is called, we already have created
-  * the directory cache entry for the new file, but it
-@@ -1872,40 +1895,32 @@ retry:
-       return err;
- }
--static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-+/* Initialize @inode as a subdirectory of @dir, and add the
-+ * "." and ".." entries into the first directory block. */
-+int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
-+                      struct inode *inode)
- {
--      handle_t *handle;
--      struct inode *inode;
--      struct buffer_head *dir_block;
--      struct ext4_dir_entry_2 *de;
-       unsigned int blocksize = dir->i_sb->s_blocksize;
--      int err, retries = 0;
--
--      if (EXT4_DIR_LINK_MAX(dir))
--              return -EMLINK;
-+      struct buffer_head * dir_block;
-+      struct ext4_dir_entry_2 * de;
-+      int err = 0;
--retry:
--      handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
--                                      EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
--                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-       if (IS_DIRSYNC(dir))
-               ext4_handle_sync(handle);
--      inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name,
--                             ext4_dentry_goal(dir->i_sb, dentry));
--      err = PTR_ERR(inode);
--      if (IS_ERR(inode))
--              goto out_stop;
--
-       inode->i_op = &ext4_dir_inode_operations;
-       inode->i_fop = &ext4_dir_operations;
-       inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-       dir_block = ext4_bread(handle, inode, 0, 1, &err);
--      if (!dir_block)
--              goto out_clear_inode;
-+      if (!dir_block) {
-+              clear_nlink(inode);
-+              ext4_mark_inode_dirty(handle, inode);
-+              iput (inode);
-+              goto get_out;
-+      }
-       BUFFER_TRACE(dir_block, "get_write_access");
-       ext4_journal_get_write_access(handle, dir_block);
-       de = (struct ext4_dir_entry_2 *) dir_block->b_data;
-@@ -1925,9 +1940,43 @@ retry:
-       ext4_journal_dirty_metadata(handle, dir_block);
-       brelse(dir_block);
-       ext4_mark_inode_dirty(handle, inode);
-+get_out:
-+      return err;
-+}
-+EXPORT_SYMBOL(ext4_add_dot_dotdot);
-+
-+
-+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-+{
-+      handle_t *handle;
-+      struct inode *inode;
-+      int err, retries = 0;
-+
-+      if (EXT4_DIR_LINK_MAX(dir))
-+              return -EMLINK;
-+
-+retry:
-+      handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-+                                      EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-+                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
-+      if (IS_ERR(handle))
-+              return PTR_ERR(handle);
-+
-+      if (IS_DIRSYNC(dir))
-+              handle->h_sync = 1;
-+
-+      inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name,
-+                             ext4_dentry_goal(dir->i_sb, dentry));
-+      err = PTR_ERR(inode);
-+      if (IS_ERR(inode))
-+              goto out_stop;
-+
-+      err = ext4_add_dot_dotdot(handle, dir, inode);
-+      if (err)
-+              goto out_stop;
-+
-       err = ext4_add_entry(handle, dentry, inode);
-       if (err) {
--out_clear_inode:
-               clear_nlink(inode);
-               ext4_mark_inode_dirty(handle, inode);
-               iput(inode);
diff --git a/ldiskfs/kernel_patches/patches/ext4-pdir-fix.patch b/ldiskfs/kernel_patches/patches/ext4-pdir-fix.patch
deleted file mode 100644 (file)
index 32218cf..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/ext4_i.h linux-2.6.27.21-0.1_2//fs/ext4/ext4_i.h
---- linux-2.6.27.21-0.1_1//fs/ext4/ext4.h      2009-08-24 13:00:59.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/ext4.h      2009-08-24 13:01:25.000000000 +0530
-@@ -16,6 +16,7 @@
- #include <linux/blkdev.h>
- #include <linux/magic.h>
- #include <linux/jbd2.h>
-+#include <linux/dynlocks.h>
- #include <linux/quota.h>
- #include <linux/rwsem.h>
- #include <linux/rbtree.h>
-@@ -56,7 +57,9 @@ struct ext4_inode_info {
-       __u32   i_flags;
-       ext4_fsblk_t    i_file_acl;
-       __u32   i_dtime;
--
-+      /* following fields for parallel directory operations -bzzz */
-+      struct dynlock   i_htree_lock;
-+      struct semaphore i_append_sem;
-       /*
-        * i_block_group is the number of the block group which contains
-        * this file's inode.  Constant across the lifetime of the inode,
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/namei.c linux-2.6.27.21-0.1_2//fs/ext4/namei.c
---- linux-2.6.27.21-0.1_1//fs/ext4/namei.c     2009-08-24 13:00:59.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/namei.c     2009-08-24 13:03:45.000000000 +0530
-@@ -55,6 +55,11 @@ static struct buffer_head *ext4_append(h
-                                       ext4_lblk_t *block, int *err)
- {
-       struct buffer_head *bh;
-+      struct ext4_inode_info *ei = EXT4_I(inode);
-+
-+      /* with parallel dir operations all appends
-+      * have to be serialized -bzzz */
-+      down(&ei->i_append_sem);
-       *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-@@ -67,7 +72,9 @@ static struct buffer_head *ext4_append(h
-                       brelse(bh);
-                       bh = NULL;
-               }
-+              ei->i_disksize = inode->i_size;
-       }
-+      up(&ei->i_append_sem);
-       return bh;
- }
-diff -rupN linux-2.6.27.21-0.1_1//fs/ext4/super.c linux-2.6.27.21-0.1_2//fs/ext4/super.c
---- linux-2.6.27.21-0.1_1//fs/ext4/super.c     2009-08-24 13:00:59.000000000 +0530
-+++ linux-2.6.27.21-0.1_2//fs/ext4/super.c     2009-08-24 13:01:25.000000000 +0530
-@@ -635,6 +635,8 @@ static struct inode *ext4_alloc_inode(st
- #endif
-       ei->vfs_inode.i_version = 1;
-       ei->vfs_inode.i_data.writeback_index = 0;
-+      dynlock_init(&ei->i_htree_lock);
-+      sema_init(&ei->i_append_sem, 1);
-       memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
-       INIT_LIST_HEAD(&ei->i_prealloc_list);
-       spin_lock_init(&ei->i_prealloc_lock);
diff --git a/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch
deleted file mode 100644 (file)
index d7485f5..0000000
+++ /dev/null
@@ -1,378 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/ext4/super.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/super.c
-+++ linux-2.6.18-128.1.6/fs/ext4/super.c
-@@ -108,7 +108,8 @@
- EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
- EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
- EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
--EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
-+EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
-+EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
- EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
- EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size);
-@@ -108,7 +108,8 @@
-       ATTR_LIST(mb_max_to_scan),
-       ATTR_LIST(mb_min_to_scan),
-       ATTR_LIST(mb_order2_req),
--      ATTR_LIST(mb_stream_req),
-+      ATTR_LIST(mb_small_req),
-+      ATTR_LIST(mb_large_req),
-       ATTR_LIST(mb_group_prealloc),
-       ATTR_LIST(max_dir_size),
-       NULL,
-Index: linux-2.6.18-128.1.6/fs/ext4/ext4.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h   2009-05-28 17:16:51.000000000 +0530
-+++ linux-2.6.18-128.1.6/fs/ext4/ext4.h        2009-05-28 17:16:52.000000000 +0530
-@@ -108,11 +108,14 @@
-       /* tunables */
-       unsigned long s_stripe;
--      unsigned int s_mb_stream_request;
-+      unsigned long s_mb_small_req;
-+      unsigned long s_mb_large_req;
-       unsigned int s_mb_max_to_scan;
-       unsigned int s_mb_min_to_scan;
-       unsigned int s_mb_stats;
-       unsigned int s_mb_order2_reqs;
-+      unsigned long *s_mb_prealloc_table;
-+      unsigned long s_mb_prealloc_table_size;
-       unsigned int s_mb_group_prealloc;
-       /* where last allocation was done - for stream allocation */
-       unsigned long s_mb_last_group;
-Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/mballoc.c        2009-05-28 17:16:51.000000000 +0530
-+++ linux-2.6.18-128.1.6/fs/ext4/mballoc.c     2009-05-28 17:19:57.000000000 +0530
-@@ -2284,6 +2284,26 @@
-       }
- }
-+static void ext4_mb_prealloc_table_add(struct ext4_sb_info *sbi, int value)
-+{
-+      int i;
-+
-+      if (value > (sbi->s_blocks_per_group - 1 - 1 - sbi->s_itb_per_group))
-+              return;
-+
-+      for (i = 0; i < sbi->s_mb_prealloc_table_size; i++) {
-+              if (sbi->s_mb_prealloc_table[i] == 0) {
-+                      sbi->s_mb_prealloc_table[i] = value;
-+                      return;
-+              }
-+
-+              /* they should add values in order */
-+              if (value <= sbi->s_mb_prealloc_table[i])
-+                      return;
-+      }
-+}
-+
-+
- static int ext4_mb_good_group(struct ext4_allocation_context *ac,
-                               ext4_group_t group, int cr)
- {
-@@ -2325,6 +2389,80 @@
-       .llseek         = seq_lseek,
-       .release        = seq_release,
- };
-+
-+#define EXT4_MB_PREALLOC_TABLE          "prealloc_table"
-+
-+static int ext4_mb_prealloc_table_proc_read(char *page, char **start, off_t off,
-+                                          int count, int *eof, void *data)
-+{
-+      struct ext4_sb_info *sbi = data;
-+      int len = 0;
-+      int i;
-+
-+      *eof = 1;
-+      if (off != 0)
-+              return 0;
-+
-+      for (i = 0; i < sbi->s_mb_prealloc_table_size; i++)
-+              len += sprintf(page + len, "%ld ",
-+                             sbi->s_mb_prealloc_table[i]);
-+      len += sprintf(page + len, "\n");
-+
-+      *start = page;
-+      return len;
-+}
-+
-+static int ext4_mb_prealloc_table_proc_write(struct file *file,
-+                                           const char __user *buf,
-+                                           unsigned long cnt, void *data)
-+{
-+      struct ext4_sb_info *sbi = data;
-+      unsigned long value;
-+      unsigned long prev = 0;
-+      char str[128];
-+      char *cur;
-+      char *end;
-+      unsigned long *new_table;
-+      int num = 0;
-+      int i = 0;
-+
-+      if (cnt >= sizeof(str))
-+              return -EINVAL;
-+      if (copy_from_user(str, buf, cnt))
-+              return -EFAULT;
-+
-+      num = 0;
-+      cur = str;
-+      end = str + cnt;
-+      while (cur < end) {
-+              while ((cur < end) && (*cur == ' ')) cur++;
-+              value = simple_strtol(cur, &cur, 0);
-+              if (value == 0)
-+                      break;
-+              if (value <= prev)
-+                      return -EINVAL;
-+              prev = value;
-+              num++;
-+      }
-+
-+      new_table = kmalloc(num * sizeof(*new_table), GFP_KERNEL);
-+      if (new_table == NULL)
-+              return -ENOMEM;
-+      kfree(sbi->s_mb_prealloc_table);
-+      memset(new_table, 0, num * sizeof(*new_table));
-+      sbi->s_mb_prealloc_table = new_table;
-+      sbi->s_mb_prealloc_table_size = num;
-+      cur = str;
-+      end = str + cnt;
-+      while (cur < end && i < num) {
-+      while ((cur < end) && (*cur == ' ')) cur++;
-+              value = simple_strtol(cur, &cur, 0);
-+              ext4_mb_prealloc_table_add(sbi, value);
-+              i++;
-+      }
-+
-+      return cnt;
-+}
- static void ext4_mb_history_release(struct super_block *sb)
- {
-@@ -2400,6 +2400,7 @@
-               remove_proc_entry("mb_groups", sbi->s_proc);
-               if (sbi->s_mb_history_max)
-                       remove_proc_entry("mb_history", sbi->s_proc);
-+              remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc);
-       }
-       kfree(sbi->s_mb_history);
- }
-@@ -2408,6 +2446,13 @@
-                       p->proc_fops = &ext4_mb_seq_groups_fops;
-                       p->data = sb;
-               }
-+              p = create_proc_entry(EXT4_MB_PREALLOC_TABLE, S_IFREG |
-+                                    S_IRUGO | S_IWUSR, sbi->s_proc);
-+              if (p) {
-+                      p->data = sbi;
-+                      p->read_proc = ext4_mb_prealloc_table_proc_read;
-+                      p->write_proc = ext4_mb_prealloc_table_proc_write;
-+              }
-       }
-
-       sbi->s_mb_history_cur = 0;
-@@ -2542,13 +2562,57 @@
-       sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
-       sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
-       sbi->s_mb_stats = MB_DEFAULT_STATS;
--      sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
-       sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
-       sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
--      sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
-+
-+      if (sbi->s_stripe == 0) {
-+              sbi->s_mb_prealloc_table_size = 10;
-+              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
-+              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
-+              if (sbi->s_mb_prealloc_table == NULL) {
-+                              kfree(sbi->s_mb_offsets);
-+                              kfree(sbi->s_mb_maxs);
-+                              return -ENOMEM;
-+              }
-+              memset(sbi->s_mb_prealloc_table, 0, i);
-+
-+              ext4_mb_prealloc_table_add(sbi, 4);
-+              ext4_mb_prealloc_table_add(sbi, 8);
-+              ext4_mb_prealloc_table_add(sbi, 16);
-+              ext4_mb_prealloc_table_add(sbi, 32);
-+              ext4_mb_prealloc_table_add(sbi, 64);
-+              ext4_mb_prealloc_table_add(sbi, 128);
-+              ext4_mb_prealloc_table_add(sbi, 256);
-+              ext4_mb_prealloc_table_add(sbi, 512);
-+              ext4_mb_prealloc_table_add(sbi, 1024);
-+              ext4_mb_prealloc_table_add(sbi, 2048);
-+
-+              sbi->s_mb_small_req = 256;
-+              sbi->s_mb_large_req = 1024;
-+              sbi->s_mb_group_prealloc = 512;
-+      } else {
-+              sbi->s_mb_prealloc_table_size = 3;
-+              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
-+              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
-+              if (sbi->s_mb_prealloc_table == NULL) {
-+                      kfree(sbi->s_mb_offsets);
-+                      kfree(sbi->s_mb_maxs);
-+                      return -ENOMEM;
-+              }
-+              memset(sbi->s_mb_prealloc_table, 0, i);
-+
-+              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe);
-+              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 2);
-+              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 4);
-+
-+              sbi->s_mb_small_req = sbi->s_stripe;
-+              sbi->s_mb_large_req = sbi->s_stripe * 8;
-+              sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
-+      }
-       sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
-       if (sbi->s_locality_groups == NULL) {
-+              kfree(sbi->s_mb_prealloc_table);
-               kfree(sbi->s_mb_offsets);
-               kfree(sbi->s_mb_maxs);
-               return -ENOMEM;
-@@ -3032,11 +3186,12 @@
- ext4_mb_normalize_request(struct ext4_allocation_context *ac,
-                               struct ext4_allocation_request *ar)
- {
--      int bsbits, max;
-+      int bsbits, i, wind;
-       ext4_lblk_t end;
--      loff_t size, orig_size, start_off;
-+      loff_t size, orig_size;
-       ext4_lblk_t start, orig_start;
-       struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
-+      struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-       struct ext4_prealloc_space *pa;
-       /* do normalize only data requests, metadata requests
-@@ -3066,49 +3221,35 @@
-       size = size << bsbits;
-       if (size < i_size_read(ac->ac_inode))
-               size = i_size_read(ac->ac_inode);
-+      size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits;
--      /* max size of free chunks */
--      max = 2 << bsbits;
-+      start = wind = 0;
--#define NRL_CHECK_SIZE(req, size, max, chunk_size)    \
--              (req <= (size) || max <= (chunk_size))
-+      /* let's choose preallocation window depending on file size */
-+      for (i = 0; i < sbi->s_mb_prealloc_table_size; i++) {
-+              if (size <= sbi->s_mb_prealloc_table[i]) {
-+                      wind = sbi->s_mb_prealloc_table[i];
-+                      break;
-+              }
-+      }
-+      size = wind;
--      /* first, try to predict filesize */
--      /* XXX: should this table be tunable? */
--      start_off = 0;
--      if (size <= 16 * 1024) {
--              size = 16 * 1024;
--      } else if (size <= 32 * 1024) {
--              size = 32 * 1024;
--      } else if (size <= 64 * 1024) {
--              size = 64 * 1024;
--      } else if (size <= 128 * 1024) {
--              size = 128 * 1024;
--      } else if (size <= 256 * 1024) {
--              size = 256 * 1024;
--      } else if (size <= 512 * 1024) {
--              size = 512 * 1024;
--      } else if (size <= 1024 * 1024) {
--              size = 1024 * 1024;
--      } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
--              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
--                                              (21 - bsbits)) << 21;
--              size = 2 * 1024 * 1024;
--      } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
--              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
--                                                      (22 - bsbits)) << 22;
--              size = 4 * 1024 * 1024;
--      } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
--                                      (8<<20)>>bsbits, max, 8 * 1024)) {
--              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
--                                                      (23 - bsbits)) << 23;
--              size = 8 * 1024 * 1024;
--      } else {
--              start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
--              size      = ac->ac_o_ex.fe_len << bsbits;
-+      if (wind == 0) {
-+              __u64 tstart, tend;
-+              /* file is quite large, we now preallocate with
-+               * the biggest configured window with regard to
-+               * logical offset */
-+              wind = sbi->s_mb_prealloc_table[i - 1];
-+              tstart = ac->ac_o_ex.fe_logical;
-+              do_div(tstart, wind);
-+              start = tstart * wind;
-+              tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1;
-+              do_div(tend, wind);
-+              tend = tend * wind + wind;
-+              size = tend - start;
-       }
--      orig_size = size = size >> bsbits;
--      orig_start = start = start_off >> bsbits;
-+      orig_size = size;
-+      orig_start = start;
-       /* don't cover already allocated blocks in selected range */
-       if (ar->pleft && start <= ar->lleft) {
-@@ -3185,7 +3326,6 @@
-       }
-       BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
-                       start > ac->ac_o_ex.fe_logical);
--      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
-       /* now prepare goal request */
-@@ -4077,11 +4217,17 @@
-       /* don't use group allocation for large files */
-       size = max(size, isize);
--      if (size > sbi->s_mb_stream_request) {
-+      if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) ||
-+          (size >= sbi->s_mb_large_req)) {
-               ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
-               return;
-       }
-+      /* request is so large that we don't care about
-+       * streaming - it outweighs any possible seek */
-+      if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req)
-+              return;
-+
-       BUG_ON(ac->ac_lg != NULL);
-       /*
-        * locality group prealloc space are per cpu. The reason for having
-Index: linux-2.6.27.21-0.1/fs/ext4/inode.c
-===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c   2009-05-28 11:12:42.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/inode.c        2009-05-28 11:16:48.000000000 +0530
-@@ -2442,14 +2442,14 @@
-               return -EROFS;
-       /*
--       * Make sure nr_to_write is >= sbi->s_mb_stream_request
-+       * Make sure nr_to_write is >= sbi->s_mb_small_req
-        * This make sure small files blocks are allocated in
-        * single attempt. This ensure that small files
-        * get less fragmented.
-        */
--      if (wbc->nr_to_write < sbi->s_mb_stream_request) {
--              nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
--              wbc->nr_to_write = sbi->s_mb_stream_request;
-+      if (wbc->nr_to_write < sbi->s_mb_small_req) {
-+              nr_to_writebump = sbi->s_mb_small_req - wbc->nr_to_write;
-+              wbc->nr_to_write = sbi->s_mb_small_req;
-       }
-       if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-               range_whole = 1;
diff --git a/ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel5.patch
deleted file mode 100644 (file)
index 66ffd44..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-Index: linux-2.6.18.i386/fs/ext4/namei.c
-===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/namei.c
-+++ linux-2.6.18.i386/fs/ext4/namei.c
-@@ -374,8 +374,9 @@ dx_probe(struct dentry *dentry, struct i
-       if (root->info.hash_version != DX_HASH_TEA &&
-           root->info.hash_version != DX_HASH_HALF_MD4 &&
-           root->info.hash_version != DX_HASH_LEGACY) {
--              ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
--                           root->info.hash_version);
-+              ext4_warning(dir->i_sb, "Unrecognised inode hash code %d"
-+                           " for directory #%lu",
-+                           root->info.hash_version, dir->i_ino);
-               brelse(bh);
-               *err = ERR_BAD_DX_DIR;
-               goto fail;
diff --git a/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-quota-minimal-rhel5.patch
deleted file mode 100644 (file)
index 1e98c8f..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-Index: linux-2.6.18-238.12.1/fs/ext4/ext4.h
-===================================================================
---- linux-2.6.18-238.12.1.orig/fs/ext4/ext4.h  2011-09-21 17:55:44.627741549 +0200
-+++ linux-2.6.18-238.12.1/fs/ext4/ext4.h       2011-09-21 18:05:20.974106450 +0200
-@@ -971,6 +971,7 @@
- #ifdef CONFIG_QUOTA
-       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
-       int s_jquota_fmt;                       /* Format of quota to use */
-+      unsigned long s_qf_inums[MAXQUOTAS];    /* Quota file inodes */
- #endif
-       unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
-       struct rb_root system_blks;
-@@ -1171,6 +1172,7 @@
- #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM               0x0010
- #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
- #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE    0x0040
-+#define EXT4_FEATURE_RO_COMPAT_QUOTA          0x0100
- #define EXT4_FEATURE_INCOMPAT_COMPRESSION     0x0001
- #define EXT4_FEATURE_INCOMPAT_FILETYPE                0x0002
diff --git a/ldiskfs/kernel_patches/patches/ext4-version-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-version-2.6-rhel5.patch
deleted file mode 100644 (file)
index fe9cfeb..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/ext4/super.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/ext4/super.c  2009-07-24 01:33:54.000000000 -0400
-+++ linux-2.6.18-128.1.6/fs/ext4/super.c       2009-07-24 01:35:28.000000000 -0400
-@@ -3461,6 +3461,8 @@ static int __init init_ext4_fs(void)
-               goto out;
-       }
- #endif
-+
-+      printk(KERN_INFO "ldiskfs created from ""ext""4-2.6-rhel5\n");
-       return 0;
- out:
-       destroy_inodecache();
---- /dev/null   2009-09-21 17:11:24.467285554 +0800
-+++ linux-2.6.27.21-0.1/fs/ext4/fiemap.h
-@@ -0,0 +1,2 @@
-+
-+#include_next <fiemap.h>
diff --git a/ldiskfs/kernel_patches/patches/ext4-vmalloc-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-vmalloc-rhel5.patch
deleted file mode 100644 (file)
index e1fa436..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -662,7 +662,12 @@ static void ext4_put_super(struct super_
-       for (i = 0; i < sbi->s_gdb_count; i++)
-               brelse(sbi->s_group_desc[i]);
--      kfree(sbi->s_group_desc);
-+
-+      if (is_vmalloc_addr(sbi->s_group_desc))
-+              vfree(sbi->s_group_desc);
-+      else
-+              kfree(sbi->s_group_desc);
-+
-       if (is_vmalloc_addr(sbi->s_flex_groups))
-               vfree(sbi->s_flex_groups);
-       else
-@@ -2402,12 +2407,13 @@ static int ext4_fill_super(struct super_
-       unsigned long offset = 0;
-       unsigned long journal_devnum = 0;
-       unsigned long def_mount_opts;
--      struct inode *root;
-+      struct inode *root = NULL;
-       char *cp;
-       const char *descr;
-       int ret = -EINVAL;
-       int blocksize;
-       unsigned int db_count;
-+      size_t size;
-       unsigned int i;
-       int needs_recovery, has_huge_files;
-       __u64 blocks_count;
-@@ -2718,10 +2724,16 @@ static int ext4_fill_super(struct super_
-                       (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
-       db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
-                  EXT4_DESC_PER_BLOCK(sb);
--      sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
--                                  GFP_KERNEL);
-+      size = (size_t) db_count * sizeof(struct buffer_head *);
-+      sbi->s_group_desc = kzalloc(size, GFP_KERNEL);
-+      if (sbi->s_group_desc == NULL) {
-+              sbi->s_group_desc = vmalloc(size);
-+              if (sbi->s_group_desc != NULL)
-+                      memset(sbi->s_group_desc, 0, size);
-+      }
-       if (sbi->s_group_desc == NULL) {
--              ext4_msg(sb, KERN_ERR, "not enough memory");
-+              ext4_msg(sb, KERN_ERR, "not enough memory for %u groups (%u)\n",
-+                      sbi->s_groups_count, (unsigned int) size);
-               goto failed_mount;
-       }
-@@ -2907,17 +2919,16 @@ no_journal:
-       if (IS_ERR(root)) {
-               ext4_msg(sb, KERN_ERR, "get root inode failed");
-               ret = PTR_ERR(root);
-+              root = NULL;
-               goto failed_mount4;
-       }
-       if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
--              iput(root);
-               ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
-               goto failed_mount4;
-       }
-       sb->s_root = d_alloc_root(root);
-       if (!sb->s_root) {
-               ext4_msg(sb, KERN_ERR, "get root dentry failed");
--              iput(root);
-               ret = -ENOMEM;
-               goto failed_mount4;
-       }
-@@ -2968,6 +2979,7 @@ no_journal:
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",
-                        err);
-+              ret = err;
-               goto failed_mount4;
-       }
-@@ -3011,6 +3023,8 @@ cantfind_ext4:
-       goto failed_mount;
- failed_mount4:
-+      iput(root);
-+      sb->s_root = NULL;
-       ext4_msg(sb, KERN_ERR, "mount failed");
-       destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
- failed_mount_wq:
-@@ -3033,7 +3047,11 @@ failed_mount3:
- failed_mount2:
-       for (i = 0; i < db_count; i++)
-               brelse(sbi->s_group_desc[i]);
--      kfree(sbi->s_group_desc);
-+
-+      if (is_vmalloc_addr(sbi->s_group_desc))
-+              vfree(sbi->s_group_desc);
-+      else
-+              kfree(sbi->s_group_desc);
- failed_mount:
-       if (sbi->s_proc) {
-               remove_proc_entry(sb->s_id, ext4_proc_root);
-Index: linux-stage/fs/ext4/mballoc.c
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.c
-+++ linux-stage/fs/ext4/mballoc.c
-@@ -2607,10 +2607,21 @@ static int ext4_mb_init_backend(struct s
-       while (array_size < sizeof(*sbi->s_group_info) *
-              num_meta_group_infos_max)
-               array_size = array_size << 1;
--      /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
--       * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
--       * So a two level scheme suffices for now. */
--      sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
-+
-+      /*
-+       * A 16TB filesystem with 64-bit pointers requires an 8192 byte
-+       * kmalloc(). Filesystems larger than 2^32 blocks (16TB normally)
-+       * have group descriptors at least twice as large (64 bytes or
-+       * more vs. 32 bytes for traditional ext3 filesystems), so a 128TB
-+       * filesystem needs a 128kB allocation, which may need vmalloc().
-+       */
-+      sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
-+      if (sbi->s_group_info == NULL) {
-+              sbi->s_group_info = vmalloc(array_size);
-+              if (sbi->s_group_info != NULL)
-+                      memset(sbi->s_group_info, 0, array_size);
-+      }
-+
-       if (sbi->s_group_info == NULL) {
-               printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
-               return -ENOMEM;
-@@ -2620,6 +2631,11 @@ static int ext4_mb_init_backend(struct s
-               printk(KERN_ERR "EXT4-fs: can't get new inode\n");
-               goto err_freesgi;
-       }
-+      /*
-+       * To avoid colliding with a valid on-disk inode number,
-+       * EXT4_BAD_INO is used here as the number of the buddy cache inode.
-+       */
-+      sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
-       EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
-       for (i = 0; i < ngroups; i++) {
-               desc = ext4_get_group_desc(sb, i, NULL);
-@@ -2642,7 +2658,10 @@ err_freebuddy:
-               kfree(sbi->s_group_info[i]);
-       iput(sbi->s_buddy_cache);
- err_freesgi:
--      kfree(sbi->s_group_info);
-+      if (is_vmalloc_addr(sbi->s_group_info))
-+              vfree(sbi->s_group_info);
-+      else
-+              kfree(sbi->s_group_info);
-       return -ENOMEM;
- }
-@@ -2683,14 +2702,6 @@ int ext4_mb_init(struct super_block *sb,
-               i++;
-       } while (i <= sb->s_blocksize_bits + 1);
--      /* init file for buddy data */
--      ret = ext4_mb_init_backend(sb);
--      if (ret != 0) {
--              kfree(sbi->s_mb_offsets);
--              kfree(sbi->s_mb_maxs);
--              return ret;
--      }
--
-       spin_lock_init(&sbi->s_md_lock);
-       spin_lock_init(&sbi->s_bal_lock);
-@@ -2717,6 +2728,14 @@ int ext4_mb_init(struct super_block *sb,
-               spin_lock_init(&lg->lg_prealloc_lock);
-       }
-+      /* init file for buddy data */
-+      ret = ext4_mb_init_backend(sb);
-+      if (ret != 0) {
-+              kfree(sbi->s_mb_offsets);
-+              kfree(sbi->s_mb_maxs);
-+              return ret;
-+      }
-+
-       ext4_mb_history_init(sb);
-       if (sbi->s_journal)
-@@ -2766,7 +2785,10 @@ int ext4_mb_release(struct super_block *
-                       EXT4_DESC_PER_BLOCK_BITS(sb);
-               for (i = 0; i < num_meta_group_infos; i++)
-                       kfree(sbi->s_group_info[i]);
--              kfree(sbi->s_group_info);
-+              if (is_vmalloc_addr(sbi->s_group_info))
-+                      vfree(sbi->s_group_info);
-+              else
-+                      kfree(sbi->s_group_info);
-       }
-       kfree(sbi->s_mb_offsets);
-       kfree(sbi->s_mb_maxs);
diff --git a/ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel5.patch
deleted file mode 100644 (file)
index 20c2c38..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-Index: linux-2.6.18-194.3.1/fs/ext4/namei.c
-===================================================================
---- linux-2.6.18-194.3.1.orig/fs/ext4/namei.c
-+++ linux-2.6.18-194.3.1/fs/ext4/namei.c
-@@ -148,6 +148,17 @@ struct dx_map_entry
-       u16 size;
- };
-+/*
-+ * dentry_param used by ext4_new_inode_wantedi()
-+ */
-+#define LVFS_DENTRY_PARAM_MAGIC               20070216UL
-+struct lvfs_dentry_params
-+{
-+      unsigned long   ldp_inum;
-+      unsigned long   ldp_flags;
-+      u32             ldp_magic;
-+};
-+
- static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
- static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
- static inline unsigned dx_get_hash(struct dx_entry *entry);
-@@ -1761,6 +1772,19 @@ static int ext4_add_nondir(handle_t *han
-       return err;
- }
-+static unsigned ext4_dentry_goal(struct super_block *sb, struct dentry *dentry)
-+{
-+      unsigned inum = EXT4_SB(sb)->s_inode_goal;
-+
-+      if (dentry->d_fsdata != NULL) {
-+              struct lvfs_dentry_params *param = dentry->d_fsdata;
-+
-+              if (param->ldp_magic == LVFS_DENTRY_PARAM_MAGIC)
-+                      inum = param->ldp_inum;
-+      }
-+      return inum;
-+}
-+
- /*
-  * By the time this is called, we already have created
-  * the directory cache entry for the new file, but it
-@@ -1786,7 +1810,8 @@ retry:
-       if (IS_DIRSYNC(dir))
-               ext4_handle_sync(handle);
--      inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
-+      inode = ext4_new_inode(handle, dir, mode, &dentry->d_name,
-+                             ext4_dentry_goal(dir->i_sb, dentry));
-       err = PTR_ERR(inode);
-       if (!IS_ERR(inode)) {
-               inode->i_op = &ext4_file_inode_operations;
-@@ -1820,7 +1845,8 @@ retry:
-       if (IS_DIRSYNC(dir))
-               ext4_handle_sync(handle);
--      inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0);
-+      inode = ext4_new_inode(handle, dir, mode, &dentry->d_name,
-+                             ext4_dentry_goal(dir->i_sb, dentry));
-       err = PTR_ERR(inode);
-       if (!IS_ERR(inode)) {
-               init_special_inode(inode, inode->i_mode, rdev);
-@@ -1857,8 +1883,8 @@ retry:
-       if (IS_DIRSYNC(dir))
-               ext4_handle_sync(handle);
--      inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
--                             &dentry->d_name, 0);
-+      inode = ext4_new_inode(handle, dir, S_IFDIR | mode, &dentry->d_name,
-+                             ext4_dentry_goal(dir->i_sb, dentry));
-       err = PTR_ERR(inode);
-       if (IS_ERR(inode))
-               goto out_stop;
-@@ -2270,8 +2296,8 @@ retry:
-       if (IS_DIRSYNC(dir))
-               ext4_handle_sync(handle);
--      inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
--                             &dentry->d_name, 0);
-+      inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name,
-+                             ext4_dentry_goal(dir->i_sb, dentry));
-       err = PTR_ERR(inode);
-       if (IS_ERR(inode))
-               goto out_stop;
diff --git a/ldiskfs/kernel_patches/patches/ext4_data_in_dirent.patch b/ldiskfs/kernel_patches/patches/ext4_data_in_dirent.patch
deleted file mode 100644 (file)
index 42568cd..0000000
+++ /dev/null
@@ -1,546 +0,0 @@
-This patch implements a feature that allows ext4 filesystem users (e.g. Lustre)
-to store extra data in an ext4 dirent.  The data is stored after the file name
-and the space is accounted in de->rec_len.  The flag EXT4_DIRENT_LUFID is added
-to d_type when extra data is present.
-
-dentry->d_fsdata is used to pass the fid to ext4, so no change to the
-ext4_add_entry() interface is required.
-
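As a rough illustration of the flow described above (a hedged sketch, not code from this patch; the helper name and GFP flag are assumptions), a caller such as Lustre would pack its fid into the struct ext4_dentry_param defined later in this patch and attach it to dentry->d_fsdata before calling ext4_add_entry(); add_dirent_to_buf() then copies the bytes after the name and sets EXT4_DIRENT_LUFID:

/* Sketch only: pack caller data for storage after the dirent name. */
static void *pack_dentry_param(const void *fid, unsigned char fid_len)
{
        struct ext4_dentry_param *p;

        p = kmalloc(sizeof(*p) + fid_len, GFP_NOFS);
        if (p == NULL)
                return NULL;
        p->edp_magic = EXT4_LUFID_MAGIC;        /* checked by ext4_dentry_get_data() */
        p->edp_len   = fid_len;                 /* size of edp_data in bytes */
        memcpy(p->edp_data, fid, fid_len);      /* stored after the file name */
        return p;                               /* caller sets dentry->d_fsdata = p */
}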
-Index: linux-stage/fs/ext4/dir.c
-===================================================================
---- linux-stage.orig/fs/ext4/dir.c
-+++ linux-stage/fs/ext4/dir.c
-@@ -53,11 +53,18 @@ const struct file_operations ext4_dir_op
- static unsigned char get_dtype(struct super_block *sb, int filetype)
- {
-+      int fl_index = filetype & EXT4_FT_MASK;
-+
-       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
--          (filetype >= EXT4_FT_MAX))
-+          (fl_index >= EXT4_FT_MAX))
-               return DT_UNKNOWN;
--      return (ext4_filetype_table[filetype]);
-+      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
-+              return (ext4_filetype_table[fl_index]);
-+
-+      return (ext4_filetype_table[fl_index]) |
-+              (filetype & EXT4_DIRENT_LUFID);
-+
- }
-@@ -70,11 +77,11 @@ int ext4_check_dir_entry(const char *fun
-       const int rlen = ext4_rec_len_from_disk(de->rec_len,
-                                               dir->i_sb->s_blocksize);
--      if (rlen < EXT4_DIR_REC_LEN(1))
-+      if (rlen < __EXT4_DIR_REC_LEN(1))
-               error_msg = "rec_len is smaller than minimal";
-       else if (rlen % 4 != 0)
-               error_msg = "rec_len % 4 != 0";
--      else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
-+      else if (rlen < EXT4_DIR_REC_LEN(de))
-               error_msg = "rec_len is too small for name_len";
-       else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
-               error_msg = "directory entry across blocks";
-@@ -179,7 +186,7 @@ revalidate:
-                                * failure will be detected in the
-                                * dirent test below. */
-                               if (ext4_rec_len_from_disk(de->rec_len,
--                                      sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
-+                                      sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
-                                       break;
-                               i += ext4_rec_len_from_disk(de->rec_len,
-                                                           sb->s_blocksize);
-@@ -342,12 +349,17 @@ int ext4_htree_store_dirent(struct file 
-       struct fname *fname, *new_fn;
-       struct dir_private_info *info;
-       int len;
-+      int extra_data = 1;
-       info = (struct dir_private_info *) dir_file->private_data;
-       p = &info->root.rb_node;
-       /* Create and allocate the fname structure */
--      len = sizeof(struct fname) + dirent->name_len + 1;
-+      if (dirent->file_type & EXT4_DIRENT_LUFID)
-+              extra_data = ext4_get_dirent_data_len(dirent);
-+
-+      len = sizeof(struct fname) + dirent->name_len + extra_data;
-+
-       new_fn = kzalloc(len, GFP_KERNEL);
-       if (!new_fn)
-               return -ENOMEM;
-@@ -356,7 +368,7 @@ int ext4_htree_store_dirent(struct file 
-       new_fn->inode = le32_to_cpu(dirent->inode);
-       new_fn->name_len = dirent->name_len;
-       new_fn->file_type = dirent->file_type;
--      memcpy(new_fn->name, dirent->name, dirent->name_len);
-+      memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
-       new_fn->name[dirent->name_len] = 0;
-       while (*p) {
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -1172,6 +1172,7 @@ static inline void ext4_clear_inode_stat
- #define EXT4_FEATURE_INCOMPAT_64BIT           0x0080
- #define EXT4_FEATURE_INCOMPAT_MMP               0x0100
- #define EXT4_FEATURE_INCOMPAT_FLEX_BG         0x0200
-+#define EXT4_FEATURE_INCOMPAT_DIRDATA         0x1000
- #define EXT4_FEATURE_COMPAT_SUPP      EXT2_FEATURE_COMPAT_EXT_ATTR
- #define EXT4_FEATURE_INCOMPAT_SUPP    (EXT4_FEATURE_INCOMPAT_FILETYPE| \
-@@ -1180,7 +1181,9 @@ static inline void ext4_clear_inode_stat
-                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
-                                        EXT4_FEATURE_INCOMPAT_64BIT| \
-                                        EXT4_FEATURE_INCOMPAT_FLEX_BG| \
--                                       EXT4_FEATURE_INCOMPAT_MMP)
-+                                       EXT4_FEATURE_INCOMPAT_MMP| \
-+                                       EXT4_FEATURE_INCOMPAT_DIRDATA)
-+
- #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
-                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
-@@ -1262,6 +1265,43 @@ struct ext4_dir_entry_2 {
- #define EXT4_FT_SYMLINK               7
- #define EXT4_FT_MAX           8
-+#define EXT4_FT_MASK          0xf
-+
-+#if EXT4_FT_MAX > EXT4_FT_MASK
-+#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
-+#endif
-+
-+/*
-+ * d_type has 4 unused bits, so it can hold four types of data. These different
-+ * types of data (e.g. Lustre data, high 32 bits of 64-bit inode number) can be
-+ * stored, in flag order, after the file name in the ext4 dirent.
-+*/
-+/*
-+ * This flag is added to d_type if the ext4 dirent has extra data after the
-+ * filename. The data length is variable and is stored in the first byte of
-+ * the data. The data starts after the filename NUL byte.
-+ * This is used by Lustre FS.
-+  */
-+#define EXT4_DIRENT_LUFID             0x10
-+
-+#define EXT4_LUFID_MAGIC    0xAD200907UL
-+struct ext4_dentry_param {
-+      __u32  edp_magic;       /* EXT4_LUFID_MAGIC */
-+      char   edp_len;         /* size of edp_data in bytes */
-+      char   edp_data[0];     /* packed array of data */
-+} __attribute__((packed));
-+
-+static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
-+              struct ext4_dentry_param* p)
-+
-+{
-+      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
-+              return NULL;
-+      if (p && p->edp_magic == EXT4_LUFID_MAGIC)
-+              return &p->edp_len;
-+      else
-+              return NULL;
-+}
- /*
-  * EXT4_DIR_PAD defines the directory entries boundaries
-@@ -1270,8 +1310,11 @@ struct ext4_dir_entry_2 {
-  */
- #define EXT4_DIR_PAD                  4
- #define EXT4_DIR_ROUND                        (EXT4_DIR_PAD - 1)
--#define EXT4_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT4_DIR_ROUND) & \
-+#define __EXT4_DIR_REC_LEN(name_len)  (((name_len) + 8 + EXT4_DIR_ROUND) & \
-                                        ~EXT4_DIR_ROUND)
-+#define EXT4_DIR_REC_LEN(de)          (__EXT4_DIR_REC_LEN(de->name_len +\
-+                                      ext4_get_dirent_data_len(de)))
-+
- #define EXT4_MAX_REC_LEN              ((1<<16)-1)
- static inline unsigned int
-@@ -1611,7 +1654,7 @@ extern struct buffer_head * ext4_find_en
-                                           struct ext4_dir_entry_2 ** res_dir);
- #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
- extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
--                                struct inode *inode);
-+                        struct inode *inode, const void *, const void *);
- extern int ext4_orphan_add(handle_t *, struct inode *);
- extern int ext4_orphan_del(handle_t *, struct inode *);
- extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
-@@ -1809,6 +1852,28 @@ static inline void ext4_update_i_disksiz
-       up_write(&EXT4_I(inode)->i_data_sem);
-       return ;
- }
-+/*
-+ * Compute the total directory entry data length.
-+ * This includes the filename and an implicit NUL terminator (always present),
-+ * and optional extensions.  Each extension has a bit set in the high 4 bits of
-+ * de->file_type, and the extension length is the first byte in each entry.
-+ */
-+
-+static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
-+{
-+      char *len = de->name + de->name_len + 1 /* NUL terminator */;
-+      int dlen = 0;
-+      __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
-+
-+      while (extra_data_flags) {
-+              if (extra_data_flags & 1) {
-+                      dlen += *len + (dlen == 0);
-+                      len += *len;
-+              }
-+              extra_data_flags >>= 1;
-+      }
-+      return dlen;
-+}
- struct ext4_group_info {
-       unsigned long   bb_state;
-Index: linux-stage/fs/ext4/namei.c
-===================================================================
---- linux-stage.orig/fs/ext4/namei.c
-+++ linux-stage/fs/ext4/namei.c
-@@ -173,7 +173,8 @@ static unsigned dx_get_count(struct dx_e
- static unsigned dx_get_limit(struct dx_entry *entries);
- static void dx_set_count(struct dx_entry *entries, unsigned value);
- static void dx_set_limit(struct dx_entry *entries, unsigned value);
--static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
-+static inline unsigned dx_root_limit(__u32 blocksize,
-+              struct ext4_dir_entry_2 *dot_de, unsigned infosize);
- static unsigned dx_node_limit(struct inode *dir);
- static struct dx_frame *dx_probe(const struct qstr *d_name,
-                                struct inode *dir,
-@@ -216,11 +217,12 @@ ext4_next_entry(struct ext4_dir_entry_2 
-  */
- struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
- {
--       /* get dotdot first */
--       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
-+      BUG_ON(de->name_len != 1);
-+      /* get dotdot first */
-+      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
--       /* dx root info is after dotdot entry */
--       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
-+      /* dx root info is after dotdot entry */
-+      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
-        return (struct dx_root_info *) de;
- }
-@@ -265,16 +267,23 @@ static inline void dx_set_limit(struct d
-       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
- }
--static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
-+static inline unsigned dx_root_limit(__u32 blocksize,
-+              struct ext4_dir_entry_2 *dot_de, unsigned infosize)
- {
--      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
--              EXT4_DIR_REC_LEN(2) - infosize;
-+      struct ext4_dir_entry_2 *dotdot_de;
-+      unsigned entry_space;
-+
-+      BUG_ON(dot_de->name_len != 1);
-+      dotdot_de = ext4_next_entry(dot_de, blocksize);
-+      entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) -
-+                       EXT4_DIR_REC_LEN(dotdot_de) - infosize;
-+
-       return entry_space / sizeof(struct dx_entry);
- }
- static inline unsigned dx_node_limit(struct inode *dir)
- {
--      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
-+      unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
-       return entry_space / sizeof(struct dx_entry);
- }
-@@ -321,7 +330,7 @@ static struct stats dx_show_leaf(struct 
-                               printk(":%x.%u ", h.hash,
-                                      ((char *) de - base));
-                       }
--                      space += EXT4_DIR_REC_LEN(de->name_len);
-+                      space += EXT4_DIR_REC_LEN(de);
-                       names++;
-               }
-               de = ext4_next_entry(de, size);
-@@ -424,7 +433,8 @@ dx_probe(const struct qstr *d_name, stru
-       entries = (struct dx_entry *) (((char *)info) + info->info_length);
--      if (dx_get_limit(entries) != dx_root_limit(dir,
-+      if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
-+                                                 (struct ext4_dir_entry_2*)bh->b_data,
-                                                  info->info_length)) {
-               ext4_warning(dir->i_sb, "dx entry: limit != root limit");
-               brelse(bh);
-@@ -480,14 +490,17 @@ dx_probe(const struct qstr *d_name, stru
-               if (!indirect--) return frame;
-               if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
-                       goto fail2;
--              at = entries = ((struct dx_node *) bh->b_data)->entries;
-+              entries = ((struct dx_node *) bh->b_data)->entries;
-               if (dx_get_limit(entries) != dx_node_limit (dir)) {
-                       ext4_warning(dir->i_sb,
--                                   "dx entry: limit != node limit");
-+                                    "block %u(%lu): limit %u != node limit %u",
-+                                    dx_get_block(at), (long)bh->b_blocknr,
-+                                    dx_get_limit(entries), dx_node_limit(dir));
-                       brelse(bh);
-                       *err = ERR_BAD_DX_DIR;
-                       goto fail2;
-               }
-+              at = entries;
-               frame++;
-               frame->bh = NULL;
-       }
-@@ -613,7 +626,7 @@ static int htree_dirblock_to_tree(struct
-       de = (struct ext4_dir_entry_2 *) bh->b_data;
-       top = (struct ext4_dir_entry_2 *) ((char *) de +
-                                          dir->i_sb->s_blocksize -
--                                         EXT4_DIR_REC_LEN(0));
-+                                         __EXT4_DIR_REC_LEN(0));
-       for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
-               if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
-                                       (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
-@@ -1025,7 +1038,7 @@ static struct buffer_head * ext4_dx_find
-                       goto errout;
-               de = (struct ext4_dir_entry_2 *) bh->b_data;
-               top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
--                                     EXT4_DIR_REC_LEN(0));
-+                                      __EXT4_DIR_REC_LEN(0));
-               for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
-                       int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
-                                 + ((char *) de - bh->b_data);
-@@ -1186,7 +1199,7 @@ dx_move_dirents(char *from, char *to, st
-       while (count--) {
-               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
-                                               (from + (map->offs<<2));
--              rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+              rec_len = EXT4_DIR_REC_LEN(de);
-               memcpy (to, de, rec_len);
-               ((struct ext4_dir_entry_2 *) to)->rec_len =
-                               ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1210,7 +1223,7 @@ static struct ext4_dir_entry_2* dx_pack_
-       while ((char*)de < base + blocksize) {
-               next = ext4_next_entry(de, blocksize);
-               if (de->inode && de->name_len) {
--                      rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+                      rec_len = EXT4_DIR_REC_LEN(de);
-                       if (de > to)
-                               memmove(to, de, rec_len);
-                       to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1340,10 +1353,16 @@ static int add_dirent_to_buf(handle_t *h
-       unsigned int    offset = 0;
-       unsigned int    blocksize = dir->i_sb->s_blocksize;
-       unsigned short  reclen;
--      int             nlen, rlen, err;
-+      int             nlen, rlen, err, dlen = 0;
-+      unsigned char   *data;
-       char            *top;
--      reclen = EXT4_DIR_REC_LEN(namelen);
-+      data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
-+                                              dentry->d_fsdata);
-+      if (data)
-+              dlen = (*data) + 1;
-+
-+      reclen = __EXT4_DIR_REC_LEN(namelen + dlen);
-       if (!de) {
-               de = (struct ext4_dir_entry_2 *)bh->b_data;
-               top = bh->b_data + blocksize - reclen;
-@@ -1353,7 +1372,7 @@ static int add_dirent_to_buf(handle_t *h
-                               return -EIO;
-                       if (ext4_match(namelen, name, de))
-                               return -EEXIST;
--                      nlen = EXT4_DIR_REC_LEN(de->name_len);
-+                      nlen = EXT4_DIR_REC_LEN(de);
-                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
-                       if ((de->inode? rlen - nlen: rlen) >= reclen)
-                               break;
-@@ -1371,7 +1390,7 @@ static int add_dirent_to_buf(handle_t *h
-       }
-       /* By now the buffer is marked for journaling */
--      nlen = EXT4_DIR_REC_LEN(de->name_len);
-+      nlen = EXT4_DIR_REC_LEN(de);
-       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
-       if (de->inode) {
-               struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
-@@ -1387,6 +1406,12 @@ static int add_dirent_to_buf(handle_t *h
-               de->inode = 0;
-       de->name_len = namelen;
-       memcpy(de->name, name, namelen);
-+      if (data) {
-+              de->name[namelen] = 0;
-+              memcpy(&de->name[namelen + 1], data, *(char *) data);
-+              de->file_type |= EXT4_DIRENT_LUFID;
-+      }
-+
-       /*
-        * XXX shouldn't update any times until successful
-        * completion of syscall, but too many callers depend
-@@ -1485,7 +1510,8 @@ static int make_indexed_dir(handle_t *ha
-       dx_set_block(entries, 1);
-       dx_set_count(entries, 1);
--      dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
-+      dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize,
-+                                       dot_de, sizeof(*dx_info)));
-       /* Initialize as for dx_probe */
-       hinfo.hash_version = dx_info->hash_version;
-@@ -1516,6 +1542,8 @@ static int ext4_update_dotdot(handle_t *
-       struct buffer_head * dir_block;
-       struct ext4_dir_entry_2 * de;
-       int len, journal = 0, err = 0;
-+      int dlen = 0;
-+      char *data;
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -1531,19 +1559,24 @@ static int ext4_update_dotdot(handle_t *
-       /* the first item must be "." */
-       assert(de->name_len == 1 && de->name[0] == '.');
-       len = le16_to_cpu(de->rec_len);
--      assert(len >= EXT4_DIR_REC_LEN(1));
--      if (len > EXT4_DIR_REC_LEN(1)) {
-+      assert(len >= __EXT4_DIR_REC_LEN(1));
-+      if (len > __EXT4_DIR_REC_LEN(1)) {
-               BUFFER_TRACE(dir_block, "get_write_access");
-               err = ext4_journal_get_write_access(handle, dir_block);
-               if (err)
-                       goto out_journal;
-               journal = 1;
--              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
-+              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
-       }
--      len -= EXT4_DIR_REC_LEN(1);
--      assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
-+      len -= EXT4_DIR_REC_LEN(de);
-+      data = ext4_dentry_get_data(dir->i_sb,
-+                      (struct ext4_dentry_param *) dentry->d_fsdata);
-+      if (data)
-+              dlen = *data + 1;
-+      assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
-+
-       de = (struct ext4_dir_entry_2 *)
-                       ((char *) de + le16_to_cpu(de->rec_len));
-       if (!journal) {
-@@ -1557,10 +1590,15 @@ static int ext4_update_dotdot(handle_t *
-       if (len > 0)
-               de->rec_len = cpu_to_le16(len);
-       else
--              assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
-+              assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
-       de->name_len = 2;
-       strcpy (de->name, "..");
-       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+      if (data) {
-+              de->name[2] = 0;
-+              memcpy(&de->name[2 + 1], data, dlen);
-+              de->file_type |= EXT4_DIRENT_LUFID;
-+      }
- out_journal:
-       if (journal) {
-@@ -1982,12 +2020,13 @@ retry:
- /* Initialize @inode as a subdirectory of @dir, and add the
-  * "." and ".." entries into the first directory block. */
- int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
--                      struct inode *inode)
-+                       struct inode *inode,
-+                       const void *data1, const void *data2)
- {
-       unsigned int blocksize = dir->i_sb->s_blocksize;
-       struct buffer_head * dir_block;
-       struct ext4_dir_entry_2 * de;
--      int err = 0;
-+      int err = 0, dot_reclen;
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-@@ -1999,28 +2038,42 @@ int ext4_add_dot_dotdot(handle_t *handle
-       inode->i_fop = &ext4_dir_operations;
-       inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
-       dir_block = ext4_bread(handle, inode, 0, 1, &err);
--      if (!dir_block) {
--              clear_nlink(inode);
--              ext4_mark_inode_dirty(handle, inode);
--              iput (inode);
-+      if (!dir_block)
-               goto get_out;
--      }
-+
-       BUFFER_TRACE(dir_block, "get_write_access");
-       ext4_journal_get_write_access(handle, dir_block);
-       de = (struct ext4_dir_entry_2 *) dir_block->b_data;
-       de->inode = cpu_to_le32(inode->i_ino);
-       de->name_len = 1;
--      de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
--                                         blocksize);
-       strcpy(de->name, ".");
-       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+      /* get packed fid data*/
-+      data1 = ext4_dentry_get_data(dir->i_sb,
-+                              (struct ext4_dentry_param *) data1);
-+      if (data1) {
-+              de->name[1] = 0;
-+              memcpy(&de->name[2], data1, *(char *) data1);
-+              de->file_type |= EXT4_DIRENT_LUFID;
-+      }
-+      de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
-+      dot_reclen = cpu_to_le16(de->rec_len);
-+
-       de = ext4_next_entry(de, blocksize);
-       de->inode = cpu_to_le32(dir->i_ino);
--      de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
-+      de->rec_len = ext4_rec_len_to_disk(blocksize - dot_reclen,
-                                          blocksize);
-       de->name_len = 2;
-       strcpy(de->name, "..");
-       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+      data2 = ext4_dentry_get_data(dir->i_sb,
-+                      (struct ext4_dentry_param *) data2);
-+      if (data2) {
-+              de->name[2] = 0;
-+              memcpy(&de->name[3], data2, *(char *) data2);
-+              de->file_type |= EXT4_DIRENT_LUFID;
-+      }
-+
-       inode->i_nlink = 2;
-       BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-       ext4_handle_dirty_metadata(handle, dir, dir_block);
-@@ -2057,9 +2110,14 @@ retry:
-       if (IS_ERR(inode))
-               goto out_stop;
--      err = ext4_add_dot_dotdot(handle, dir, inode);
--      if (err)
-+      err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL);
-+      if (err) {
-+              clear_nlink(inode);
-+              unlock_new_inode(inode);
-+              ext4_mark_inode_dirty(handle, inode);
-+              iput (inode);
-               goto out_stop;
-+      }
-       err = ext4_add_entry(handle, dentry, inode);
-       if (err) {
-@@ -2093,7 +2151,7 @@ static int empty_dir(struct inode *inode
-       int err = 0;
-       sb = inode->i_sb;
--      if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
-+      if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) ||
-           !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
-               if (err)
-                       ext4_error(inode->i_sb,
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
deleted file mode 100644 (file)
index abf7009..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-ext4-version-2.6-rhel5.patch
-ext4-wantedi-2.6-rhel5.patch
-ext4-map_inode_page-2.6.18-rhel5.patch
-export-ext4-2.6-rhel5.patch
-ext4-remove-cond_resched-calls-rhel5.patch
-ext4-nlink-2.6-rhel5.patch
-ext4-inode-version-rhel5.patch
-ext4-mmp-rhel5.patch
-ext4-lookup-dotdot-rhel5.patch
-ext4-max-dir-size-rhel5.patch
-ext4-print-inum-in-htree-warning-rhel5.patch
-ext4-xattr-no-update-ctime-rhel5.patch
-ext4-prealloc-rhel5.patch
-ext4-mballoc-extra-checks-rhel5.patch
-ext4-misc-rhel5.patch
-ext4-big-endian-check-2.6-rhel5.patch
-ext4-alloc-policy-2.6-rhel5.patch
-ext4-force_over_128tb-rhel5.patch
-ext4-pdir-fix.patch
-ext4-osd-iop-common.patch
-ext4-osd-iam-exports.patch
-ext4-dynlocks-common.patch
-ext4-dynlocks-2.6-rhel5.patch
-ext4-hash-indexed-dir-dotdot-update-rhel5.patch
-ext4-ext_generation-sles11.patch
-ext4-kill-dx_root.patch
-ext4-fiemap-2.6-rhel5.patch
-ext4-mballoc-pa_free-mismatch.patch
-ext4_data_in_dirent.patch
-ext4-large-eas.patch
-ext4-disable-mb-cache-rhel5.patch
-ext4-disable-delalloc-rhel5.patch
-ext4-back-dquot-to-rhel54.patch
-ext4-nocmtime-2.6-rhel5.patch
-ext4-failed-mount-b23368.patch
-ext4-export-64bit-name-hash.patch
-ext4-vmalloc-rhel5.patch
-ext4-mballoc-group_check-rhel5.patch
-ext4-journal-callback-rhel5.patch
-ext4-store-tree-generation-at-find.patch
-ext4-quota-minimal-rhel5.patch
diff --git a/lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel5.patch b/lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel5.patch
deleted file mode 100644 (file)
index 3874794..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-Index: linux-2.6.18-164.11.1/include/linux/blkdev.h
-===================================================================
---- linux-2.6.18-164.11.1.orig/include/linux/blkdev.h
-+++ linux-2.6.18-164.11.1/include/linux/blkdev.h
-@@ -788,10 +788,10 @@ extern void blk_free_tags(struct blk_que
- extern void blk_rq_bio_prep(request_queue_t *, struct request *, struct bio *);
- extern int blkdev_issue_flush(struct block_device *, sector_t *);
--#define MAX_PHYS_SEGMENTS 128
--#define MAX_HW_SEGMENTS 128
-+#define MAX_PHYS_SEGMENTS 256
-+#define MAX_HW_SEGMENTS 256
- #define SAFE_MAX_SECTORS 255
--#define BLK_DEF_MAX_SECTORS 1024
-+#define BLK_DEF_MAX_SECTORS 2048
- #define MAX_SEGMENT_SIZE      65536
-Index: linux-2.6.18-164.11.1/include/scsi/scsi_host.h
-===================================================================
---- linux-2.6.18-164.11.1.orig/include/scsi/scsi_host.h
-+++ linux-2.6.18-164.11.1/include/scsi/scsi_host.h
-@@ -30,7 +30,7 @@ struct blk_queue_tags;
-  *     used in one scatter-gather request.
-  */
- #define SG_NONE 0
--#define SG_ALL 0xff
-+#define SG_ALL 256
- #define DISABLE_CLUSTERING 0
-Index: linux-2.6.18-164.11.1/drivers/scsi/lpfc/lpfc.h
-===================================================================
---- linux-2.6.18-164.11.1.orig/drivers/scsi/lpfc/lpfc.h
-+++ linux-2.6.18-164.11.1/drivers/scsi/lpfc/lpfc.h
-@@ -38,7 +38,7 @@
- #define LPFC_MAX_NS_RETRY     3       /* Number of retry attempts to contact
-                                          the NameServer  before giving up. */
- #define LPFC_CMD_PER_LUN      3       /* max outstanding cmds per lun */
--#define LPFC_DEFAULT_SG_SEG_CNT       64      /* sg element count per scsi cmnd */
-+#define LPFC_DEFAULT_SG_SEG_CNT       256     /* sg element count per scsi cmnd */
- #define LPFC_MAX_SG_SEG_CNT   256     /* sg element count per scsi cmnd */
- #define LPFC_IOCB_LIST_CNT    2250    /* list of IOCBs for fast-path usage. */
- #define LPFC_Q_RAMP_UP_INTERVAL 120     /* lun q_depth ramp up interval */
diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/dev_read_only-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index a12fb3f..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-This patch is no longer needed for Lustre.  It is only included
-for testing and ease of using the same kernel with older Lustre
-versions.  This testing functionality was replaced in Linux 3.0
-by the dm-flakey driver.
-
-This functionality is mainly used during testing, in order to
-simulate a server crash for ldiskfs by discarding all of the
-writes to the filesystem.  For recovery testing we could simulate
-this by using a special loopback or DM device that also discards
-writes to the device.
-
-This functionality is also used by target "failback" in order
-to speed up service shutdown and takeover by the other node
-during controlled operation.  However, it would also be possible
-to do this by simply allowing all of the in-flight requests to
-complete and then waiting for the service to stop.  This will
-also be needed by the DMU-OSD, because discarding of writes on
-a DMU-based target is not safe as it could trigger a storage
-failure if the data is ever read from disk again and the
-checksum does not match that expected by the block pointer.
-
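To illustrate how these hooks were meant to be driven (a hedged sketch, not actual Lustre code; the function name is hypothetical), a crash-simulation path marks the underlying block device read-only so that later writes are silently discarded, and the flag is cleared again in __blkdev_put() when the device is finally closed:

/* sketch: simulate a server crash by discarding all further writes */
static void simulate_server_crash(struct super_block *sb)
{
        dev_set_rdonly(sb->s_bdev);     /* writes now complete without reaching disk */

        /* ... proceed with a fast umount; the read-only flag is dropped in
         *     __blkdev_put(), or explicitly with dev_clear_rdonly(sb->s_bdev) */
}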
-Index: linux-2.6.18.1/block/ll_rw_blk.c
-===================================================================
---- linux-2.6.18.1.orig/block/ll_rw_blk.c
-+++ linux-2.6.18.1/block/ll_rw_blk.c
-@@ -3067,6 +3067,8 @@ static void handle_bad_sector(struct bio
-       set_bit(BIO_EOF, &bio->bi_flags);
- }
-+int dev_check_rdonly(struct block_device *bdev);
-+
- /**
-  * generic_make_request: hand a buffer to its device driver for I/O
-  * @bio:  The bio describing the location in memory and on the device.
-@@ -3151,6 +3153,12 @@ end_io:
-               if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
-                       goto end_io;
-
-+               /* this is cfs's dev_rdonly check */
-+               if (bio_rw(bio) == WRITE && dev_check_rdonly(bio->bi_bdev)) {
-+                       bio_endio(bio, bio->bi_size, 0);
-+                       break;
-+               }
-+
-               /*
-                * If this device has partitions, remap block n
-@@ -3765,6 +3773,91 @@ void swap_io_context(struct io_context *
-       *ioc2 = temp;
- }
- EXPORT_SYMBOL(swap_io_context);
-+ /*
-+ * Debug code for turning block devices "read-only" (will discard writes
-+ * silently).  This is for filesystem crash/recovery testing.
-+ */
-+struct deventry {
-+      dev_t dev;
-+      struct deventry *next;
-+};
-+
-+static struct deventry *devlist = NULL;
-+static spinlock_t devlock = SPIN_LOCK_UNLOCKED; 
-+
-+int dev_check_rdonly(struct block_device *bdev) 
-+{
-+      struct deventry *cur;
-+      if (!bdev) return 0;
-+      spin_lock(&devlock);
-+      cur = devlist;
-+      while(cur) {
-+              if (bdev->bd_dev == cur->dev) {
-+                      spin_unlock(&devlock);
-+                      return 1;
-+      }
-+              cur = cur->next;
-+      }
-+      spin_unlock(&devlock);
-+      return 0;
-+}
-+
-+void dev_set_rdonly(struct block_device *bdev)
-+{
-+      struct deventry *newdev, *cur;
-+
-+      if (!bdev) 
-+              return;
-+      newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL);
-+      if (!newdev) 
-+              return;
-+      
-+      spin_lock(&devlock);
-+      cur = devlist;
-+      while(cur) {
-+              if (bdev->bd_dev == cur->dev) {
-+                      spin_unlock(&devlock);
-+                      kfree(newdev);
-+                      return;
-+              }
-+              cur = cur->next;
-+      }
-+      newdev->dev = bdev->bd_dev;
-+      newdev->next = devlist;
-+      devlist = newdev;
-+      spin_unlock(&devlock);
-+      printk(KERN_WARNING "Turning device %s (%#x) read-only\n",
-+             bdev->bd_disk ? bdev->bd_disk->disk_name : "", bdev->bd_dev);
-+}
-+
-+void dev_clear_rdonly(struct block_device *bdev) 
-+{
-+      struct deventry *cur, *last = NULL;
-+      if (!bdev) return;
-+      spin_lock(&devlock);
-+      cur = devlist;
-+      while(cur) {
-+              if (bdev->bd_dev == cur->dev) {
-+                      if (last) 
-+                              last->next = cur->next;
-+                      else
-+                              devlist = cur->next;
-+                      spin_unlock(&devlock);
-+                      kfree(cur);
-+                      printk(KERN_WARNING "Removing read-only on %s (%#x)\n",
-+                             bdev->bd_disk ? bdev->bd_disk->disk_name :
-+                                             "unknown block", bdev->bd_dev);
-+                      return;
-+              }
-+              last = cur;
-+              cur = cur->next;
-+      }
-+      spin_unlock(&devlock);
-+}
-+
-+EXPORT_SYMBOL(dev_set_rdonly);
-+EXPORT_SYMBOL(dev_clear_rdonly);
-+EXPORT_SYMBOL(dev_check_rdonly);
- /*
-  * sysfs parts below
-Index: linux-2.6.18.1/fs/block_dev.c
-===================================================================
---- linux-2.6.18.1.orig/fs/block_dev.c
-+++ linux-2.6.18.1/fs/block_dev.c
-@@ -1059,6 +1059,7 @@ static int __blkdev_put(struct block_dev
-               if (bdev != bdev->bd_contains)
-                       victim = bdev->bd_contains;
-               bdev->bd_contains = NULL;
-+              dev_clear_rdonly(bdev);
-       }
-       unlock_kernel();
-       mutex_unlock(&bdev->bd_mutex);
-Index: linux-2.6.18.1/include/linux/fs.h
-===================================================================
---- linux-2.6.18.1.orig/include/linux/fs.h
-+++ linux-2.6.18.1/include/linux/fs.h
-@@ -1685,6 +1685,10 @@ extern void file_kill(struct file *f);
- struct bio;
- extern void submit_bio(int, struct bio *);
- extern int bdev_read_only(struct block_device *);
-+#define HAVE_CLEAR_RDONLY_ON_PUT
-+void dev_set_rdonly(struct block_device *bdev);
-+int dev_check_rdonly(struct block_device *bdev);
-+void dev_clear_rdonly(struct block_device *bdev);
- extern int set_blocksize(struct block_device *, int);
- extern int sb_set_blocksize(struct super_block *, int);
- extern int sb_min_blocksize(struct super_block *, int);
diff --git a/lustre/kernel_patches/patches/export-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/export-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index 9727ea4..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-Allow starting the commit of a journal transaction, without waiting for
-it to complete.  This is a performance enhancement for OST IO so that
-the journal commit can run concurrently with the file IO.  It isn't
-necessary if the client can handle bulk IO recovery (bug 16919).
-
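A minimal sketch of the intended usage (assumed, not the actual Lustre caller; the wrapper function is hypothetical): after finishing its updates the server kicks off the commit and overlaps other work, blocking only when the commit really has to be on disk:

/* sketch: start the commit without blocking, wait for it later */
static tid_t start_commit_async(journal_t *journal, handle_t *handle)
{
        tid_t tid = handle->h_transaction->t_tid;

        journal_stop(handle);            /* finish our part of the transaction */
        log_start_commit(journal, tid);  /* begin committing, do not wait */
        return tid;                      /* caller runs log_wait_commit(journal, tid)
                                          * once the data must be stable on disk */
}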
-Index: linux-2.6/fs/jbd/journal.c
-===================================================================
---- linux-2.6.orig/fs/jbd/journal.c    2006-07-15 16:13:50.000000000 +0800
-+++ linux-2.6/fs/jbd/journal.c 2006-07-15 16:22:04.000000000 +0800
-@@ -74,6 +74,7 @@ EXPORT_SYMBOL(journal_abort);
- EXPORT_SYMBOL(journal_errno);
- EXPORT_SYMBOL(journal_ack_err);
- EXPORT_SYMBOL(journal_clear_err);
-+EXPORT_SYMBOL(log_start_commit);
- EXPORT_SYMBOL(log_wait_commit);
- EXPORT_SYMBOL(journal_start_commit);
- EXPORT_SYMBOL(journal_force_commit_nested);
diff --git a/lustre/kernel_patches/patches/export_symbol_numa-2.6-fc5.patch b/lustre/kernel_patches/patches/export_symbol_numa-2.6-fc5.patch
deleted file mode 100644 (file)
index 095c1de..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-Index: linux-2.6.16.i686/arch/i386/kernel/smpboot.c
-===================================================================
---- linux-2.6.16.i686.orig/arch/i386/kernel/smpboot.c  2006-05-30 15:47:03.000000000 +0800
-+++ linux-2.6.16.i686/arch/i386/kernel/smpboot.c       2006-05-30 21:22:02.000000000 +0800
-@@ -579,6 +579,7 @@
- /* which logical CPUs are on which nodes */
- cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
-                               { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-+EXPORT_SYMBOL(node_2_cpu_mask);
- /* which node each logical CPU is on */
- int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
- EXPORT_SYMBOL(cpu_2_node);
diff --git a/lustre/kernel_patches/patches/export_symbols-2.6.12.patch b/lustre/kernel_patches/patches/export_symbols-2.6.12.patch
deleted file mode 100644 (file)
index 26f02c2..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-This patch is not needed for 2.x, but is kept to allow the same kernel
-to be used between 1.8.x and 2.0.x for ease of upgrade.
-
-Index: linux-2.6.12-rc6/fs/dcache.c
-===================================================================
---- linux-2.6.12-rc6.orig/fs/dcache.c  2005-06-14 15:53:19.812195198 +0200
-+++ linux-2.6.12-rc6/fs/dcache.c       2005-06-14 15:53:58.385436913 +0200
-@@ -1581,6 +1581,7 @@
-       return result;
- }
-+EXPORT_SYMBOL(is_subdir);
- void d_genocide(struct dentry *root)
- {
diff --git a/lustre/kernel_patches/patches/fix-forever-in-do_get_write_access.patch b/lustre/kernel_patches/patches/fix-forever-in-do_get_write_access.patch
deleted file mode 100644 (file)
index 57d12ee..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-commit 229309caebe4508d650bb6d8f7d51f2b116f5bbd
-Author: Jan Kara <jack@suse.cz>
-Date:   Sun May 8 19:09:53 2011 -0400
-
-jbd2: Fix forever sleeping process in do_get_write_access()
-
-In do_get_write_access() we wait on BH_Unshadow bit for buffer to get
-from shadow state. The waking code in journal_commit_transaction() has
-a bug because it does not issue a memory barrier after the buffer is
-moved from the shadow state and before wake_up_bit() is called. Thus a
-waitqueue check can happen before the buffer is actually moved from
-the shadow state and the waiting process may never be woken. Fix the
-problem by issuing a proper barrier.
-
-Reported-by: Tao Ma <boyu.mt@taobao.com>
-Signed-off-by: Jan Kara <jack@suse.cz>
-Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
----
- fs/jbd2/commit.c |    9 +++++++--
- 1 files changed, 7 insertions(+), 2 deletions(-)
-
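The pairing the fix relies on has roughly this shape (a generic sketch; the list helpers marked hypothetical are not real jbd2 functions, the rest are standard kernel primitives): the waker must publish its state change before wake_up_bit() inspects the wait queue, and the waiter re-checks the state after queueing itself:

/* waker side (commit path) */
static void shadow_done(struct journal_head *jh, struct buffer_head *bh)
{
        move_off_shadow_list(jh);               /* hypothetical: the state change */
        smp_mb();                               /* order it before the waitqueue check */
        wake_up_bit(&bh->b_state, BH_Unshadow);
}

/* waiter side (do_get_write_access) */
static void shadow_wait(struct journal_head *jh, struct buffer_head *bh)
{
        DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
        wait_queue_head_t *wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);

        for (;;) {
                prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
                if (!on_shadow_list(jh))        /* hypothetical re-check */
                        break;
                schedule();
        }
        finish_wait(wqh, &wait.wait);
}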
-Index: linux-2.6.18.4/fs/jbd2/commit.c
-===================================================================
---- linux-2.6.18.4.orig/fs/jbd2/commit.c
-+++ linux-2.6.18.4/fs/jbd2/commit.c
-@@ -788,8 +788,13 @@ wait_for_iobuf:
-                    required. */
-               JBUFFER_TRACE(jh, "file as BJ_Forget");
-               jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
--              /* Wake up any transactions which were waiting for this
--                 IO to complete */
-+              /*
-+               * Wake up any transactions which were waiting for this IO to
-+               * complete. The barrier must be here so that changes by
-+               * jbd2_journal_file_buffer() take effect before wake_up_bit()
-+               * does the waitqueue check.
-+               */
-+              smp_mb();
-               wake_up_bit(&bh->b_state, BH_Unshadow);
-               JBUFFER_TRACE(jh, "brelse shadowed buffer");
-               __brelse(bh);
diff --git a/lustre/kernel_patches/patches/jbd-jcberr-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/jbd-jcberr-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index 0276d59..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-Implement a JBD per-transaction commit callback.  Users can attach arbitrary
-callbacks to a journal handle, which are propagated to the transaction at
-journal handle stop time.  The commit callbacks are run when the transaction
-has finished committing, and will be passed a non-zero error code if there was
-a commit error.
-
-Signed-off-by: Andreas Dilger <adilger@sun.com>
-
-
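A small usage sketch of the API this patch adds (only struct journal_callback and journal_callback_set() come from the patch; the wrapper struct, its private field, and the GFP flag are assumptions): embed struct journal_callback as the first member, register the callback while the handle is open, and free the memory from the callback after commit:

struct my_commit_cb {
        struct journal_callback jcb;    /* embedded first, per the patch's comment */
        void *private;                  /* caller data, hypothetical */
};

static void my_commit_done(struct journal_callback *jcb, int error)
{
        struct my_commit_cb *cb = container_of(jcb, struct my_commit_cb, jcb);

        /* error is non-zero if the transaction hit a commit error */
        kfree(cb);                      /* the callback owns the allocation */
}

static int register_commit_cb(handle_t *handle)
{
        struct my_commit_cb *cb = kmalloc(sizeof(*cb), GFP_NOFS);

        if (cb == NULL)
                return -ENOMEM;
        journal_callback_set(handle, my_commit_done, &cb->jcb);
        return 0;
}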
-Index: linux-2.6/include/linux/jbd.h
-===================================================================
---- linux-2.6.orig/include/linux/jbd.h 2006-07-15 16:08:35.000000000 +0800
-+++ linux-2.6/include/linux/jbd.h      2006-07-15 16:13:01.000000000 +0800
-@@ -356,6 +356,27 @@ static inline void jbd_unlock_bh_journal
-       bit_spin_unlock(BH_JournalHead, &bh->b_state);
- }
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+/**
-+ * struct journal_callback - Base structure for callback information
-+ * @jcb_list: list information for other callbacks attached to the same handle
-+ * @jcb_func: Function to call with this callback structure
-+ *
-+ * This struct is a 'seed' structure for use with your own callback
-+ * structs. If you are using callbacks you must allocate one of these
-+ * or another struct of your own definition which has this struct
-+ * as its first element and pass it to journal_callback_set().
-+ *
-+ * This is used internally by jbd to maintain callback information.
-+ *
-+ * See journal_callback_set for more information.
-+ **/
-+struct journal_callback {
-+      struct list_head jcb_list;              /* t_jcb_lock */
-+      void (*jcb_func)(struct journal_callback *jcb, int error);
-+      /* caller data goes here */
-+};
-+
- struct jbd_revoke_table_s;
- /**
-@@ -364,6 +385,7 @@ struct jbd_revoke_table_s;
-  * @h_transaction: Which compound transaction is this update a part of?
-  * @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
-  * @h_ref: Reference count on this handle
-+ * @h_jcb: List of application registered callbacks for this handle.
-  * @h_err: Field for caller's use to track errors through large fs operations
-  * @h_sync: flag for sync-on-close
-  * @h_jdata: flag to force data journaling
-@@ -389,6 +411,13 @@ struct handle_s 
-       /* operations */
-       int                     h_err;
-+      /*
-+       * List of application registered callbacks for this handle. The
-+       * function(s) will be called after the transaction that this handle is
-+       * part of has been committed to disk. [t_jcb_lock]
-+       */
-+      struct list_head        h_jcb;
-+
-       /* Flags [no locking] */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-@@ -430,6 +459,8 @@ struct handle_s 
-  *    j_state_lock
-  *    ->j_list_lock                   (journal_unmap_buffer)
-  *
-+ *    t_handle_lock
-+ *    ->t_jcb_lock
-  */
- struct transaction_s 
-@@ -559,6 +590,15 @@ struct transaction_s 
-        */
-       int t_handle_count;
-+      /*
-+       * Protects the callback list
-+       */
-+      spinlock_t              t_jcb_lock;
-+      /*
-+       * List of registered callback functions for this transaction.
-+       * Called when the transaction is committed. [t_jcb_lock]
-+       */
-+      struct list_head        t_jcb;
- };
- /**
-@@ -906,6 +946,10 @@ extern void        journal_invalidatepage(jour
- extern int     journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
- extern int     journal_stop(handle_t *);
- extern int     journal_flush (journal_t *);
-+extern void    journal_callback_set(handle_t *handle,
-+                                    void (*fn)(struct journal_callback *,int),
-+                                    struct journal_callback *jcb);
-+
- extern void    journal_lock_updates (journal_t *);
- extern void    journal_unlock_updates (journal_t *);
-Index: linux-2.6/fs/jbd/checkpoint.c
-===================================================================
---- linux-2.6.orig/fs/jbd/checkpoint.c 2006-07-15 16:08:36.000000000 +0800
-+++ linux-2.6/fs/jbd/checkpoint.c      2006-07-15 16:13:01.000000000 +0800
-@@ -688,6 +688,7 @@ void __journal_drop_transaction(journal_
-       J_ASSERT(transaction->t_checkpoint_list == NULL);
-       J_ASSERT(transaction->t_checkpoint_io_list == NULL);
-       J_ASSERT(transaction->t_updates == 0);
-+      J_ASSERT(list_empty(&transaction->t_jcb));
-       J_ASSERT(journal->j_committing_transaction != transaction);
-       J_ASSERT(journal->j_running_transaction != transaction);
-Index: linux-2.6/fs/jbd/commit.c
-===================================================================
---- linux-2.6.orig/fs/jbd/commit.c     2006-07-15 16:08:36.000000000 +0800
-+++ linux-2.6/fs/jbd/commit.c  2006-07-15 16:13:01.000000000 +0800
-@@ -708,6 +708,32 @@ wait_for_iobuf:
-            transaction can be removed from any checkpoint list it was on
-            before. */
-+      /*
-+       * Call any callbacks that had been registered for handles in this
-+       * transaction.  It is up to the callback to free any allocated
-+       * memory.
-+       *
-+       * Locking not strictly required, since this is the only process
-+       * touching this transaction anymore, but is done to keep code
-+       * checkers happy and has no contention in any case.
-+       */
-+      spin_lock(&commit_transaction->t_jcb_lock);
-+      if (!list_empty(&commit_transaction->t_jcb)) {
-+              struct list_head *p, *n;
-+              int error = is_journal_aborted(journal);
-+
-+              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+                      struct journal_callback *jcb;
-+
-+                      jcb = list_entry(p, struct journal_callback, jcb_list);
-+                      list_del_init(p);
-+                      spin_unlock(&commit_transaction->t_jcb_lock);
-+                      jcb->jcb_func(jcb, error);
-+                      spin_lock(&commit_transaction->t_jcb_lock);
-+              }
-+      }
-+      spin_unlock(&commit_transaction->t_jcb_lock);
-+
-       jbd_debug(3, "JBD: commit phase 7\n");
-       J_ASSERT(commit_transaction->t_sync_datalist == NULL);
-Index: linux-2.6/fs/jbd/journal.c
-===================================================================
---- linux-2.6.orig/fs/jbd/journal.c    2006-07-15 16:08:36.000000000 +0800
-+++ linux-2.6/fs/jbd/journal.c 2006-07-15 16:13:01.000000000 +0800
-@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
- #endif
- EXPORT_SYMBOL(journal_flush);
- EXPORT_SYMBOL(journal_revoke);
-+EXPORT_SYMBOL(journal_callback_set);
- EXPORT_SYMBOL(journal_init_dev);
- EXPORT_SYMBOL(journal_init_inode);
-@@ -80,6 +81,7 @@ EXPORT_SYMBOL(journal_wipe);
- EXPORT_SYMBOL(journal_blocks_per_page);
- EXPORT_SYMBOL(journal_invalidatepage);
- EXPORT_SYMBOL(journal_try_to_free_buffers);
-+EXPORT_SYMBOL(journal_bmap);
- EXPORT_SYMBOL(journal_force_commit);
- static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
-Index: linux-2.6/fs/jbd/transaction.c
-===================================================================
---- linux-2.6.orig/fs/jbd/transaction.c        2006-07-15 16:08:35.000000000 +0800
-+++ linux-2.6/fs/jbd/transaction.c     2006-07-15 16:13:01.000000000 +0800
-@@ -50,7 +50,9 @@ get_transaction(journal_t *journal, tran
-       transaction->t_state = T_RUNNING;
-       transaction->t_tid = journal->j_transaction_sequence++;
-       transaction->t_expires = jiffies + journal->j_commit_interval;
-+      INIT_LIST_HEAD(&transaction->t_jcb);
-       spin_lock_init(&transaction->t_handle_lock);
-+      spin_lock_init(&transaction->t_jcb_lock);
-       /* Set up the commit timer for the new transaction. */
-       journal->j_commit_timer.expires = transaction->t_expires;
-@@ -241,6 +243,7 @@ static handle_t *new_handle(int nblocks)
-       memset(handle, 0, sizeof(*handle));
-       handle->h_buffer_credits = nblocks;
-       handle->h_ref = 1;
-+      INIT_LIST_HEAD(&handle->h_jcb);
-       return handle;
- }
-@@ -1291,6 +1294,35 @@ drop:
- }
- /**
-+ * void journal_callback_set() -  Register a callback function for this handle.
-+ * @handle: handle to attach the callback to.
-+ * @func: function to call back.
-+ * @jcb:  structure with additional information required by func(), and
-+ *        some space for jbd internal information.
-+ * 
-+ * The function will be called when the transaction that this handle is
-+ * part of has been committed to disk with the original callback data
-+ * struct and the error status of the journal as parameters.  There is no
-+ * guarantee of ordering between handles within a single transaction, nor
-+ * between callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations.  The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * followed by the caller-specific data.
-+ */
-+void journal_callback_set(handle_t *handle,
-+                      void (*func)(struct journal_callback *jcb, int error),
-+                      struct journal_callback *jcb)
-+{
-+      jcb->jcb_func = func;
-+      spin_lock(&handle->h_transaction->t_jcb_lock);
-+      list_add_tail(&jcb->jcb_list, &handle->h_jcb);
-+      spin_unlock(&handle->h_transaction->t_jcb_lock);
-+}
-+
-+/**
-  * int journal_stop() - complete a transaction
-  * @handle: transaction to complete.
-  * 
-@@ -1363,6 +1396,11 @@ int journal_stop(handle_t *handle)
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
-+      /* Move callbacks from the handle to the transaction. */
-+      spin_lock(&transaction->t_jcb_lock);
-+      list_splice(&handle->h_jcb, &transaction->t_jcb);
-+      spin_unlock(&transaction->t_jcb_lock);
-+
-       /*
-        * If the handle is marked SYNC, we need to set another commit
-        * going!  We also want to force a commit if the current
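For reference, a minimal sketch of how a caller would use the commit-callback interface defined in the patch above. This is illustrative only: the my_* names, the embedding struct, and the single-credit journal_start() are assumptions; only journal_callback_set(), struct journal_callback, and the callback signature come from the patch.

#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/jbd.h>

struct my_commit_cb {
        struct journal_callback cb;     /* must be first: jbd links it through cb.jcb_list */
        void *data;                     /* caller-specific payload lives after the base struct */
};

/* Runs once the transaction containing the handle has committed; error is non-zero on commit failure. */
static void my_commit_done(struct journal_callback *jcb, int error)
{
        struct my_commit_cb *mcb = (struct my_commit_cb *)jcb;

        if (error)
                printk(KERN_ERR "myfs: transaction commit failed: %d\n", error);
        kfree(mcb);                     /* the callback is responsible for freeing its own struct */
}

static int my_journaled_op(journal_t *journal, void *data)
{
        handle_t *handle = journal_start(journal, 1);
        struct my_commit_cb *mcb;

        if (IS_ERR(handle))
                return PTR_ERR(handle);

        mcb = kmalloc(sizeof(*mcb), GFP_NOFS);
        if (mcb) {
                mcb->data = data;
                journal_callback_set(handle, my_commit_done, &mcb->cb);
        }
        /* ... buffer updates under the handle would go here ... */
        return journal_stop(handle);    /* callbacks move from h_jcb to t_jcb here */
}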
diff --git a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch
deleted file mode 100644 (file)
index 5f26c0b..0000000
+++ /dev/null
@@ -1,637 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/jbd/commit.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/commit.c  2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/commit.c       2009-06-02 23:26:07.000000000 -0600
-@@ -22,6 +22,7 @@
- #include <linux/mm.h>
- #include <linux/pagemap.h>
- #include <linux/smp_lock.h>
-+#include <linux/crc32.h>
- /*
-@@ -95,19 +96,23 @@
-       return 1;
- }
--/* Done it all: now write the commit record.  We should have
-+/*
-+ * Done it all: now submit the commit record.  We should have
-  * cleaned up our previous buffers by now, so if we are in abort
-  * mode we can now just skip the rest of the journal write
-  * entirely.
-  *
-  * Returns 1 if the journal needs to be aborted or 0 on success
-  */
--static int journal_write_commit_record(journal_t *journal,
--                                      transaction_t *commit_transaction)
-+static int journal_submit_commit_record(journal_t *journal,
-+                                      transaction_t *commit_transaction,
-+                                      struct buffer_head **cbh,
-+                                      __u32 crc32_sum)
- {
-       struct journal_head *descriptor;
-+      struct commit_header *tmp;
-       struct buffer_head *bh;
--      int i, ret;
-+      int ret;
-       int barrier_done = 0;
-       if (is_journal_aborted(journal))
-@@ -119,21 +124,34 @@
-       bh = jh2bh(descriptor);
--      /* AKPM: buglet - add `i' to tmp! */
--      for (i = 0; i < bh->b_size; i += 512) {
--              journal_header_t *tmp = (journal_header_t*)bh->b_data;
--              tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
--              tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
--              tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-+      tmp = (struct commit_header *)bh->b_data;
-+      tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-+      tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
-+      tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-+                              
-+      if (JFS_HAS_COMPAT_FEATURE(journal,
-+                              JFS_FEATURE_COMPAT_CHECKSUM)) {
-+              tmp->h_chksum_type      = JFS_CRC32_CHKSUM;
-+              tmp->h_chksum_size      = JFS_CRC32_CHKSUM_SIZE;
-+              tmp->h_chksum[0]        = cpu_to_be32(crc32_sum);
-       }
--      JBUFFER_TRACE(descriptor, "write commit block");
-+      JBUFFER_TRACE(descriptor, "submit commit block");
-+      lock_buffer(bh);
-+
-       set_buffer_dirty(bh);
--      if (journal->j_flags & JFS_BARRIER) {
-+      set_buffer_uptodate(bh);
-+      bh->b_end_io = journal_end_buffer_io_sync;
-+
-+      if (journal->j_flags & JFS_BARRIER &&
-+              !JFS_HAS_INCOMPAT_FEATURE(journal,
-+                                       JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+
-               set_buffer_ordered(bh);
-               barrier_done = 1;
-       }
--      ret = sync_dirty_buffer(bh);
-+      ret = submit_bh(WRITE, bh);
-+
-       /* is it possible for another commit to fail at roughly
-        * the same time as this one?  If so, we don't want to
-        * trust the barrier flag in the super, but instead want
-@@ -154,12 +172,70 @@
-               clear_buffer_ordered(bh);
-               set_buffer_uptodate(bh);
-               set_buffer_dirty(bh);
--              ret = sync_dirty_buffer(bh);
-+              ret = submit_bh(WRITE, bh);
-       }
--      put_bh(bh);             /* One for getblk() */
--      journal_put_journal_head(descriptor);
-+      *cbh = bh;
-+      return ret;
-+}
-+
-+/*
-+ * This function along with journal_submit_commit_record
-+ * allows to write the commit record asynchronously.
-+ */
-+static int journal_wait_on_commit_record(struct buffer_head *bh)
-+{
-+      int ret = 0;
-+
-+      clear_buffer_dirty(bh);
-+      wait_on_buffer(bh);
-+      
-+      if (unlikely(!buffer_uptodate(bh)))
-+              ret = -EIO;
-+      put_bh(bh);            /* One for getblk() */
-+      journal_put_journal_head(bh2jh(bh));
-+      
-+      return ret;
-+}
-+
-+/*
-+ * Wait for all submitted IO to complete.
-+ */
-+static int journal_wait_on_locked_list(journal_t *journal,
-+                                     transaction_t *commit_transaction)
-+{
-+      int ret = 0;
-+      struct journal_head *jh;
--      return (ret == -EIO);
-+      while (commit_transaction->t_locked_list) {
-+              struct buffer_head *bh;
-+
-+              jh = commit_transaction->t_locked_list->b_tprev;
-+              bh = jh2bh(jh);
-+              get_bh(bh);
-+              if (buffer_locked(bh)) {
-+                      spin_unlock(&journal->j_list_lock);
-+                      wait_on_buffer(bh);
-+                      if (unlikely(!buffer_uptodate(bh)))
-+                              ret = -EIO;
-+                      spin_lock(&journal->j_list_lock);
-+              }
-+              if (!inverted_lock(journal, bh)) {
-+                      put_bh(bh);
-+                      spin_lock(&journal->j_list_lock);
-+                      continue;
-+              }
-+              if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-+                      __journal_unfile_buffer(jh);
-+                      jbd_unlock_bh_state(bh);
-+                      journal_remove_journal_head(bh);
-+                      put_bh(bh);
-+              } else {
-+                      jbd_unlock_bh_state(bh);
-+              }
-+              put_bh(bh);
-+              cond_resched_lock(&journal->j_list_lock);
-+      }
-+      return ret;
- }
- void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
-@@ -282,6 +358,20 @@
-       return err;
- }
-+static inline __u32 jbd_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
-+{
-+      struct page *page = bh->b_page;
-+      char *addr;
-+      __u32 checksum;
-+
-+      addr = kmap_atomic(page, KM_USER0);
-+      checksum = crc32_be(crc32_sum,
-+                          (void *)(addr + offset_in_page(bh->b_data)),
-+                          bh->b_size);
-+      kunmap_atomic(addr, KM_USER0);
-+      return checksum;
-+}
-+
- /*
-  * journal_commit_transaction
-  *
-@@ -305,6 +395,8 @@
-       int first_tag = 0;
-       int tag_flag;
-       int i;
-+      struct buffer_head *cbh = NULL; /* For transactional checksums */
-+      __u32 crc32_sum = ~0;
-       /*
-        * First job: lock down the current transaction and wait for
-@@ -431,39 +523,14 @@
-       err = journal_submit_data_buffers(journal, commit_transaction);
-       /*
--       * Wait for all previously submitted IO to complete.
-+       * Wait for all previously submitted IO to complete if commit
-+       * record is to be written synchronously.
-        */
-       spin_lock(&journal->j_list_lock);
--      while (commit_transaction->t_locked_list) {
--              struct buffer_head *bh;
--
--              jh = commit_transaction->t_locked_list->b_tprev;
--              bh = jh2bh(jh);
--              get_bh(bh);
--              if (buffer_locked(bh)) {
--                      spin_unlock(&journal->j_list_lock);
--                      wait_on_buffer(bh);
--                      spin_lock(&journal->j_list_lock);
--              }
--              if (unlikely(!buffer_uptodate(bh)))
--                      err = -EIO;
--
--              if (!inverted_lock(journal, bh)) {
--                      put_bh(bh);
--                      spin_lock(&journal->j_list_lock);
--                      continue;
--              }
--              if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
--                      __journal_unfile_buffer(jh);
--                      jbd_unlock_bh_state(bh);
--                      journal_remove_journal_head(bh);
--                      put_bh(bh);
--              } else {
--                      jbd_unlock_bh_state(bh);
--              }
--              release_data_buffer(bh);
--              cond_resched_lock(&journal->j_list_lock);
--      }
-+      if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+              JFS_FEATURE_INCOMPAT_ASYNC_COMMIT))
-+              err = journal_wait_on_locked_list(journal,
-+                                                commit_transaction);
-       spin_unlock(&journal->j_list_lock);
-       if (err)
-@@ -642,6 +709,16 @@
- start_journal_io:
-                       for (i = 0; i < bufs; i++) {
-                               struct buffer_head *bh = wbuf[i];
-+                              /*
-+                               * Compute checksum.
-+                               */
-+                              if (JFS_HAS_COMPAT_FEATURE(journal,
-+                                      JFS_FEATURE_COMPAT_CHECKSUM)) {
-+                                      crc32_sum =
-+                                              jbd_checksum_data(crc32_sum,
-+                                                                 bh);
-+                              }
-+
-                               lock_buffer(bh);
-                               clear_buffer_dirty(bh);
-                               set_buffer_uptodate(bh);
-@@ -658,6 +735,23 @@
-               }
-       }
-+      /* Done it all: now write the commit record asynchronously. */
-+
-+      if (JFS_HAS_INCOMPAT_FEATURE(journal,
-+              JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+              err = journal_submit_commit_record(journal, commit_transaction,
-+                                               &cbh, crc32_sum);
-+              if (err)
-+                      __journal_abort_hard(journal);
-+
-+              spin_lock(&journal->j_list_lock);
-+              err = journal_wait_on_locked_list(journal,
-+                                              commit_transaction);
-+              spin_unlock(&journal->j_list_lock);
-+              if (err)
-+                      __journal_abort_hard(journal);
-+      }
-+
-       /* Lo and behold: we have just managed to send a transaction to
-            the log.  Before we can commit it, wait for the IO so far to
-            complete.  Control buffers being written are on the
-@@ -759,9 +853,15 @@
-               journal_abort(journal, err);
-       jbd_debug(3, "JBD: commit phase 6\n");
--
--      if (journal_write_commit_record(journal, commit_transaction))
--              err = -EIO;
-+              
-+      if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+              JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+              err = journal_submit_commit_record(journal, commit_transaction,
-+                                              &cbh, crc32_sum);
-+              if (err)
-+                      __journal_abort_hard(journal);
-+      }
-+      err = journal_wait_on_commit_record(cbh);
-       if (err)
-               journal_abort(journal, err);
-Index: linux-2.6.18-128.1.6/fs/jbd/recovery.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/recovery.c        2009-04-14 21:05:39.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/recovery.c     2009-06-02 23:26:07.000000000 -0600
-@@ -21,6 +21,7 @@
- #include <linux/jbd.h>
- #include <linux/errno.h>
- #include <linux/slab.h>
-+#include <linux/crc32.h>
- #endif
- /*
-@@ -310,6 +311,38 @@
-       return err;
- }
-+/*
-+ * calc_chksums calculates the checksums for the blocks described in the
-+ * descriptor block.
-+ */
-+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
-+                     unsigned long *next_log_block, __u32 *crc32_sum)
-+{
-+      int i, num_blks, err;
-+      unsigned long io_block;
-+      struct buffer_head *obh;
-+
-+      num_blks = count_tags(bh, journal->j_blocksize);
-+      /* Calculate checksum of the descriptor block. */
-+      *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
-+
-+      for (i = 0; i < num_blks; i++) {
-+              io_block = (*next_log_block)++;
-+              wrap(journal, *next_log_block);
-+              err = jread(&obh, journal, io_block);
-+              if (err) {
-+                      printk(KERN_ERR "JBD: IO error %d recovering block "
-+                              "%lu in log\n", err, io_block);
-+                      return 1;
-+              } else {
-+                      *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
-+                                   obh->b_size);
-+              }
-+              put_bh(obh);
-+      }
-+      return 0;
-+}
-+
- static int do_one_pass(journal_t *journal,
-                       struct recovery_info *info, enum passtype pass)
- {
-@@ -321,6 +354,7 @@
-       struct buffer_head *    bh;
-       unsigned int            sequence;
-       int                     blocktype;
-+      __u32                   crc32_sum = ~0; /* Transactional Checksums */
-       /* Precompute the maximum metadata descriptors in a descriptor block */
-       int                     MAX_BLOCKS_PER_DESC;
-@@ -412,9 +446,24 @@
-               switch(blocktype) {
-               case JFS_DESCRIPTOR_BLOCK:
-                       /* If it is a valid descriptor block, replay it
--                       * in pass REPLAY; otherwise, just skip over the
--                       * blocks it describes. */
-+                       * in pass REPLAY; if journal_checksums enabled, then
-+                       * calculate checksums in PASS_SCAN, otherwise,
-+                       * just skip over the blocks it describes. */
-                       if (pass != PASS_REPLAY) {
-+                              if (pass == PASS_SCAN &&
-+                                  JFS_HAS_COMPAT_FEATURE(journal,
-+                                          JFS_FEATURE_COMPAT_CHECKSUM) &&
-+                                  !info->end_transaction) {
-+                                      if (calc_chksums(journal, bh,
-+                                                      &next_log_block,
-+                                                      &crc32_sum)) {
-+                                              put_bh(bh);
-+                                              break;
-+                                      }
-+                                      put_bh(bh);
-+                                      continue;
-+                              }
-+
-                               next_log_block +=
-                                       count_tags(bh, journal->j_blocksize);
-                               wrap(journal, next_log_block);
-@@ -509,9 +558,97 @@
-                       continue;
-               case JFS_COMMIT_BLOCK:
--                      /* Found an expected commit block: not much to
--                       * do other than move on to the next sequence
-+                      /*     How to differentiate between interrupted commit
-+                       *               and journal corruption ?
-+                       *
-+                       * {nth transaction}
-+                       *        Checksum Verification Failed
-+                       *                       |
-+                       *               ____________________
-+                       *              |                    |
-+                       *      async_commit             sync_commit
-+                       *              |                    |
-+                       *              | GO TO NEXT    "Journal Corruption"
-+                       *              | TRANSACTION
-+                       *              |
-+                       * {(n+1)th transanction}
-+                       *              |
-+                       *       _______|______________
-+                       *      |                     |
-+                       * Commit block found   Commit block not found
-+                       *      |                     |
-+                       * "Journal Corruption"       |
-+                       *               _____________|__________
-+                       *              |                       |
-+                       *      nth trans corrupt       OR   nth trans
-+                       *      and (n+1)th interrupted     interrupted 
-+                       *      before commit block
-+                       *      could reach the disk.
-+                       *      (Cannot find the difference in above
-+                       *       mentioned conditions. Hence assume
-+                       *       "Interrupted Commit".)
-+                       */
-+
-+                      /* Found an expected commit block: if checksums
-+                       * are present verify them in PASS_SCAN; else not
-+                       * much to do other than move on to the next sequence
-                        * number. */
-+                      if (pass == PASS_SCAN &&
-+                          JFS_HAS_COMPAT_FEATURE(journal,
-+                                  JFS_FEATURE_COMPAT_CHECKSUM)) {
-+                              int chksum_err, chksum_seen;
-+                              struct commit_header *cbh =
-+                                      (struct commit_header *)bh->b_data;
-+                              unsigned found_chksum =
-+                                              be32_to_cpu(cbh->h_chksum[0]);
-+
-+                              chksum_err = chksum_seen = 0;
-+
-+                              if (info->end_transaction) {
-+                                      printk(KERN_ERR "JBD: Transaction %u "
-+                                              "found to be corrupt.\n",
-+                                              next_commit_ID - 1);
-+                                      brelse(bh);
-+                                      break;
-+                              }
-+
-+                              if (crc32_sum == found_chksum &&
-+                                  cbh->h_chksum_type == JFS_CRC32_CHKSUM &&
-+                                  cbh->h_chksum_size ==
-+                                              JFS_CRC32_CHKSUM_SIZE) {
-+                                     chksum_seen = 1;
-+                              } else if (!(cbh->h_chksum_type == 0 &&
-+                                           cbh->h_chksum_size == 0 &&
-+                                           found_chksum == 0 &&
-+                                           !chksum_seen)) {
-+                              /*
-+                               * If fs is mounted using an old kernel and then
-+                               * kernel with journal_chksum is used then we
-+                               * get a situation where the journal flag has
-+                               * checksum flag set but checksums are not
-+                               * present i.e chksum = 0, in the individual
-+                               * commit blocks.
-+                               * Hence to avoid checksum failures, in this
-+                               * situation, this extra check is added.
-+                               */
-+                                              chksum_err = 1;
-+                              }
-+
-+                              if (chksum_err) {
-+                                      info->end_transaction = next_commit_ID;
-+
-+                                      if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+                                          JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){
-+                                              printk(KERN_ERR
-+                                                     "JBD: Transaction %u "
-+                                                     "found to be corrupt.\n",
-+                                                     next_commit_ID);
-+                                              brelse(bh);
-+                                              break;
-+                                      }
-+                              }
-+                              crc32_sum = ~0;
-+                      }
-                       brelse(bh);
-                       next_commit_ID++;
-                       continue;
-@@ -547,9 +684,10 @@
-        * transaction marks the end of the valid log.
-        */
--      if (pass == PASS_SCAN)
--              info->end_transaction = next_commit_ID;
--      else {
-+      if (pass == PASS_SCAN) {
-+              if (!info->end_transaction)
-+                      info->end_transaction = next_commit_ID;
-+      } else {
-               /* It's really bad news if different passes end up at
-                * different places (but possible due to IO errors). */
-               if (info->end_transaction != next_commit_ID) {
-Index: linux-2.6.18-128.1.6/fs/jbd/journal.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/journal.c 2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/journal.c      2009-06-02 23:26:07.000000000 -0600
-@@ -67,6 +67,7 @@
- EXPORT_SYMBOL(journal_check_used_features);
- EXPORT_SYMBOL(journal_check_available_features);
- EXPORT_SYMBOL(journal_set_features);
-+EXPORT_SYMBOL(journal_clear_features);
- EXPORT_SYMBOL(journal_create);
- EXPORT_SYMBOL(journal_load);
- EXPORT_SYMBOL(journal_destroy);
-@@ -1583,6 +1584,33 @@
-       return 1;
- }
-+/**
-+ * int journal_clear_features () - Clear a given journal feature in the superblock
-+ * @journal: Journal to act on.
-+ * @compat: bitmask of compatible features
-+ * @ro: bitmask of features that force read-only mount
-+ * @incompat: bitmask of incompatible features
-+ *
-+ * Clear a given journal feature as present on the
-+ * superblock.  Returns true if the requested features could be reset.
-+ *
-+ */
-+int journal_clear_features (journal_t *journal, unsigned long compat,
-+                        unsigned long ro, unsigned long incompat)
-+{
-+      journal_superblock_t *sb;
-+
-+      jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
-+                compat, ro, incompat);
-+
-+      sb = journal->j_superblock;
-+
-+      sb->s_feature_compat    &= ~cpu_to_be32(compat);
-+      sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
-+      sb->s_feature_incompat  &= ~cpu_to_be32(incompat);
-+
-+      return 1;
-+}
- /**
-  * int journal_update_format () - Update on-disk journal structure.
-Index: linux-2.6.18-128.1.6/fs/Kconfig
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/Kconfig       2009-04-14 21:05:39.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/Kconfig    2009-06-02 23:26:07.000000000 -0600
-@@ -206,6 +206,7 @@
- config JBD
-       tristate
-+      select CRC32
-       help
-         This is a generic journaling layer for block devices.  It is
-         currently used by the ext3 and OCFS2 file systems, but it could
-Index: linux-2.6.18-128.1.6/include/linux/jbd.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/jbd.h      2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/jbd.h   2009-06-02 23:26:07.000000000 -0600
-@@ -148,6 +148,29 @@
-       __be32          h_sequence;
- } journal_header_t;
-+/*
-+ * Checksum types.
-+ */
-+#define JFS_CRC32_CHKSUM   1
-+#define JFS_MD5_CHKSUM     2
-+#define JFS_SHA1_CHKSUM    3
-+
-+#define JFS_CRC32_CHKSUM_SIZE 4
-+
-+#define JFS_CHECKSUM_BYTES (32 / sizeof(u32))
-+/*
-+ * Commit block header for storing transactional checksums:
-+ */
-+struct commit_header
-+{
-+      __be32          h_magic;
-+      __be32          h_blocktype;
-+      __be32          h_sequence;
-+      unsigned char   h_chksum_type;
-+      unsigned char   h_chksum_size;
-+      unsigned char   h_padding[2];
-+      __be32          h_chksum[JFS_CHECKSUM_BYTES];
-+};
- /* 
-  * The block tag: used to describe a single buffer in the journal 
-@@ -234,12 +257,16 @@
-       ((j)->j_format_version >= 2 &&                                  \
-        ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
--#define JFS_FEATURE_INCOMPAT_REVOKE   0x00000001
-+#define JFS_FEATURE_COMPAT_CHECKSUM   0x00000001
-+
-+#define JFS_FEATURE_INCOMPAT_REVOKE           0x00000001
-+#define JFS_FEATURE_INCOMPAT_ASYNC_COMMIT     0x00000004
- /* Features known to this kernel version: */
--#define JFS_KNOWN_COMPAT_FEATURES     0
-+#define JFS_KNOWN_COMPAT_FEATURES     JFS_FEATURE_COMPAT_CHECKSUM
- #define JFS_KNOWN_ROCOMPAT_FEATURES   0
--#define JFS_KNOWN_INCOMPAT_FEATURES   JFS_FEATURE_INCOMPAT_REVOKE
-+#define JFS_KNOWN_INCOMPAT_FEATURES   (JFS_FEATURE_INCOMPAT_REVOKE | \
-+                                      JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)
- #ifdef __KERNEL__
-@@ -1053,6 +1080,8 @@
-                  (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int       journal_set_features 
-                  (journal_t *, unsigned long, unsigned long, unsigned long);
-+extern int       journal_clear_features
-+                 (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int       journal_create     (journal_t *);
- extern int       journal_load       (journal_t *journal);
- #ifndef __GENKSYMS__
-Index: linux-2.6.18-128.1.6/Documentation/filesystems/ext3.txt
-===================================================================
---- linux-2.6.18-128.1.6.orig/Documentation/filesystems/ext3.txt       2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/Documentation/filesystems/ext3.txt    2009-06-02 23:26:07.000000000 -0600
-@@ -14,6 +14,16 @@
- When mounting an ext3 filesystem, the following option are accepted:
- (*) == default
-+journal_checksum      Enable checksumming of the journal transactions.
-+                      This will allow the recovery code in e2fsck and the
-+                      kernel to detect corruption in the journal.  It is a
-+                      compatible change and will be ignored by older kernels.
-+
-+journal_async_commit  Commit block can be written to disk without waiting
-+                      for descriptor blocks. If enabled, older kernels cannot
-+                      mount the device. This will enable 'journal_checksum'
-+                      internally.
-+
- journal=update                Update the ext3 file system's journal to the current
-                       format.
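For context on the checksum scheme removed above, a short sketch of how the running CRC is accumulated over journal blocks and compared against the commit header during recovery. The helper names are assumptions; struct commit_header, the JFS_CRC32_* constants, and the crc32_be() accumulation come from the patch.

#include <linux/crc32.h>
#include <linux/jbd.h>

/*
 * Fold one journal block into the running transaction checksum (seeded with ~0);
 * this is the same accumulation calc_chksums() performs during PASS_SCAN.
 */
static __u32 fold_journal_block(__u32 crc32_sum, struct buffer_head *bh)
{
        return crc32_be(crc32_sum, (void *)bh->b_data, bh->b_size);
}

/* True if the commit header carries a CRC32 checksum that matches the recomputed value. */
static int commit_checksum_matches(struct commit_header *cbh, __u32 crc32_sum)
{
        return cbh->h_chksum_type == JFS_CRC32_CHKSUM &&
               cbh->h_chksum_size == JFS_CRC32_CHKSUM_SIZE &&
               be32_to_cpu(cbh->h_chksum[0]) == crc32_sum;
}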
diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch
deleted file mode 100644 (file)
index 67832a6..0000000
+++ /dev/null
@@ -1,743 +0,0 @@
-Index: linux-2.6.18-128.1.6/include/linux/jbd.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/jbd.h      2009-06-02 23:22:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/jbd.h   2009-06-02 23:24:00.000000000 -0600
-@@ -428,6 +428,16 @@
- };
-+/*
-+ * Some stats for checkpoint phase
-+ */
-+struct transaction_chp_stats_s {
-+      unsigned long           cs_chp_time;
-+      unsigned long           cs_forced_to_close;
-+      unsigned long           cs_written;
-+      unsigned long           cs_dropped;
-+};
-+
- /* The transaction_t type is the guts of the journaling mechanism.  It
-  * tracks a compound transaction through its various states:
-  *
-@@ -565,6 +575,21 @@
-       spinlock_t              t_handle_lock;
-       /*
-+       * Longest time some handle had to wait for running transaction
-+       */
-+      unsigned long           t_max_wait;
-+
-+      /*
-+       * When transaction started
-+       */
-+      unsigned long           t_start;
-+
-+      /*
-+       * Checkpointing stats [j_checkpoint_sem]
-+       */
-+      struct transaction_chp_stats_s t_chp_stats;
-+
-+      /*
-        * Number of outstanding updates running on this transaction
-        * [t_handle_lock]
-        */
-@@ -604,6 +629,57 @@
-       struct list_head        t_jcb;
- };
-+struct transaction_run_stats_s {
-+      unsigned long           rs_wait;
-+      unsigned long           rs_running;
-+      unsigned long           rs_locked;
-+      unsigned long           rs_flushing;
-+      unsigned long           rs_logging;
-+
-+      unsigned long           rs_handle_count;
-+      unsigned long           rs_blocks;
-+      unsigned long           rs_blocks_logged;
-+};
-+
-+struct transaction_stats_s
-+{
-+      int                     ts_type;
-+      unsigned long           ts_tid;
-+      union {
-+              struct transaction_run_stats_s run;
-+              struct transaction_chp_stats_s chp;
-+      } u;
-+};
-+
-+#define JBD_STATS_RUN         1
-+#define JBD_STATS_CHECKPOINT  2
-+
-+#define ts_wait                       u.run.rs_wait
-+#define ts_running            u.run.rs_running
-+#define ts_locked             u.run.rs_locked
-+#define ts_flushing           u.run.rs_flushing
-+#define ts_logging            u.run.rs_logging
-+#define ts_handle_count               u.run.rs_handle_count
-+#define ts_blocks             u.run.rs_blocks
-+#define ts_blocks_logged      u.run.rs_blocks_logged
-+
-+#define ts_chp_time           u.chp.cs_chp_time
-+#define ts_forced_to_close    u.chp.cs_forced_to_close
-+#define ts_written            u.chp.cs_written
-+#define ts_dropped            u.chp.cs_dropped
-+
-+#define CURRENT_MSECS         (jiffies_to_msecs(jiffies))
-+
-+static inline unsigned int
-+jbd_time_diff(unsigned int start, unsigned int end)
-+{
-+      if (unlikely(start > end))
-+              end = end + (~0UL - start);
-+      else
-+              end -= start;
-+      return end;
-+}
-+
- /**
-  * struct journal_s - The journal_s type is the concrete type associated with
-  *     journal_t.
-@@ -857,6 +933,16 @@
-       pid_t                   j_last_sync_writer;
-       /*
-+       *
-+       */
-+      struct transaction_stats_s *j_history;
-+      int                     j_history_max;
-+      int                     j_history_cur;
-+      spinlock_t              j_history_lock;
-+      struct proc_dir_entry   *j_proc_entry;
-+      struct transaction_stats_s j_stats;
-+
-+      /*
-        * An opaque pointer to fs-private information.  ext3 puts its
-        * superblock pointer here
-        */
-Index: linux-2.6.18-128.1.6/fs/jbd/transaction.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/transaction.c     2009-06-02 23:22:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/transaction.c  2009-06-02 23:24:00.000000000 -0600
-@@ -60,6 +60,8 @@
-       J_ASSERT(journal->j_running_transaction == NULL);
-       journal->j_running_transaction = transaction;
-+      transaction->t_max_wait = 0;
-+      transaction->t_start = CURRENT_MSECS;
-       return transaction;
- }
-@@ -86,6 +88,7 @@
-       int nblocks = handle->h_buffer_credits;
-       transaction_t *new_transaction = NULL;
-       int ret = 0;
-+      unsigned long ts = CURRENT_MSECS;
-       if (nblocks > journal->j_max_transaction_buffers) {
-               printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
-@@ -219,6 +222,12 @@
-       /* OK, account for the buffers that this operation expects to
-        * use and add the handle to the running transaction. */
-+      if (time_after(transaction->t_start, ts)) {
-+              ts = jbd_time_diff(ts, transaction->t_start);
-+              if (ts > transaction->t_max_wait)
-+                      transaction->t_max_wait= ts;
-+      }
-+
-       handle->h_transaction = transaction;
-       transaction->t_outstanding_credits += nblocks;
-       transaction->t_updates++;
-Index: linux-2.6.18-128.1.6/fs/jbd/journal.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/journal.c 2009-06-02 23:23:03.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/journal.c      2009-06-02 23:24:00.000000000 -0600
-@@ -36,6 +36,7 @@
- #include <linux/kthread.h>
- #include <linux/poison.h>
- #include <linux/proc_fs.h>
-+#include <linux/seq_file.h>
- #include <asm/uaccess.h>
- #include <asm/page.h>
-@@ -638,6 +639,300 @@
-       return journal_add_journal_head(bh);
- }
-+struct jbd_stats_proc_session {
-+      journal_t *journal;
-+      struct transaction_stats_s *stats;
-+      int start;
-+      int max;
-+};
-+
-+static void *jbd_history_skip_empty(struct jbd_stats_proc_session *s,
-+                                      struct transaction_stats_s *ts,
-+                                      int first)
-+{
-+      if (ts == s->stats + s->max)
-+              ts = s->stats;
-+      if (!first && ts == s->stats + s->start)
-+              return NULL;
-+      while (ts->ts_type == 0) {
-+              ts++;
-+              if (ts == s->stats + s->max)
-+                      ts = s->stats;
-+              if (ts == s->stats + s->start)
-+                      return NULL;
-+      }
-+      return ts;
-+
-+}
-+
-+static void *jbd_seq_history_start(struct seq_file *seq, loff_t *pos)
-+{
-+      struct jbd_stats_proc_session *s = seq->private;
-+      struct transaction_stats_s *ts;
-+      int l = *pos;
-+
-+      if (l == 0)
-+              return SEQ_START_TOKEN;
-+      ts = jbd_history_skip_empty(s, s->stats + s->start, 1);
-+      if (!ts)
-+              return NULL;
-+      while (--l && (ts = jbd_history_skip_empty(s, ++ts, 0)) != NULL);
-+      return ts;
-+}
-+
-+static void *jbd_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
-+{
-+      struct jbd_stats_proc_session *s = seq->private;
-+      struct transaction_stats_s *ts = v;
-+
-+      ++*pos;
-+      if (v == SEQ_START_TOKEN)
-+              return jbd_history_skip_empty(s, s->stats + s->start, 1);
-+      else
-+              return jbd_history_skip_empty(s, ++ts, 0);
-+}
-+
-+static int jbd_seq_history_show(struct seq_file *seq, void *v)
-+{
-+      struct transaction_stats_s *ts = v;
-+      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
-+                              "%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
-+                              "wait", "run", "lock", "flush", "log", "hndls",
-+                              "block", "inlog", "ctime", "write", "drop",
-+                              "close");
-+              return 0;
-+      }
-+      if (ts->ts_type == JBD_STATS_RUN)
-+              seq_printf(seq, "%-4s %-5lu %-5lu %-5lu %-5lu %-5lu %-5lu "
-+                              "%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
-+                              ts->ts_wait, ts->ts_running, ts->ts_locked,
-+                              ts->ts_flushing, ts->ts_logging,
-+                              ts->ts_handle_count, ts->ts_blocks,
-+                              ts->ts_blocks_logged);
-+      else if (ts->ts_type == JBD_STATS_CHECKPOINT)
-+              seq_printf(seq, "%-4s %-5lu %48s %-5lu %-5lu %-5lu %-5lu\n",
-+                              "C", ts->ts_tid, " ", ts->ts_chp_time,
-+                              ts->ts_written, ts->ts_dropped,
-+                              ts->ts_forced_to_close);
-+      else
-+              J_ASSERT(0);
-+      return 0;
-+}
-+
-+static void jbd_seq_history_stop(struct seq_file *seq, void *v)
-+{
-+}
-+
-+static struct seq_operations jbd_seq_history_ops = {
-+      .start  = jbd_seq_history_start,
-+      .next   = jbd_seq_history_next,
-+      .stop   = jbd_seq_history_stop,
-+      .show   = jbd_seq_history_show,
-+};
-+
-+static int jbd_seq_history_open(struct inode *inode, struct file *file)
-+{
-+      journal_t *journal = PDE(inode)->data;
-+      struct jbd_stats_proc_session *s;
-+      int rc, size;
-+
-+      s = kmalloc(sizeof(*s), GFP_KERNEL);
-+      if (s == NULL)
-+              return -EIO;
-+      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
-+      s->stats = kmalloc(size, GFP_KERNEL);
-+      if (s->stats == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
-+      spin_lock(&journal->j_history_lock);
-+      memcpy(s->stats, journal->j_history, size);
-+      s->max = journal->j_history_max;
-+      s->start = journal->j_history_cur % s->max;
-+      spin_unlock(&journal->j_history_lock);
-+
-+      rc = seq_open(file, &jbd_seq_history_ops);
-+      if (rc == 0) {
-+              struct seq_file *m = (struct seq_file *)file->private_data;
-+              m->private = s;
-+      } else {
-+              kfree(s->stats);
-+              kfree(s);
-+      }
-+      return rc;
-+
-+}
-+
-+static int jbd_seq_history_release(struct inode *inode, struct file *file)
-+{
-+      struct seq_file *seq = (struct seq_file *)file->private_data;
-+      struct jbd_stats_proc_session *s = seq->private;
-+      kfree(s->stats);
-+      kfree(s);
-+      return seq_release(inode, file);
-+}
-+
-+static struct file_operations jbd_seq_history_fops = {
-+      .owner          = THIS_MODULE,
-+      .open           = jbd_seq_history_open,
-+      .read           = seq_read,
-+      .llseek         = seq_lseek,
-+      .release        = jbd_seq_history_release,
-+};
-+
-+static void *jbd_seq_info_start(struct seq_file *seq, loff_t *pos)
-+{
-+      return *pos ? NULL : SEQ_START_TOKEN;
-+}
-+
-+static void *jbd_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
-+{
-+      return NULL;
-+}
-+
-+static int jbd_seq_info_show(struct seq_file *seq, void *v)
-+{
-+      struct jbd_stats_proc_session *s = seq->private;
-+      if (v != SEQ_START_TOKEN)
-+              return 0;
-+      seq_printf(seq, "%lu transaction, each upto %u blocks\n",
-+                      s->stats->ts_tid,
-+                      s->journal->j_max_transaction_buffers);
-+      if (s->stats->ts_tid == 0)
-+              return 0;
-+      seq_printf(seq, "average: \n  %lums waiting for transaction\n",
-+                      s->stats->ts_wait / s->stats->ts_tid);
-+      seq_printf(seq, "  %lums running transaction\n",
-+                      s->stats->ts_running / s->stats->ts_tid);
-+      seq_printf(seq, "  %lums transaction was being locked\n",
-+                      s->stats->ts_locked / s->stats->ts_tid);
-+      seq_printf(seq, "  %lums flushing data (in ordered mode)\n",
-+                      s->stats->ts_flushing / s->stats->ts_tid);
-+      seq_printf(seq, "  %lums logging transaction\n",
-+                      s->stats->ts_logging / s->stats->ts_tid);
-+      seq_printf(seq, "  %lu handles per transaction\n",
-+                      s->stats->ts_handle_count / s->stats->ts_tid);
-+      seq_printf(seq, "  %lu blocks per transaction\n",
-+                      s->stats->ts_blocks / s->stats->ts_tid);
-+      seq_printf(seq, "  %lu logged blocks per transaction\n",
-+                      s->stats->ts_blocks_logged / s->stats->ts_tid);
-+      return 0;
-+}
-+
-+static void jbd_seq_info_stop(struct seq_file *seq, void *v)
-+{
-+}
-+
-+static struct seq_operations jbd_seq_info_ops = {
-+      .start  = jbd_seq_info_start,
-+      .next   = jbd_seq_info_next,
-+      .stop   = jbd_seq_info_stop,
-+      .show   = jbd_seq_info_show,
-+};
-+
-+static int jbd_seq_info_open(struct inode *inode, struct file *file)
-+{
-+      journal_t *journal = PDE(inode)->data;
-+      struct jbd_stats_proc_session *s;
-+      int rc, size;
-+
-+      s = kmalloc(sizeof(*s), GFP_KERNEL);
-+      if (s == NULL)
-+              return -EIO;
-+      size = sizeof(struct transaction_stats_s);
-+      s->stats = kmalloc(size, GFP_KERNEL);
-+      if (s->stats == NULL) {
-+              kfree(s);
-+              return -EIO;
-+      }
-+      spin_lock(&journal->j_history_lock);
-+      memcpy(s->stats, &journal->j_stats, size);
-+      s->journal = journal;
-+      spin_unlock(&journal->j_history_lock);
-+
-+      rc = seq_open(file, &jbd_seq_info_ops);
-+      if (rc == 0) {
-+              struct seq_file *m = (struct seq_file *)file->private_data;
-+              m->private = s;
-+      } else {
-+              kfree(s->stats);
-+              kfree(s);
-+      }
-+      return rc;
-+
-+}
-+
-+static int jbd_seq_info_release(struct inode *inode, struct file *file)
-+{
-+      struct seq_file *seq = (struct seq_file *)file->private_data;
-+      struct jbd_stats_proc_session *s = seq->private;
-+      kfree(s->stats);
-+      kfree(s);
-+      return seq_release(inode, file);
-+}
-+
-+static struct file_operations jbd_seq_info_fops = {
-+      .owner          = THIS_MODULE,
-+      .open           = jbd_seq_info_open,
-+      .read           = seq_read,
-+      .llseek         = seq_lseek,
-+      .release        = jbd_seq_info_release,
-+};
-+
-+static struct proc_dir_entry *proc_jbd_stats = NULL;
-+
-+static void jbd_stats_proc_init(journal_t *journal)
-+{
-+      char name[64];
-+
-+      snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
-+      journal->j_proc_entry = proc_mkdir(name, proc_jbd_stats);
-+      if (journal->j_proc_entry) {
-+              struct proc_dir_entry *p;
-+              p = create_proc_entry("history", S_IRUGO,
-+                              journal->j_proc_entry);
-+              if (p) {
-+                      p->proc_fops = &jbd_seq_history_fops;
-+                      p->data = journal;
-+                      p = create_proc_entry("info", S_IRUGO,
-+                                              journal->j_proc_entry);
-+                      if (p) {
-+                              p->proc_fops = &jbd_seq_info_fops;
-+                              p->data = journal;
-+                      }
-+              }
-+      }
-+}
-+
-+static void jbd_stats_proc_exit(journal_t *journal)
-+{
-+      char name[64];
-+
-+      snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name));
-+      remove_proc_entry("info", journal->j_proc_entry);
-+      remove_proc_entry("history", journal->j_proc_entry);
-+      remove_proc_entry(name, proc_jbd_stats);
-+}
-+
-+static void journal_init_stats(journal_t *journal)
-+{
-+      int size;
-+
-+      if (proc_jbd_stats == NULL)
-+              return;
-+
-+      journal->j_history_max = 100;
-+      size = sizeof(struct transaction_stats_s) * journal->j_history_max;
-+      journal->j_history = kmalloc(size, GFP_KERNEL);
-+      if (journal->j_history == NULL) {
-+              journal->j_history_max = 0;
-+              return;
-+      }
-+      memset(journal->j_history, 0, size);
-+      spin_lock_init(&journal->j_history_lock);
-+}
-+
- /*
-  * Management for journal control blocks: functions to create and
-  * destroy journal_t structures, and to initialise and read existing
-@@ -680,6 +975,9 @@
-               kfree(journal);
-               goto fail;
-       }
-+
-+      journal_init_stats(journal);
-+
-       return journal;
- fail:
-       return NULL;
-@@ -723,6 +1021,7 @@
-       journal->j_blk_offset = start;
-       journal->j_maxlen = len;
-       journal->j_blocksize = blocksize;
-+      jbd_stats_proc_init(journal);
-       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-       J_ASSERT(bh != NULL);
-@@ -772,6 +1071,7 @@
-       journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
-       journal->j_blocksize = inode->i_sb->s_blocksize;
-+      jbd_stats_proc_init(journal);
-       /* journal descriptor can store up to n blocks -bzzz */
-       n = journal->j_blocksize / sizeof(journal_block_tag_t);
-@@ -1168,6 +1468,8 @@
-               brelse(journal->j_sb_buffer);
-       }
-+      if (journal->j_proc_entry)
-+              jbd_stats_proc_exit(journal);
-       if (journal->j_inode)
-               iput(journal->j_inode);
-       if (journal->j_revoke)
-@@ -2015,6 +2317,28 @@
- #endif
-+#if defined(CONFIG_PROC_FS)
-+
-+#define JBD_STATS_PROC_NAME "fs/jbd"
-+
-+static void __init create_jbd_stats_proc_entry(void)
-+{
-+      proc_jbd_stats = proc_mkdir(JBD_STATS_PROC_NAME, NULL);
-+}
-+
-+static void __exit remove_jbd_stats_proc_entry(void)
-+{
-+      if (proc_jbd_stats)
-+              remove_proc_entry(JBD_STATS_PROC_NAME, NULL);
-+}
-+
-+#else
-+
-+#define create_jbd_stats_proc_entry() do {} while (0)
-+#define remove_jbd_stats_proc_entry() do {} while (0)
-+
-+#endif
-+
- kmem_cache_t *jbd_handle_cache;
- static int __init journal_init_handle_cache(void)
-@@ -2078,6 +2402,7 @@
-       if (ret != 0)
-               journal_destroy_caches();
-       create_jbd_proc_entry();
-+      create_jbd_stats_proc_entry();
-       return ret;
- }
-@@ -2089,6 +2414,7 @@
-               printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
- #endif
-       remove_jbd_proc_entry();
-+      remove_jbd_stats_proc_entry();
-       journal_destroy_caches();
- }
-Index: linux-2.6.18-128.1.6/fs/jbd/checkpoint.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/checkpoint.c      2009-06-02 23:22:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/checkpoint.c   2009-06-02 23:24:00.000000000 -0600
-@@ -242,7 +242,7 @@
-  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
-  */
- static int __process_buffer(journal_t *journal, struct journal_head *jh,
--                      struct buffer_head **bhs, int *batch_count)
-+                      struct buffer_head **bhs, int *batch_count, transaction_t *transaction)
- {
-       struct buffer_head *bh = jh2bh(jh);
-       int ret = 0;
-@@ -260,6 +260,7 @@
-               transaction_t *t = jh->b_transaction;
-               tid_t tid = t->t_tid;
-+              transaction->t_chp_stats.cs_forced_to_close++;
-               spin_unlock(&journal->j_list_lock);
-               jbd_unlock_bh_state(bh);
-               log_start_commit(journal, tid);
-@@ -291,6 +292,7 @@
-               bhs[*batch_count] = bh;
-               __buffer_relink_io(jh);
-               jbd_unlock_bh_state(bh);
-+              transaction->t_chp_stats.cs_written++;
-               (*batch_count)++;
-               if (*batch_count == NR_BATCH) {
-                       spin_unlock(&journal->j_list_lock);
-@@ -336,6 +338,8 @@
-       if (!journal->j_checkpoint_transactions)
-               goto out;
-       transaction = journal->j_checkpoint_transactions;
-+      if (transaction->t_chp_stats.cs_chp_time == 0)
-+              transaction->t_chp_stats.cs_chp_time = CURRENT_MSECS;
-       this_tid = transaction->t_tid;
- restart:
-       /*
-@@ -360,7 +364,8 @@
-                               retry = 1;
-                               break;
-                       }
--                      retry = __process_buffer(journal, jh, bhs,&batch_count);
-+                      retry = __process_buffer(journal, jh, bhs,&batch_count,
-+                                               transaction);
-                       if (retry < 0 && !result)
-                               result = retry;
-                       if (!retry && lock_need_resched(&journal->j_list_lock)){
-@@ -692,6 +697,8 @@
- void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
- {
-+      struct transaction_stats_s stats;
-+
-       assert_spin_locked(&journal->j_list_lock);
-       if (transaction->t_cpnext) {
-               transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
-@@ -718,5 +725,25 @@
-       J_ASSERT(journal->j_running_transaction != transaction);
-       jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
-+
-+      /*
-+       * File the transaction for history
-+       */
-+      if (transaction->t_chp_stats.cs_written != 0 ||
-+                      transaction->t_chp_stats.cs_chp_time != 0) {
-+              stats.ts_type = JBD_STATS_CHECKPOINT;
-+              stats.ts_tid = transaction->t_tid;
-+              stats.u.chp = transaction->t_chp_stats;
-+              if (stats.ts_chp_time)
-+                      stats.ts_chp_time =
-+                              jbd_time_diff(stats.ts_chp_time, CURRENT_MSECS);
-+              spin_lock(&journal->j_history_lock);
-+              memcpy(journal->j_history + journal->j_history_cur, &stats,
-+                              sizeof(stats));
-+              if (++journal->j_history_cur == journal->j_history_max)
-+                      journal->j_history_cur = 0;
-+              spin_unlock(&journal->j_history_lock);
-+      }
-+
-       kfree(transaction);
- }
-Index: linux-2.6.18-128.1.6/fs/jbd/commit.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/commit.c  2009-06-02 23:22:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/commit.c       2009-06-02 23:24:00.000000000 -0600
-@@ -13,6 +13,7 @@
-  * part of the ext2fs journaling system.
-  */
-+#include <linux/jiffies.h>
- #include <linux/time.h>
- #include <linux/fs.h>
- #include <linux/jbd.h>
-@@ -22,6 +23,7 @@
- #include <linux/pagemap.h>
- #include <linux/smp_lock.h>
-+
- /*
-  * Default IO end handler for temporary BJ_IO buffer_heads.
-  */
-@@ -288,6 +290,7 @@
-  */
- void journal_commit_transaction(journal_t *journal)
- {
-+      struct transaction_stats_s stats;
-       transaction_t *commit_transaction;
-       struct journal_head *jh, *new_jh, *descriptor;
-       struct buffer_head **wbuf = journal->j_wbuf;
-@@ -334,6 +337,11 @@
-       spin_lock(&journal->j_state_lock);
-       commit_transaction->t_state = T_LOCKED;
-+      stats.ts_wait = commit_transaction->t_max_wait;
-+      stats.ts_locked = CURRENT_MSECS;
-+      stats.ts_running = jbd_time_diff(commit_transaction->t_start,
-+                                              stats.ts_locked);
-+
-       spin_lock(&commit_transaction->t_handle_lock);
-       while (commit_transaction->t_updates) {
-               DEFINE_WAIT(wait);
-@@ -404,6 +412,9 @@
-        */
-       journal_switch_revoke_table(journal);
-+      stats.ts_flushing = CURRENT_MSECS;
-+      stats.ts_locked = jbd_time_diff(stats.ts_locked, stats.ts_flushing);
-+
-       commit_transaction->t_state = T_FLUSH;
-       journal->j_committing_transaction = commit_transaction;
-       journal->j_running_transaction = NULL;
-@@ -484,6 +495,11 @@
-       J_ASSERT(commit_transaction->t_nr_buffers <=
-                commit_transaction->t_outstanding_credits);
-+      stats.ts_logging = CURRENT_MSECS;
-+      stats.ts_flushing = jbd_time_diff(stats.ts_flushing, stats.ts_logging);
-+      stats.ts_blocks = commit_transaction->t_outstanding_credits;
-+      stats.ts_blocks_logged = 0;
-+
-       descriptor = NULL;
-       bufs = 0;
-       while (commit_transaction->t_buffers) {
-@@ -633,6 +649,7 @@
-                               submit_bh(WRITE, bh);
-                       }
-                       cond_resched();
-+                      stats.ts_blocks_logged += bufs;
-                       /* Force a new descriptor to be generated next
-                            time round the loop. */
-@@ -832,6 +849,7 @@
-               cp_transaction = jh->b_cp_transaction;
-               if (cp_transaction) {
-                       JBUFFER_TRACE(jh, "remove from old cp transaction");
-+                      cp_transaction->t_chp_stats.cs_dropped++;
-                       __journal_remove_checkpoint(jh);
-               }
-@@ -908,6 +926,36 @@
-       J_ASSERT(commit_transaction->t_state == T_COMMIT);
-+      commit_transaction->t_start = CURRENT_MSECS;
-+      stats.ts_logging = jbd_time_diff(stats.ts_logging,
-+                                       commit_transaction->t_start);
-+
-+      /*
-+       * File the transaction for history
-+       */
-+      stats.ts_type = JBD_STATS_RUN;
-+      stats.ts_tid = commit_transaction->t_tid;
-+      stats.ts_handle_count = commit_transaction->t_handle_count;
-+      spin_lock(&journal->j_history_lock);
-+      memcpy(journal->j_history + journal->j_history_cur, &stats,
-+                      sizeof(stats));
-+      if (++journal->j_history_cur == journal->j_history_max)
-+              journal->j_history_cur = 0;
-+
-+      /*
-+       * Calculate overall stats
-+       */
-+      journal->j_stats.ts_tid++;
-+      journal->j_stats.ts_wait += stats.ts_wait;
-+      journal->j_stats.ts_running += stats.ts_running;
-+      journal->j_stats.ts_locked += stats.ts_locked;
-+      journal->j_stats.ts_flushing += stats.ts_flushing;
-+      journal->j_stats.ts_logging += stats.ts_logging;
-+      journal->j_stats.ts_handle_count += stats.ts_handle_count;
-+      journal->j_stats.ts_blocks += stats.ts_blocks;
-+      journal->j_stats.ts_blocks_logged += stats.ts_blocks_logged;
-+      spin_unlock(&journal->j_history_lock);
-+
-       commit_transaction->t_state = T_FINISHED;
-       J_ASSERT(commit_transaction == journal->j_committing_transaction);
-       journal->j_commit_sequence = commit_transaction->t_tid;
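The jbd-stats hunks above time each transaction through its running, locked, flushing and logging phases and file the result into a fixed-size history array under j_history_lock, wrapping to the oldest slot when the array fills. A minimal sketch of that ring-buffer pattern follows; the names (txn_stats, stats_history, history_record) are illustrative only and do not appear in the RHEL5 patch itself.

#include <linux/spinlock.h>

/* Sketch only: one record per committed transaction, oldest overwritten first. */
struct txn_stats {
        unsigned int  tid;
        unsigned long run_ms;           /* time the transaction spent running */
        unsigned long commit_ms;        /* time it spent committing */
};

struct stats_history {
        spinlock_t        lock;
        struct txn_stats *slots;        /* array of 'max' entries */
        unsigned int      cur;          /* next slot to overwrite */
        unsigned int      max;
};

static void history_record(struct stats_history *h, const struct txn_stats *s)
{
        spin_lock(&h->lock);
        h->slots[h->cur] = *s;          /* file the entry */
        if (++h->cur == h->max)         /* wrap around at the end */
                h->cur = 0;
        spin_unlock(&h->lock);
}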
diff --git a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch
deleted file mode 100644 (file)
index 4a57ec3..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-This patch is no longer needed for Lustre, since Lustre 2.2.  It is kept
-in the kernel patch series for compatibility with older Lustre releases
-to simplify the upgrade process so that both the kernel and Lustre do
-not need to be upgraded at the same time.  See Jira issue LU-433.
-
-Index: linux-2.6.18-128.1.6/include/linux/jbd2.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h     2009-04-15 08:35:28.000000000 +0530
-+++ linux-2.6.18-128.1.6/include/linux/jbd2.h  2009-05-28 15:10:18.000000000 +0530
-@@ -381,6 +381,27 @@
-       bit_spin_unlock(BH_JournalHead, &bh->b_state);
- }
-+#define HAVE_JOURNAL_CALLBACK_STATUS
-+/**
-+ *   struct journal_callback - Base structure for callback information.
-+ *   @jcb_list: list information for other callbacks attached to the same handle.
-+ *   @jcb_func: Function to call with this callback structure.
-+ *
-+ *   This struct is a 'seed' structure for use with your own callback
-+ *   structs. If you are using callbacks you must allocate one of these
-+ *   or another struct of your own definition which has this struct
-+ *   as its first element and pass it to journal_callback_set().
-+ *
-+ *   This is used internally by jbd2 to maintain callback information.
-+ *
-+ *   See journal_callback_set for more information.
-+ **/
-+struct journal_callback {
-+      struct list_head jcb_list;              /* t_jcb_lock */
-+      void (*jcb_func)(struct journal_callback *jcb, int error);
-+      /* user data goes here */
-+};
-+
- struct jbd2_revoke_table_s;
- /**
-@@ -389,6 +410,7 @@
-  * @h_transaction: Which compound transaction is this update a part of?
-  * @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
-  * @h_ref: Reference count on this handle
-+ * @h_jcb: List of application registered callbacks for this handle.
-  * @h_err: Field for caller's use to track errors through large fs operations
-  * @h_sync: flag for sync-on-close
-  * @h_jdata: flag to force data journaling
-@@ -414,6 +436,13 @@
-       /* operations */
-       int                     h_err;
-+      /*
-+       * List of application registered callbacks for this handle. The
-+       * function(s) will be called after the transaction that this handle is
-+       * part of has been committed to disk. [t_jcb_lock]
-+       */
-+      struct list_head        h_jcb;
-+
-       /* Flags [no locking] */
-       unsigned int    h_sync:         1;      /* sync-on-close */
-       unsigned int    h_jdata:        1;      /* force data journaling */
-@@ -469,6 +498,8 @@
-  *    j_state_lock
-  *    ->j_list_lock                   (journal_unmap_buffer)
-  *
-+ *    t_handle_lock
-+ *    ->t_jcb_lock
-  */
- struct transaction_s
-@@ -615,6 +646,15 @@
-        */
-       int t_handle_count;
-+      /*
-+       * Protects the callback list
-+       */
-+      spinlock_t              t_jcb_lock;
-+      /*
-+       * List of registered callback functions for this transaction.
-+       * Called when the transaction is committed. [t_jcb_lock]
-+       */
-+      struct list_head        t_jcb;
-       /*
-        * For use by the filesystem to store fs-specific data
-        * structures associated with the transaction
-@@ -1018,6 +1058,9 @@
- extern int     jbd2_journal_flush (journal_t *);
- extern void    jbd2_journal_lock_updates (journal_t *);
- extern void    jbd2_journal_unlock_updates (journal_t *);
-+extern void    jbd2_journal_callback_set(handle_t *handle,
-+                                      void (*fn)(struct journal_callback *,int),
-+                                      struct journal_callback *jcb);
- extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
-                               struct block_device *fs_dev,
-Index: linux-2.6.18-128.1.6/fs/jbd2/checkpoint.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd2/checkpoint.c     2009-04-15 08:35:28.000000000 +0530
-+++ linux-2.6.18-128.1.6/fs/jbd2/checkpoint.c  2009-05-28 15:10:18.000000000 +0530
-@@ -695,6 +695,7 @@
-       J_ASSERT(transaction->t_checkpoint_list == NULL);
-       J_ASSERT(transaction->t_checkpoint_io_list == NULL);
-       J_ASSERT(transaction->t_updates == 0);
-+      J_ASSERT(list_empty(&transaction->t_jcb));
-       J_ASSERT(journal->j_committing_transaction != transaction);
-       J_ASSERT(journal->j_running_transaction != transaction);
-Index: linux-2.6.18-128.1.6/fs/jbd2/commit.c
-===================================================================
---- linux-2.6.18-164.6.1/fs/jbd2/commit.c      2010-01-21 11:24:52.000000000 +0530
-+++ linux-2.6.18-164.6.1_new/fs/jbd2/commit.c  2010-01-21 11:26:36.000000000 +0530
-@@ -832,6 +832,29 @@ wait_for_iobuf:
-            processing: any buffers committed as a result of this
-            transaction can be removed from any checkpoint list it was on
-            before. */
-+      /*
-+       * Call any callbacks that had been registered for handles in this
-+       * transaction.  It is up to the callback to free any allocated
-+       * memory.
-+       *
-+       * The spinlocking (t_jcb_lock) here is surely unnecessary...
-+       */
-+      spin_lock(&commit_transaction->t_jcb_lock);
-+      if (!list_empty(&commit_transaction->t_jcb)) {
-+              struct list_head *p, *n;
-+              int error = is_journal_aborted(journal);
-+
-+              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
-+                      struct journal_callback *jcb;
-+
-+                      jcb = list_entry(p, struct journal_callback, jcb_list);
-+                      list_del(p);
-+                      spin_unlock(&commit_transaction->t_jcb_lock);
-+                      jcb->jcb_func(jcb, error);
-+                      spin_lock(&commit_transaction->t_jcb_lock);
-+              }
-+      }
-+      spin_unlock(&commit_transaction->t_jcb_lock);
-       jbd_debug(3, "JBD: commit phase 6\n");
-Index: linux-2.6.18-128.1.6/fs/jbd2/journal.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd2/journal.c        2009-04-15 08:35:28.000000000 +0530
-+++ linux-2.6.18-128.1.6/fs/jbd2/journal.c     2009-05-28 17:13:35.000000000 +0530
-@@ -80,6 +80,8 @@
- EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
- EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
- EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
-+EXPORT_SYMBOL(jbd2_journal_callback_set);
-+EXPORT_SYMBOL(jbd2_journal_bmap);
- static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
- static void __journal_abort_soft (journal_t *journal, int errno);
-Index: linux-2.6.18-128.1.6/fs/jbd2/transaction.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd2/transaction.c    2009-04-15 08:35:28.000000000 +0530
-+++ linux-2.6.18-128.1.6/fs/jbd2/transaction.c 2009-05-28 15:11:28.000000000 +0530
-@@ -51,6 +51,9 @@
-       spin_lock_init(&transaction->t_handle_lock);
-       INIT_LIST_HEAD(&transaction->t_inode_list);
-       INIT_LIST_HEAD(&transaction->t_private_list);
-+      INIT_LIST_HEAD(&transaction->t_jcb);
-+      spin_lock_init(&transaction->t_jcb_lock);
-+
-       /* Set up the commit timer for the new transaction. */
-       journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
-@@ -251,6 +254,7 @@
-       memset(handle, 0, sizeof(*handle));
-       handle->h_buffer_credits = nblocks;
-       handle->h_ref = 1;
-+      INIT_LIST_HEAD(&handle->h_jcb);
-       lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
-                                               &jbd2_handle_key, 0);
-@@ -1349,6 +1353,36 @@
- }
- /**
-+ * void jbd2_journal_callback_set() -  Register a callback function for this handle.
-+ * @handle: handle to attach the callback to.
-+ * @func: function to callback.
-+ * @jcb:  structure with additional information required by func(), and
-+ *    some space for jbd2 internal information.
-+ *
-+ * The function will be
-+ * called when the transaction that this handle is part of has been
-+ * committed to disk with the original callback data struct and the
-+ * error status of the journal as parameters.  There is no guarantee of
-+ * ordering between handles within a single transaction, nor between
-+ * callbacks registered on the same handle.
-+ *
-+ * The caller is responsible for allocating the journal_callback struct.
-+ * This is to allow the caller to add as much extra data to the callback
-+ * as needed, but reduce the overhead of multiple allocations.  The caller
-+ * allocated struct must start with a struct journal_callback at offset 0,
-+ * and have the caller-specific data afterwards.
-+ */
-+void jbd2_journal_callback_set(handle_t *handle,
-+                    void (*func)(struct journal_callback *jcb, int error),
-+                    struct journal_callback *jcb)
-+{
-+      spin_lock(&handle->h_transaction->t_jcb_lock);
-+      list_add_tail(&jcb->jcb_list, &handle->h_jcb);
-+      spin_unlock(&handle->h_transaction->t_jcb_lock);
-+      jcb->jcb_func = func;
-+}
-+
-+/**
-  * int jbd2_journal_stop() - complete a transaction
-  * @handle: transaction to complete.
-  *
-@@ -1422,6 +1456,11 @@
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
-+      /* Move callbacks from the handle to the transaction. */
-+      spin_lock(&transaction->t_jcb_lock);
-+      list_splice(&handle->h_jcb, &transaction->t_jcb);
-+      spin_unlock(&transaction->t_jcb_lock);
-+
-       /*
-        * If the handle is marked SYNC, we need to set another commit
-        * going!  We also want to force a commit if the current
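The jcberr patch above gives handle owners a commit callback: jbd2_journal_callback_set() queues a caller-allocated struct journal_callback on the handle, journal_stop() splices it onto the transaction, and the commit path invokes jcb_func with the journal's abort status. A hedged sketch of how a caller might use it, assuming the patched jbd2.h is in place; my_commit_cb, my_commit_done and register_my_cb are hypothetical names.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/jbd2.h>

/* Caller state: struct journal_callback must be the first member. */
struct my_commit_cb {
        struct journal_callback jcb;    /* used internally by jbd2 */
        unsigned long           cookie; /* caller-specific data */
};

static void my_commit_done(struct journal_callback *jcb, int error)
{
        struct my_commit_cb *cb = (struct my_commit_cb *)jcb;

        /* error is is_journal_aborted() at commit time */
        if (!error)
                pr_debug("commit done, cookie=%lu\n", cb->cookie);
        kfree(cb);                      /* the callback owns the memory */
}

/* Called with an open handle, e.g. from a filesystem operation. */
static int register_my_cb(handle_t *handle, unsigned long cookie)
{
        struct my_commit_cb *cb = kmalloc(sizeof(*cb), GFP_NOFS);

        if (!cb)
                return -ENOMEM;
        cb->cookie = cookie;
        jbd2_journal_callback_set(handle, my_commit_done, &cb->jcb);
        return 0;
}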
diff --git a/lustre/kernel_patches/patches/jbd2_stats_proc_init-wrong-place.patch b/lustre/kernel_patches/patches/jbd2_stats_proc_init-wrong-place.patch
deleted file mode 100644 (file)
index a37c894..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-From 42e140bf105aea1c9679b1cd128aebc35196e6fc Mon Sep 17 00:00:00 2001
-From: yangsheng <sheng.yang@oracle.com>
-Date: Mon, 15 Nov 2010 21:26:35 +0800
-Subject: [PATCH]  jbd2_stats_proc_init wrong place.
-
- The jbd2_stats_proc_init() call was placed at the wrong location in
- jbd2_journal_init_dev().  This may prevent /proc/fs/jbd2/<dev>/*
- from being created when an external journal device is used.
-
- Reviewed-by: Andreas Dilger <andreas.dilger@oracle.com>
-
----
----
- fs/jbd2/journal.c |   16 ++++++++--------
- 1 files changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
-index c590d15..f837ba9 100644
---- a/fs/jbd2/journal.c
-+++ b/fs/jbd2/journal.c
-@@ -899,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
-       /* journal descriptor can store up to n blocks -bzzz */
-       journal->j_blocksize = blocksize;
-+      journal->j_dev = bdev;
-+      journal->j_fs_dev = fs_dev;
-+      journal->j_blk_offset = start;
-+      journal->j_maxlen = len;
-+      bdevname(journal->j_dev, journal->j_devname);
-+      p = journal->j_devname;
-+      while ((p = strchr(p, '/')))
-+              *p = '!';
-       jbd2_stats_proc_init(journal);
-       n = journal->j_blocksize / sizeof(journal_block_tag_t);
-       journal->j_wbufsize = n;
-@@ -908,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
-                       __func__);
-               goto out_err;
-       }
--      journal->j_dev = bdev;
--      journal->j_fs_dev = fs_dev;
--      journal->j_blk_offset = start;
--      journal->j_maxlen = len;
--      bdevname(journal->j_dev, journal->j_devname);
--      p = journal->j_devname;
--      while ((p = strchr(p, '/')))
--              *p = '!';
-       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-       if (!bh) {
--- 
-1.7.2.3
-
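The fix above is purely an ordering change: jbd2_stats_proc_init() names the /proc/fs/jbd2/<dev>/ directory after journal->j_devname, so the device fields have to be filled in first. Condensed (not a literal copy of the hunk), the corrected sequence is:

        journal->j_dev        = bdev;                   /* identify the device first */
        journal->j_fs_dev     = fs_dev;
        journal->j_blk_offset = start;
        journal->j_maxlen     = len;
        bdevname(journal->j_dev, journal->j_devname);   /* fills j_devname ... */
        jbd2_stats_proc_init(journal);                  /* ... which names the proc directory */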
diff --git a/lustre/kernel_patches/patches/lustre_iser_max_sectors_tuning_lustre2.0.patch b/lustre/kernel_patches/patches/lustre_iser_max_sectors_tuning_lustre2.0.patch
deleted file mode 100644 (file)
index 9f3c500..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-diff -Naur base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.c linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.c
---- base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.c  2010-09-09 16:57:15.000000000 -0400
-+++ linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.c       2010-09-09 17:02:17.000000000 -0400
-@@ -586,18 +586,25 @@
-       iser_conn_terminate(ib_conn);
- }
-+static int iscsi_iser_slave_configure(struct scsi_device *sdev)
-+{
-+      blk_queue_dma_alignment(sdev->request_queue, 0);
-+      return 0;
-+}
-+
- static struct scsi_host_template iscsi_iser_sht = {
-       .module                 = THIS_MODULE,
-       .name                   = "iSCSI Initiator over iSER, v." DRV_VER,
-       .queuecommand           = iscsi2_queuecommand,
-       .change_queue_depth     = iscsi2_change_queue_depth,
-       .sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
--      .max_sectors            = 1024,
-+      .max_sectors            = 0xffff,
-       .cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
-       .eh_abort_handler       = iscsi2_eh_abort,
-       .eh_device_reset_handler= iscsi2_eh_device_reset,
-       .eh_host_reset_handler= iscsi2_eh_target_reset,
-       .use_clustering         = DISABLE_CLUSTERING,
-+      .slave_configure        = iscsi_iser_slave_configure,
-       .proc_name              = "iscsi_iser",
-       .this_id                = -1,
- };
-diff -Naur base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.h linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.h
---- base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.h  2010-09-09 16:57:15.000000000 -0400
-+++ linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iscsi_iser.h       2010-09-09 17:03:17.000000000 -0400
-@@ -92,7 +92,8 @@
- #define MASK_4K       (~(SIZE_4K-1))
-                                       /* support upto 512KB in one RDMA */
--#define ISCSI_ISER_SG_TABLESIZE         (0x80000 >> SHIFT_4K)
-+/* FMR space for 1 MB of 4k-page transfers, plus 1 if not page aligned */
-+#define ISCSI_ISER_SG_TABLESIZE                (((1<<20) >> SHIFT_4K) + 1)
- #define ISER_DEF_CMD_PER_LUN          128
- /* QP settings */
-diff -Naur base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iser_verbs.c linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iser_verbs.c
---- base.linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iser_verbs.c  2010-09-09 16:57:15.000000000 -0400
-+++ linux-2.6.18.x86_64/drivers/infiniband/ulp/iser/iser_verbs.c       2010-09-09 17:04:44.000000000 -0400
-@@ -137,7 +137,7 @@
-       device = ib_conn->device;
-       ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
--                                  (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
-+                                  sizeof(u64) * ISCSI_ISER_SG_TABLESIZE,
-                                   GFP_KERNEL);
-       if (!ib_conn->page_vec) {
-               ret = -ENOMEM;
-@@ -146,9 +146,7 @@
-       ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
-       params.page_shift        = SHIFT_4K;
--      /* when the first/last SG element are not start/end *
--       * page aligned, the map whould be of N+1 pages     */
--      params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
-+      params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE;
-       /* make the pool size twice the max number of SCSI commands *
-        * the ML is expected to queue, watermark for unmap at 50%  */
-       params.pool_size         = ISCSI_DEF_XMIT_CMDS_MAX * 2;
-diff -Naur base.linux-2.6.18.x86_64/include/scsi/libiscsi2.h linux-2.6.18.x86_64/include/scsi/libiscsi2.h
---- base.linux-2.6.18.x86_64/include/scsi/libiscsi2.h  2010-09-09 16:57:35.000000000 -0400
-+++ linux-2.6.18.x86_64/include/scsi/libiscsi2.h       2010-09-09 17:05:34.000000000 -0400
-@@ -43,7 +43,7 @@
- struct iscsi_nopin;
- struct device;
--#define ISCSI_DEF_XMIT_CMDS_MAX       128     /* must be power of 2 */
-+#define ISCSI_DEF_XMIT_CMDS_MAX       256     /* must be power of 2 */
- #define ISCSI_MGMT_CMDS_MAX   15
- #define ISCSI_DEF_CMD_PER_LUN 32
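The iSER tuning above raises max_sectors to 0xffff (about 32 MiB per SCSI command in 512-byte sectors) and sizes the FMR page vector for 1 MB transfers. With SHIFT_4K equal to 12, the new table size works out to 257 entries, as in this small, illustrative calculation:

#include <stdio.h>

#define SHIFT_4K 12     /* 4 KiB pages */

int main(void)
{
        /* 1 MiB of 4 KiB pages, plus one entry if the buffer is not page aligned */
        unsigned int sg_tablesize = ((1u << 20) >> SHIFT_4K) + 1;

        printf("ISCSI_ISER_SG_TABLESIZE = %u\n", sg_tablesize);        /* prints 257 */
        return 0;
}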
diff --git a/lustre/kernel_patches/patches/md-avoid-bug_on-when-bmc-overflow.patch b/lustre/kernel_patches/patches/md-avoid-bug_on-when-bmc-overflow.patch
deleted file mode 100644 (file)
index f64557e..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-diff .prev/drivers/md/bitmap.c ./drivers/md/bitmap.c
---- .prev/drivers/md/bitmap.c  2007-02-07 13:03:56.000000000 +1100
-+++ ./drivers/md/bitmap.c      2007-02-07 21:34:47.000000000 +1100
-@@ -1160,6 +1160,22 @@ int bitmap_startwrite(struct bitmap *bit
-                       return 0;
-               }
-+              if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) {
-+                      DEFINE_WAIT(__wait);
-+                      /* note that it is safe to do the prepare_to_wait
-+                       * after the test as long as we do it before dropping
-+                       * the spinlock.
-+                       */
-+                      prepare_to_wait(&bitmap->overflow_wait, &__wait,
-+                                      TASK_UNINTERRUPTIBLE);
-+                      spin_unlock_irq(&bitmap->lock);
-+                      bitmap->mddev->queue
-+                              ->unplug_fn(bitmap->mddev->queue);
-+                      schedule();
-+                      finish_wait(&bitmap->overflow_wait, &__wait);
-+                      continue;
-+              }
-+
-               switch(*bmc) {
-               case 0:
-                       bitmap_file_set_bit(bitmap, offset);
-@@ -1169,7 +1185,7 @@ int bitmap_startwrite(struct bitmap *bit
-               case 1:
-                       *bmc = 2;
-               }
--              BUG_ON((*bmc & COUNTER_MAX) == COUNTER_MAX);
-+
-               (*bmc)++;
-               spin_unlock_irq(&bitmap->lock);
-@@ -1207,6 +1223,9 @@ void bitmap_endwrite(struct bitmap *bitm
-               if (!success && ! (*bmc & NEEDED_MASK))
-                       *bmc |= NEEDED_MASK;
-+              if ((*bmc & COUNTER_MAX) == COUNTER_MAX)
-+                      wake_up(&bitmap->overflow_wait);
-+
-               (*bmc)--;
-               if (*bmc <= 2) {
-                       set_page_attr(bitmap,
-@@ -1431,6 +1450,7 @@ int bitmap_create(mddev_t *mddev)
-       spin_lock_init(&bitmap->lock);
-       atomic_set(&bitmap->pending_writes, 0);
-       init_waitqueue_head(&bitmap->write_wait);
-+      init_waitqueue_head(&bitmap->overflow_wait);
-       bitmap->mddev = mddev;
-diff .prev/include/linux/raid/bitmap.h ./include/linux/raid/bitmap.h
---- .prev/include/linux/raid/bitmap.h  2007-02-07 13:03:56.000000000 +1100
-+++ ./include/linux/raid/bitmap.h      2007-02-07 20:57:57.000000000 +1100
-@@ -247,6 +247,7 @@ struct bitmap {
-       atomic_t pending_writes; /* pending writes to the bitmap file */
-       wait_queue_head_t write_wait;
-+      wait_queue_head_t overflow_wait;
- };
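Rather than tripping BUG_ON() when a bitmap counter reaches COUNTER_MAX, the patch above makes bitmap_startwrite() sleep on the new overflow_wait queue until bitmap_endwrite() drops a counter from the ceiling and issues a wake_up(). A generic sketch of that sleep/wake pattern, with hypothetical names rather than the md structures:

#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

static DECLARE_WAIT_QUEUE_HEAD(overflow_wait);
static DEFINE_SPINLOCK(counter_lock);
static unsigned int counter, counter_max = 16383;

/* Writer side: wait for room instead of overflowing the counter. */
static void take_ref(void)
{
        for (;;) {
                DEFINE_WAIT(wait);

                spin_lock_irq(&counter_lock);
                if (counter < counter_max) {
                        counter++;
                        spin_unlock_irq(&counter_lock);
                        return;
                }
                /* prepare before dropping the lock so the wake-up cannot be missed */
                prepare_to_wait(&overflow_wait, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(&counter_lock);
                schedule();
                finish_wait(&overflow_wait, &wait);
        }
}

/* Completion side: release a reference and wake any blocked writer. */
static void drop_ref(void)
{
        spin_lock_irq(&counter_lock);
        counter--;
        spin_unlock_irq(&counter_lock);
        wake_up(&overflow_wait);
}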
diff --git a/lustre/kernel_patches/patches/md-rebuild-policy.patch b/lustre/kernel_patches/patches/md-rebuild-policy.patch
deleted file mode 100644 (file)
index d42dae4..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-Index: linux-2.6.18-128.1.6/drivers/md/md.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/drivers/md/md.c  2009-04-14 21:05:26.000000000 -0600
-+++ linux-2.6.18-128.1.6/drivers/md/md.c       2009-06-02 23:25:31.000000000 -0600
-@@ -90,6 +90,8 @@
- static int sysctl_speed_limit_min = 1000;
- static int sysctl_speed_limit_max = 200000;
-+static int sysctl_rebuild_window_size = 256;
-+static int sysctl_disk_idle_size = 4096;
- static inline int speed_min(mddev_t *mddev)
- {
-       return mddev->sync_speed_min ?
-@@ -121,6 +123,22 @@
-               .mode           = S_IRUGO|S_IWUSR,
-               .proc_handler   = &proc_dointvec,
-       },
-+      {
-+              .ctl_name       = DEV_RAID_REBUILD_WINDOW,
-+              .procname       = "rebuild_window_size",
-+              .data           = &sysctl_rebuild_window_size,
-+              .maxlen         = sizeof(int),
-+              .mode           = S_IRUGO|S_IWUSR,
-+              .proc_handler   = &proc_dointvec,
-+      },
-+      {
-+              .ctl_name       = DEV_RAID_DISK_IDLE_SIZE,
-+              .procname       = "disk_idle_size",
-+              .data           = &sysctl_disk_idle_size,
-+              .maxlen         = sizeof(int),
-+              .mode           = S_IRUGO|S_IWUSR,
-+              .proc_handler   = &proc_dointvec,
-+      },
-       { .ctl_name = 0 }
- };
-@@ -5009,15 +5027,16 @@
- {
-       mdk_rdev_t * rdev;
-       int idle;
--      unsigned long curr_events;
-+      unsigned long rw, sync;
-       idle = 1;
-       rcu_read_lock();
-       rdev_for_each_rcu(rdev, mddev) {
-               struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
--              curr_events = disk_stat_read(disk, sectors[0]) + 
--                              disk_stat_read(disk, sectors[1]) - 
--                              atomic_read(&disk->sync_io);
-+
-+              rw = disk_stat_read(disk, sectors[READ])+disk_stat_read(disk, sectors[WRITE]);
-+              sync = atomic_read(&disk->sync_io);
-+
-               /* The difference between curr_events and last_events
-                * will be affected by any new non-sync IO (making
-                * curr_events bigger) and any difference in the amount of
-@@ -5031,9 +5050,9 @@
-                *
-                * Note: the following is an unsigned comparison.
-                */
--              if ((curr_events - rdev->last_events + 4096) > 8192) {
--                      rdev->last_events = curr_events;
-+              if (rw - rdev->last_events > sync + sysctl_disk_idle_size) {
-                       idle = 0;
-+                      rdev->last_events = rw - sync;
-               }
-       }
-       rcu_read_unlock();
-@@ -5100,8 +5119,7 @@
- void md_do_sync(mddev_t *mddev)
- {
-       mddev_t *mddev2;
--      unsigned int currspeed = 0,
--               window;
-+      unsigned int currspeed = 0;
-       sector_t max_sectors,j, io_sectors;
-       unsigned long mark[SYNC_MARKS];
-       sector_t mark_cnt[SYNC_MARKS];
-@@ -5221,9 +5239,8 @@
-       /*
-        * Tune reconstruction:
-        */
--      window = 32*(PAGE_SIZE/512);
-       printk(KERN_INFO "md: using %dk window, over a total of %llu blocks.\n",
--              window/2,(unsigned long long) max_sectors/2);
-+              sysctl_rebuild_window_size/2,(unsigned long long) max_sectors/2);
-       atomic_set(&mddev->recovery_active, 0);
-       init_waitqueue_head(&mddev->recovery_wait);
-@@ -5261,7 +5278,7 @@
-                        */
-                       md_new_event(mddev);
--              if (last_check + window > io_sectors || j == max_sectors)
-+              if (last_check + sysctl_rebuild_window_size > io_sectors || j == max_sectors)
-                       continue;
-               last_check = io_sectors;
-@@ -5282,7 +5299,6 @@
-                       last_mark = next;
-               }
--
-               if (kthread_should_stop()) {
-                       /*
-                        * got a signal, exit.
-@@ -5306,10 +5322,16 @@
-               currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
-                       /((jiffies-mddev->resync_mark)/HZ +1) +1;
--
-               if (currspeed > speed_min(mddev)) {
-                       if ((currspeed > speed_max(mddev)) ||
-                                       !is_mddev_idle(mddev)) {
-+                              static unsigned long next_report;
-+                              if (time_after(jiffies, next_report)) {
-+                                      printk(KERN_INFO "md: rebuild %s throttled due to IO\n",
-+                                              mdname(mddev));
-+                                      /* once per 10 minutes */
-+                                      next_report = jiffies + 600 * HZ;
-+                              }
-                               msleep(500);
-                               goto repeat;
-                       }
-Index: linux-2.6.18-128.1.6/include/linux/sysctl.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/sysctl.h   2009-04-14 21:05:41.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/sysctl.h        2009-06-02 23:25:31.000000000 -0600
-@@ -928,7 +928,9 @@
- /* /proc/sys/dev/raid */
- enum {
-       DEV_RAID_SPEED_LIMIT_MIN=1,
--      DEV_RAID_SPEED_LIMIT_MAX=2
-+      DEV_RAID_SPEED_LIMIT_MAX=2,
-+      DEV_RAID_REBUILD_WINDOW=3,
-+      DEV_RAID_DISK_IDLE_SIZE=4
- };
- /* /proc/sys/dev/parport/default */
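With the rebuild-policy patch above, both the resync window and the idle threshold become runtime sysctls under /proc/sys/dev/raid/, and a member disk now counts as busy only when new non-sync I/O since the last check exceeds the resync I/O plus disk_idle_size. A simplified sketch of that idle test, using hypothetical names instead of mdk_rdev_t:

/* Per-device counters, in sectors. */
struct disk_counters {
        unsigned long rw;               /* total read + write sectors so far */
        unsigned long sync;             /* sectors issued by the resync thread */
        unsigned long last_events;      /* rw - sync recorded at the previous check */
};

static int device_is_idle(struct disk_counters *d, unsigned long disk_idle_size)
{
        /* Busy: application I/O outgrew resync I/O by more than the threshold. */
        if (d->rw - d->last_events > d->sync + disk_idle_size) {
                d->last_events = d->rw - d->sync;
                return 0;
        }
        return 1;
}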
diff --git a/lustre/kernel_patches/patches/mpt-fusion-max-sge.patch b/lustre/kernel_patches/patches/mpt-fusion-max-sge.patch
deleted file mode 100644 (file)
index 3fa6c48..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-diff -Nrup linux-2.6.18-92.1.10.orig/drivers/message/fusion/Kconfig linux-2.6.18-92.1.10/drivers/message/fusion/Kconfig
---- linux-2.6.18-92.1.10.orig/drivers/message/fusion/Kconfig   2008-12-11 10:27:02.000000000 +1100
-+++ linux-2.6.18-92.1.10/drivers/message/fusion/Kconfig        2008-12-11 10:28:42.000000000 +1100
-@@ -59,10 +59,10 @@ config FUSION_SAS
-         LSISAS1078
- config FUSION_MAX_SGE
--      int "Maximum number of scatter gather entries (16 - 128)"
-+      int "Maximum number of scatter gather entries (16 - 256)"
-       depends on FUSION
--      default "128"
--      range 16 128
-+      default "256"
-+      range 16 256
-       help
-         This option allows you to specify the maximum number of scatter-
-         gather entries per I/O. The driver default is 128, which matches
-diff -Nrup linux-2.6.18-92.1.10.orig/drivers/message/fusion/mptbase.h linux-2.6.18-92.1.10/drivers/message/fusion/mptbase.h
---- linux-2.6.18-92.1.10.orig/drivers/message/fusion/mptbase.h 2008-12-11 10:27:03.000000000 +1100
-+++ linux-2.6.18-92.1.10/drivers/message/fusion/mptbase.h      2008-12-11 10:30:55.000000000 +1100
-@@ -166,8 +166,8 @@
- #ifdef  CONFIG_FUSION_MAX_SGE
- #if     CONFIG_FUSION_MAX_SGE  < 16
- #define MPT_SCSI_SG_DEPTH     16
--#elif   CONFIG_FUSION_MAX_SGE  > 128
--#define MPT_SCSI_SG_DEPTH     128
-+#elif   CONFIG_FUSION_MAX_SGE  > 256 
-+#define MPT_SCSI_SG_DEPTH     256
- #else
- #define MPT_SCSI_SG_DEPTH     CONFIG_FUSION_MAX_SGE
- #endif
diff --git a/lustre/kernel_patches/patches/prune-icache-use-trylock-rhel5.patch b/lustre/kernel_patches/patches/prune-icache-use-trylock-rhel5.patch
deleted file mode 100644 (file)
index beadec2..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
---- linux/fs/inode.c.orig      2009-01-24 03:28:57.000000000 +0800
-+++ linux/fs/inode.c   2009-01-24 03:30:18.000000000 +0800
-@@ -418,7 +418,9 @@ static void prune_icache(int nr_to_scan)
-       int nr_scanned;
-       unsigned long reap = 0;
--      mutex_lock(&iprune_mutex);
-+      if (!mutex_trylock(&iprune_mutex))
-+              return;
-+
-       spin_lock(&inode_lock);
-       for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
-               struct inode *inode;
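The prune-icache change swaps the unconditional mutex_lock() for mutex_trylock(), so a task in the memory-reclaim path skips the pruning pass instead of blocking (and possibly deadlocking) behind another pruner. A generic sketch of the bail-out pattern, with hypothetical names:

#include <linux/mutex.h>

static DEFINE_MUTEX(prune_mutex);

static void prune_some_cache(int nr_to_scan)
{
        /* Under memory pressure, give up the pass rather than wait. */
        if (!mutex_trylock(&prune_mutex))
                return;

        /* ... scan and free up to nr_to_scan entries ... */

        mutex_unlock(&prune_mutex);
}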
diff --git a/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch b/lustre/kernel_patches/patches/quota-large-limits-rhel5.patch
deleted file mode 100644 (file)
index e53d871..0000000
+++ /dev/null
@@ -1,622 +0,0 @@
-Index: linux-2.6.18-128.1.6/fs/dquot.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/dquot.c       2009-04-14 21:04:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/dquot.c    2009-06-02 23:26:36.000000000 -0600
-@@ -1592,10 +1592,19 @@
- }
- /* Generic routine for setting common part of quota structure */
--static void do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
-+static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
- {
-       struct mem_dqblk *dm = &dquot->dq_dqb;
-       int check_blim = 0, check_ilim = 0;
-+      struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
-+
-+      if ((di->dqb_valid & QIF_BLIMITS &&
-+           (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
-+            di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
-+          (di->dqb_valid & QIF_ILIMITS &&
-+           (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
-+            di->dqb_isoftlimit > dqi->dqi_maxilimit)))
-+              return -ERANGE;
-       spin_lock(&dq_data_lock);
-       if (di->dqb_valid & QIF_SPACE) {
-@@ -1627,7 +1636,7 @@
-                       clear_bit(DQ_BLKS_B, &dquot->dq_flags);
-               }
-               else if (!(di->dqb_valid & QIF_BTIME))  /* Set grace only if user hasn't provided his own... */
--                      dm->dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
-+                      dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
-       }
-       if (check_ilim) {
-               if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
-@@ -1635,7 +1644,7 @@
-                       clear_bit(DQ_INODES_B, &dquot->dq_flags);
-               }
-               else if (!(di->dqb_valid & QIF_ITIME))  /* Set grace only if user hasn't provided his own... */
--                      dm->dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
-+                      dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
-       }
-       if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
-               clear_bit(DQ_FAKE_B, &dquot->dq_flags);
-@@ -1643,21 +1652,24 @@
-               set_bit(DQ_FAKE_B, &dquot->dq_flags);
-       spin_unlock(&dq_data_lock);
-       mark_dquot_dirty(dquot);
-+
-+      return 0;
- }
- int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
- {
-       struct dquot *dquot;
-+      int rc;
-       mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-       if (!(dquot = dqget(sb, id, type))) {
-               mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
-               return -ESRCH;
-       }
--      do_set_dqblk(dquot, di);
-+      rc = do_set_dqblk(dquot, di);
-       dqput(dquot);
-       mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
--      return 0;
-+      return rc;
- }
- /* Generic routine for getting common part of quota file information */
-Index: linux-2.6.18-128.1.6/fs/quota_v1.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/quota_v1.c    2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/quota_v1.c 2009-06-02 23:26:36.000000000 -0600
-@@ -139,6 +139,9 @@
-               goto out;
-       }
-       ret = 0;
-+      /* limits are stored as unsigned 32-bit data */
-+      dqopt->info[type].dqi_maxblimit = 0xffffffff;
-+      dqopt->info[type].dqi_maxilimit = 0xffffffff;
-       dqopt->info[type].dqi_igrace = dqblk.dqb_itime ? dqblk.dqb_itime : MAX_IQ_TIME;
-       dqopt->info[type].dqi_bgrace = dqblk.dqb_btime ? dqblk.dqb_btime : MAX_DQ_TIME;
- out:
-Index: linux-2.6.18-128.1.6/fs/quota_v2.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/quota_v2.c    2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/quota_v2.c 2009-06-02 23:26:36.000000000 -0600
-@@ -23,26 +23,64 @@
- typedef char *dqbuf_t;
- #define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
--#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
-+#define GETENTRIES(buf) ((union v2_disk_dqblk *)(((char *)buf) + \
-+                       sizeof(struct v2_disk_dqdbheader)))
-+#define REV_ASSERT(r) BUG_ON((rev) != 0 && (rev) != 1)
-+
-+static const union v2_disk_dqblk emptydquot;
-+static const union v2_disk_dqblk fakedquot[2] = {
-+      {.r0 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} },
-+      {.r1 = {.dqb_itime = __constant_cpu_to_le64(1LLU)} }
-+};
--/* Check whether given file is really vfsv0 quotafile */
--static int v2_check_quota_file(struct super_block *sb, int type)
-+static inline uint v2_dqblksz(uint rev)
-+{
-+      uint sz;
-+
-+      REV_ASSERT(rev);
-+
-+      if (rev == 0)
-+              sz = sizeof(struct v2_disk_dqblk_r0);
-+      else
-+              sz = sizeof(struct v2_disk_dqblk_r1);
-+
-+      return sz;
-+}
-+
-+/* Number of quota entries in a block */
-+static inline int v2_dqstrinblk(uint rev)
-+{
-+      return (V2_DQBLKSIZE-sizeof(struct v2_disk_dqdbheader))/v2_dqblksz(rev);
-+}
-+
-+/* Get revision of a quota file, -1 if it does not look a quota file */
-+static int v2_quota_file_revision(struct super_block *sb, int type)
- {
-       struct v2_disk_dqheader dqhead;
-       ssize_t size;
-       static const uint quota_magics[] = V2_INITQMAGICS;
--      static const uint quota_versions[] = V2_INITQVERSIONS;
-+      static const uint quota_versions_r0[] = V2_INITQVERSIONS_R0;
-+      static const uint quota_versions_r1[] = V2_INITQVERSIONS_R1;
-  
-       size = sb->s_op->quota_read(sb, type, (char *)&dqhead, sizeof(struct v2_disk_dqheader), 0);
-       if (size != sizeof(struct v2_disk_dqheader)) {
-               printk("quota_v2: failed read expected=%zd got=%zd\n",
-                       sizeof(struct v2_disk_dqheader), size);
--              return 0;
-+              return -1;
-       }
--      if (le32_to_cpu(dqhead.dqh_magic) != quota_magics[type] ||
--          le32_to_cpu(dqhead.dqh_version) != quota_versions[type])
--              return 0;
--      return 1;
-+      if (le32_to_cpu(dqhead.dqh_magic) == quota_magics[type]) {
-+              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r0[type])
-+                      return 0;
-+              if (le32_to_cpu(dqhead.dqh_version) == quota_versions_r1[type])
-+                      return 1;
-+      }
-+      return -1;
-+}
-+
-+/* Check whether given file is really vfsv0 quotafile */
-+static inline int v2_check_quota_file(struct super_block *sb, int type)
-+{
-+      return v2_quota_file_revision(sb, type) != -1;
- }
- /* Read information header from quota file */
-@@ -51,6 +89,13 @@
-       struct v2_disk_dqinfo dinfo;
-       struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
-       ssize_t size;
-+      int rev;
-+
-+      rev = v2_quota_file_revision(sb, type);
-+      if (rev < 0) {
-+              printk(KERN_WARNING "Second quota file check failed.\n");
-+              return -1;
-+      }
-       size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
-              sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
-@@ -65,6 +110,16 @@
-       info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
-       info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
-       info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
-+
-+      info->u.v2_i.dqi_revision = rev;
-+      if (rev == 0) {
-+              info->dqi_maxblimit = 0xffffffffULL;
-+              info->dqi_maxilimit = 0xffffffffULL;
-+      } else {
-+              info->dqi_maxblimit = 0xffffffffffffffffULL;
-+              info->dqi_maxilimit = 0xffffffffffffffffULL;
-+      }
-+
-       return 0;
- }
-@@ -94,29 +149,61 @@
-       return 0;
- }
--static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
-+static void disk2memdqb(struct mem_dqblk *m, union v2_disk_dqblk *d, uint rev)
- {
--      m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
--      m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
--      m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
--      m->dqb_itime = le64_to_cpu(d->dqb_itime);
--      m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
--      m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
--      m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
--      m->dqb_btime = le64_to_cpu(d->dqb_btime);
--}
--
--static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
--{
--      d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
--      d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
--      d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
--      d->dqb_itime = cpu_to_le64(m->dqb_itime);
--      d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
--      d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
--      d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
--      d->dqb_btime = cpu_to_le64(m->dqb_btime);
--      d->dqb_id = cpu_to_le32(id);
-+      REV_ASSERT(rev);
-+
-+      if (rev == 0) {
-+              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
-+              m->dqb_ihardlimit = le32_to_cpu(ddqblk->dqb_ihardlimit);
-+              m->dqb_isoftlimit = le32_to_cpu(ddqblk->dqb_isoftlimit);
-+              m->dqb_curinodes = le32_to_cpu(ddqblk->dqb_curinodes);
-+              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
-+              m->dqb_bhardlimit = le32_to_cpu(ddqblk->dqb_bhardlimit);
-+              m->dqb_bsoftlimit = le32_to_cpu(ddqblk->dqb_bsoftlimit);
-+              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
-+              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
-+      } else {
-+              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
-+              m->dqb_ihardlimit = le64_to_cpu(ddqblk->dqb_ihardlimit);
-+              m->dqb_isoftlimit = le64_to_cpu(ddqblk->dqb_isoftlimit);
-+              m->dqb_curinodes = le64_to_cpu(ddqblk->dqb_curinodes);
-+              m->dqb_itime = le64_to_cpu(ddqblk->dqb_itime);
-+              m->dqb_bhardlimit = le64_to_cpu(ddqblk->dqb_bhardlimit);
-+              m->dqb_bsoftlimit = le64_to_cpu(ddqblk->dqb_bsoftlimit);
-+              m->dqb_curspace = le64_to_cpu(ddqblk->dqb_curspace);
-+              m->dqb_btime = le64_to_cpu(ddqblk->dqb_btime);
-+      }
-+}
-+
-+static void mem2diskdqb(union v2_disk_dqblk *d, struct mem_dqblk *m,
-+                      qid_t id, uint rev)
-+{
-+      REV_ASSERT(rev);
-+
-+      if (rev == 0) {
-+              struct v2_disk_dqblk_r0 *ddqblk = &d->r0;
-+              ddqblk->dqb_id = cpu_to_le32(id);
-+              ddqblk->dqb_ihardlimit = cpu_to_le32((__u32)m->dqb_ihardlimit);
-+              ddqblk->dqb_isoftlimit = cpu_to_le32((__u32)m->dqb_isoftlimit);
-+              ddqblk->dqb_curinodes = cpu_to_le32((__u32)m->dqb_curinodes);
-+              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
-+              ddqblk->dqb_bhardlimit = cpu_to_le32((__u32)m->dqb_bhardlimit);
-+              ddqblk->dqb_bsoftlimit = cpu_to_le32((__u32)m->dqb_bsoftlimit);
-+              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-+              ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
-+      } else {
-+              struct v2_disk_dqblk_r1 *ddqblk = &d->r1;
-+              ddqblk->dqb_id = cpu_to_le32(id);
-+              ddqblk->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
-+              ddqblk->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
-+              ddqblk->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
-+              ddqblk->dqb_itime = cpu_to_le64(m->dqb_itime);
-+              ddqblk->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
-+              ddqblk->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
-+              ddqblk->dqb_curspace = cpu_to_le64(m->dqb_curspace);
-+              ddqblk->dqb_btime = cpu_to_le64(ddqblk->dqb_btime);
-+      }
- }
- static dqbuf_t getdqbuf(void)
-@@ -268,10 +355,10 @@
- {
-       struct super_block *sb = dquot->dq_sb;
-       struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
--      uint blk, i;
-+      uint blk, i, rev = info->u.v2_i.dqi_revision;
-+      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
-       struct v2_disk_dqdbheader *dh;
--      struct v2_disk_dqblk *ddquot;
--      struct v2_disk_dqblk fakedquot;
-+      union v2_disk_dqblk *ddquot;
-       dqbuf_t buf;
-       *err = 0;
-@@ -298,17 +385,18 @@
-               info->u.v2_i.dqi_free_entry = blk;
-               mark_info_dirty(sb, dquot->dq_type);
-       }
--      if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK)   /* Block will be full? */
-+      /* Block will be full? */
-+      if (le16_to_cpu(dh->dqdh_entries)+1 >= dqstrinblk)
-               if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
-                       printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
-                       goto out_buf;
-               }
-       dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
--      memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
-       /* Find free structure in block */
--      for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
-+      for (i = 0; i < dqstrinblk && memcmp(&emptydquot, ddquot, dqblksz);
-+           i++, ddquot = (char *)ddquot + dqblksz);
- #ifdef __QUOTA_V2_PARANOIA
--      if (i == V2_DQSTRINBLK) {
-+      if (i == dqstrinblk) {
-               printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
-               *err = -EIO;
-               goto out_buf;
-@@ -318,7 +406,8 @@
-               printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
-               goto out_buf;
-       }
--      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
-+      dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+
-+                      ((char *)ddquot - (char *)buf);
-       freedqbuf(buf);
-       return blk;
- out_buf:
-@@ -392,7 +481,9 @@
- {
-       int type = dquot->dq_type;
-       ssize_t ret;
--      struct v2_disk_dqblk ddquot, empty;
-+      union v2_disk_dqblk ddquot;
-+      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
-+      uint dqblksz = v2_dqblksz(rev);
-       /* dq_off is guarded by dqio_mutex */
-       if (!dquot->dq_off)
-@@ -401,18 +492,22 @@
-                       return ret;
-               }
-       spin_lock(&dq_data_lock);
--      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
-+      mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id, rev);
-       /* Argh... We may need to write structure full of zeroes but that would be
-        * treated as an empty place by the rest of the code. Format change would
-        * be definitely cleaner but the problems probably are not worth it */
--      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
--      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
--              ddquot.dqb_itime = cpu_to_le64(1);
-+      if (!memcmp(&emptydquot, &ddquot, dqblksz)) {
-+              if (rev == 0)
-+                      ddquot.r0.dqb_itime = cpu_to_le64(1);
-+              else
-+                      ddquot.r1.dqb_itime = cpu_to_le64(1);
-+      }
-       spin_unlock(&dq_data_lock);
-       ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
--            (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
--      if (ret != sizeof(struct v2_disk_dqblk)) {
--              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
-+            (char *)&ddquot, dqblksz, dquot->dq_off);
-+      if (ret != dqblksz) {
-+              printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
-+                      dquot->dq_sb->s_id);
-               if (ret >= 0)
-                       ret = -ENOSPC;
-       }
-@@ -431,6 +526,7 @@
-       struct v2_disk_dqdbheader *dh;
-       dqbuf_t buf = getdqbuf();
-       int ret = 0;
-+      uint rev = sb_dqopt(sb)->info[type].u.v2_i.dqi_revision;
-       if (!buf)
-               return -ENOMEM;
-@@ -456,8 +552,8 @@
-       }
-       else {
-               memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
--                sizeof(struct v2_disk_dqblk));
--              if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
-+                v2_dqblksz(rev));
-+              if (le16_to_cpu(dh->dqdh_entries) == v2_dqstrinblk(rev)-1) {
-                       /* Insert will write block itself */
-                       if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
-                               printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
-@@ -529,41 +625,56 @@
-       return remove_tree(dquot, &tmp, 0);
- }
-+static inline __u32 dqid(union v2_disk_dqblk *ddquot, uint rev)
-+{
-+      __u32 dq_id;
-+
-+      REV_ASSERT(rev);
-+
-+      if (rev == 0)
-+              dq_id = le32_to_cpu(ddquot->r0.dqb_id);
-+      else
-+              dq_id = le32_to_cpu(ddquot->r1.dqb_id);
-+
-+      return dq_id;
-+}
-+
- /* Find entry in block */
- static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
- {
-       dqbuf_t buf = getdqbuf();
-       loff_t ret = 0;
-       int i;
--      struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
-+      union v2_disk_dqblk *ddquot = GETENTRIES(buf);
-+      int type = dquot->dq_type;
-+      uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.dqi_revision;
-+      uint dqblksz = v2_dqblksz(rev), dqstrinblk = v2_dqstrinblk(rev);
-       if (!buf)
-               return -ENOMEM;
--      if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
-+
-+      ret = read_blk(dquot->dq_sb, type, blk, buf);
-+      if (ret < 0) {
-               printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
-               goto out_buf;
-       }
-       if (dquot->dq_id)
--              for (i = 0; i < V2_DQSTRINBLK &&
--                   le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
-+              for (i = 0; i < dqstrinblk && dqid(ddquot, rev) != dquot->dq_id;
-+                   i++, ddquot = (char *)ddquot + dqblksz);
-       else {  /* ID 0 as a bit more complicated searching... */
--              struct v2_disk_dqblk fakedquot;
--
--              memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
--              for (i = 0; i < V2_DQSTRINBLK; i++)
--                      if (!le32_to_cpu(ddquot[i].dqb_id) &&
--                          memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
-+              for (i = 0; i < dqstrinblk; i++, ddquot = (char *)ddquot+dqblksz)
-+                      if (!dqid(ddquot, rev) &&
-+                          memcmp(&emptydquot, ddquot, dqblksz))
-                               break;
-       }
--      if (i == V2_DQSTRINBLK) {
-+      if (i == dqstrinblk) {
-               printk(KERN_ERR "VFS: Quota for id %u referenced "
-                 "but not present.\n", dquot->dq_id);
-               ret = -EIO;
-               goto out_buf;
-       }
-       else
--              ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
--                v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
-+              ret = (blk << V2_DQBLKSIZE_BITS)+((char *)ddquot-(char *)buf);
- out_buf:
-       freedqbuf(buf);
-       return ret;
-@@ -605,7 +716,7 @@
- {
-       int type = dquot->dq_type;
-       loff_t offset;
--      struct v2_disk_dqblk ddquot, empty;
-+      union v2_disk_dqblk ddquot;
-       int ret = 0;
- #ifdef __QUOTA_V2_PARANOIA
-@@ -626,25 +737,30 @@
-               ret = offset;
-       }
-       else {
-+              uint rev = sb_dqopt(dquot->dq_sb)->info[type].u.v2_i.
-+                         dqi_revision;
-+              uint  dqblksz = v2_dqblksz(rev);
-               dquot->dq_off = offset;
--              if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
--                  (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
--                  != sizeof(struct v2_disk_dqblk)) {
-+              ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
-+                                         (char *)&ddquot, dqblksz, offset);
-+              if (ret != dqblksz) {
-                       if (ret >= 0)
-                               ret = -EIO;
-                       printk(KERN_ERR "VFS: Error while reading quota "
-                         "structure for id %u.\n", dquot->dq_id);
--                      memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
-+                      memset(&ddquot, 0, dqblksz);
-               }
-               else {
-                       ret = 0;
-                       /* We need to escape back all-zero structure */
--                      memset(&empty, 0, sizeof(struct v2_disk_dqblk));
--                      empty.dqb_itime = cpu_to_le64(1);
--                      if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
--                              ddquot.dqb_itime = 0;
-+                      if (!memcmp(&fakedquot[rev], &ddquot, dqblksz)) {
-+                              if (rev == 0)
-+                                      ddquot.r0.dqb_itime = cpu_to_le64(0);
-+                              else
-+                                      ddquot.r1.dqb_itime = cpu_to_le64(0);
-+                      }
-               }
--              disk2memdqb(&dquot->dq_dqb, &ddquot);
-+              disk2memdqb(&dquot->dq_dqb, &ddquot, rev);
-               if (!dquot->dq_dqb.dqb_bhardlimit &&
-                       !dquot->dq_dqb.dqb_bsoftlimit &&
-                       !dquot->dq_dqb.dqb_ihardlimit &&
-Index: linux-2.6.18-128.1.6/include/linux/dqblk_v2.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/dqblk_v2.h 2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/dqblk_v2.h      2009-06-02 23:26:36.000000000 -0600
-@@ -21,6 +21,7 @@
-       unsigned int dqi_blocks;
-       unsigned int dqi_free_blk;
-       unsigned int dqi_free_entry;
-+      unsigned int dqi_revision;
- };
- #endif /* _LINUX_DQBLK_V2_H */
-Index: linux-2.6.18-128.1.6/include/linux/quota.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/quota.h    2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/quota.h 2009-06-02 23:26:36.000000000 -0600
-@@ -149,12 +149,12 @@
-  * Data for one user/group kept in memory
-  */
- struct mem_dqblk {
--      __u32 dqb_bhardlimit;   /* absolute limit on disk blks alloc */
--      __u32 dqb_bsoftlimit;   /* preferred limit on disk blks */
-+      qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
-+      qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
-       qsize_t dqb_curspace;   /* current used space */
--      __u32 dqb_ihardlimit;   /* absolute limit on allocated inodes */
--      __u32 dqb_isoftlimit;   /* preferred inode limit */
--      __u32 dqb_curinodes;    /* current # allocated inodes */
-+      qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
-+      qsize_t dqb_isoftlimit; /* preferred inode limit */
-+      qsize_t dqb_curinodes;  /* current # allocated inodes */
-       time_t dqb_btime;       /* time limit for excessive disk use */
-       time_t dqb_itime;       /* time limit for excessive inode use */
- };
-@@ -170,6 +170,8 @@
-       unsigned long dqi_flags;
-       unsigned int dqi_bgrace;
-       unsigned int dqi_igrace;
-+      qsize_t dqi_maxblimit;
-+      qsize_t dqi_maxilimit;
-       union {
-               struct v1_mem_dqinfo v1_i;
-               struct v2_mem_dqinfo v2_i;
-Index: linux-2.6.18-128.1.6/include/linux/quotaio_v2.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/quotaio_v2.h       2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/quotaio_v2.h    2009-06-02 23:26:36.000000000 -0600
-@@ -16,28 +16,51 @@
-       0xd9c01927      /* GRPQUOTA */\
- }
--#define V2_INITQVERSIONS {\
-+#define V2_INITQVERSIONS_R0 {\
-       0,              /* USRQUOTA */\
-       0               /* GRPQUOTA */\
- }
-+#define V2_INITQVERSIONS_R1 {\
-+      1,              /* USRQUOTA */\
-+      1               /* GRPQUOTA */\
-+}
-+
- /*
-  * The following structure defines the format of the disk quota file
-  * (as it appears on disk) - the file is a radix tree whose leaves point
-  * to blocks of these structures.
-  */
--struct v2_disk_dqblk {
-+struct v2_disk_dqblk_r0 {
-       __le32 dqb_id;          /* id this quota applies to */
-       __le32 dqb_ihardlimit;  /* absolute limit on allocated inodes */
-       __le32 dqb_isoftlimit;  /* preferred inode limit */
-       __le32 dqb_curinodes;   /* current # allocated inodes */
--      __le32 dqb_bhardlimit;  /* absolute limit on disk space (in QUOTABLOCK_SIZE) */
--      __le32 dqb_bsoftlimit;  /* preferred limit on disk space (in QUOTABLOCK_SIZE) */
-+      __le32 dqb_bhardlimit;  /* absolute limit on disk space */
-+      __le32 dqb_bsoftlimit;  /* preferred limit on disk space */
-+      __le64 dqb_curspace;    /* current space occupied (in bytes) */
-+      __le64 dqb_btime;       /* time limit for excessive disk use */
-+      __le64 dqb_itime;       /* time limit for excessive inode use */
-+};
-+
-+struct v2_disk_dqblk_r1 {
-+      __le32 dqb_id;          /* id this quota applies to */
-+      __le32 dqb_padding;     /* padding field */
-+      __le64 dqb_ihardlimit;  /* absolute limit on allocated inodes */
-+      __le64 dqb_isoftlimit;  /* preferred inode limit */
-+      __le64 dqb_curinodes;   /* current # allocated inodes */
-+      __le64 dqb_bhardlimit;  /* absolute limit on disk space */
-+      __le64 dqb_bsoftlimit;  /* preferred limit on disk space */
-       __le64 dqb_curspace;    /* current space occupied (in bytes) */
-       __le64 dqb_btime;       /* time limit for excessive disk use */
-       __le64 dqb_itime;       /* time limit for excessive inode use */
- };
-+union v2_disk_dqblk {
-+      struct v2_disk_dqblk_r0 r0;
-+      struct v2_disk_dqblk_r1 r1;
-+};
-+
- /*
-  * Here are header structures as written on disk and their in-memory copies
-  */
-@@ -59,7 +82,7 @@
- /*
-  *  Structure of header of block with quota structures. It is padded to 16 bytes so
-- *  there will be space for exactly 21 quota-entries in a block
-+ *  there will be space for exactly 21 (r0) or 14 (r1) quota-entries in a block
-  */
- struct v2_disk_dqdbheader {
-       __le32 dqdh_next_free;  /* Number of next block with free entry */
-@@ -74,6 +97,5 @@
- #define V2_DQBLKSIZE  (1 << V2_DQBLKSIZE_BITS)        /* Size of block with quota structures */
- #define V2_DQTREEOFF  1               /* Offset of tree in file in blocks */
- #define V2_DQTREEDEPTH        4               /* Depth of quota tree */
--#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk))     /* Number of entries in one blocks */
- #endif /* _LINUX_QUOTAIO_V2_H */
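
For reference, the "21 (r0) or 14 (r1)" entries-per-block figure quoted in the header comment above follows directly from the record sizes: the r0 record is 48 bytes and the widened r1 record is 72 bytes, stored in a 1 KiB tree block behind a 16-byte block header. A small standalone C sketch of that arithmetic (illustrative only, not part of the deleted patch; host-endian integers stand in for the on-disk __le32/__le64 fields):

    /* Reproduce the quota entries-per-block numbers from struct layout alone. */
    #include <stdint.h>
    #include <stdio.h>

    #define V2_DQBLKSIZE 1024                /* 1 << V2_DQBLKSIZE_BITS */

    struct v2_disk_dqdbheader {              /* 16-byte block header */
            uint32_t dqdh_next_free;
            uint32_t dqdh_prev_free;
            uint16_t dqdh_entries;
            uint16_t dqdh_pad1;
            uint32_t dqdh_pad2;
    };

    struct v2_disk_dqblk_r0 {                /* 32-bit inode/block limits */
            uint32_t dqb_id, dqb_ihardlimit, dqb_isoftlimit, dqb_curinodes;
            uint32_t dqb_bhardlimit, dqb_bsoftlimit;
            uint64_t dqb_curspace, dqb_btime, dqb_itime;
    };

    struct v2_disk_dqblk_r1 {                /* 64-bit limits, padded after id */
            uint32_t dqb_id, dqb_padding;
            uint64_t dqb_ihardlimit, dqb_isoftlimit, dqb_curinodes;
            uint64_t dqb_bhardlimit, dqb_bsoftlimit;
            uint64_t dqb_curspace, dqb_btime, dqb_itime;
    };

    int main(void)
    {
            size_t hdr = sizeof(struct v2_disk_dqdbheader);

            printf("r0: %zu bytes, %zu entries per block\n",
                   sizeof(struct v2_disk_dqblk_r0),
                   (V2_DQBLKSIZE - hdr) / sizeof(struct v2_disk_dqblk_r0));
            printf("r1: %zu bytes, %zu entries per block\n",
                   sizeof(struct v2_disk_dqblk_r1),
                   (V2_DQBLKSIZE - hdr) / sizeof(struct v2_disk_dqblk_r1));
            return 0;                        /* prints 48/21 and 72/14 */
    }
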
diff --git a/lustre/kernel_patches/patches/raid5-configurable-cachesize-rhel5.patch b/lustre/kernel_patches/patches/raid5-configurable-cachesize-rhel5.patch
deleted file mode 100644 (file)
index be8f6c2..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-06 17:23:39.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-06 17:24:14.000000000 +0800
-@@ -57,7 +57,7 @@
-  * Stripe cache
-  */
--#define NR_STRIPES            256
-+static int raid5_nr_stripes = 256 * 8;
- #define STRIPE_SIZE           PAGE_SIZE
- #define STRIPE_SHIFT          (PAGE_SHIFT - 9)
- #define STRIPE_SECTORS                (STRIPE_SIZE>>9)
-@@ -3230,7 +3230,7 @@ static int run(mddev_t *mddev)
-       else
-               conf->max_degraded = 1;
-       conf->algorithm = mddev->layout;
--      conf->max_nr_stripes = NR_STRIPES;
-+      conf->max_nr_stripes = raid5_nr_stripes;
-       conf->expand_progress = mddev->reshape_position;
-       /* device size must be a multiple of chunk size */
-@@ -3821,6 +3821,7 @@ static void raid5_exit(void)
- module_init(raid5_init);
- module_exit(raid5_exit);
-+module_param(raid5_nr_stripes, int, 0644);
- MODULE_LICENSE("GPL");
- MODULE_ALIAS("md-personality-4"); /* RAID5 */
- MODULE_ALIAS("md-raid5");
-Only in linux-2.6.18-53/drivers/md: raid5.c.orig
-Only in linux-2.6.18-53.orig/include/linux/raid: .raid5.h.swp
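
The patch above only raises the default stripe cache size (NR_STRIPES 256 becomes raid5_nr_stripes = 2048) and exposes it as a module parameter; the memory cost is roughly one PAGE_SIZE page per member disk per cached stripe. A rough, illustrative estimate of that cost (userspace sketch; the 4 KiB page size and 8-disk array are assumptions, not taken from the patch):

    #include <stdio.h>

    int main(void)
    {
            long page_size  = 4096;          /* PAGE_SIZE assumed */
            long nr_stripes = 256 * 8;       /* raid5_nr_stripes default above */
            long nr_disks   = 8;             /* example array width */

            long bytes = nr_stripes * nr_disks * page_size;
            printf("stripe cache: about %ld MiB\n", bytes >> 20);  /* ~64 MiB here */
            return 0;
    }
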
diff --git a/lustre/kernel_patches/patches/raid5-large-io-rhel5.patch b/lustre/kernel_patches/patches/raid5-large-io-rhel5.patch
deleted file mode 100644 (file)
index 6a712a9..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-06 17:26:27.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-06 17:26:55.000000000 +0800
-@@ -3340,6 +3340,11 @@ static int run(mddev_t *mddev)
-       mddev->array_size =  mddev->size * (conf->previous_raid_disks -
-                                           conf->max_degraded);
-+      /* in order to support large I/Os */
-+      blk_queue_max_sectors(mddev->queue, conf->chunk_size * conf->previous_raid_disks >> 9);
-+      mddev->queue->max_phys_segments = conf->chunk_size * (conf->previous_raid_disks - conf->max_degraded) >> PAGE_SHIFT;
-+      mddev->queue->max_hw_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;;
-+
-       return 0;
- abort:
-       if (conf) {
diff --git a/lustre/kernel_patches/patches/raid5-maxsectors-rhel5.patch b/lustre/kernel_patches/patches/raid5-maxsectors-rhel5.patch
deleted file mode 100644 (file)
index 090d703..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-diff -ru linux-orig/drivers/md/raid5.c linux-new/drivers/md/raid5.c
---- linux-orig/drivers/md/raid5.c      2009-04-14 08:11:38.000000000 +1000
-+++ linux-new/drivers/md/raid5.c       2009-09-20 05:02:02.000000000 +1000
-@@ -3595,10 +3595,16 @@
-       mddev->array_size =  mddev->size * (conf->previous_raid_disks -
-                                           conf->max_degraded);
-+      int stripe_size = conf->chunk_size * (conf->previous_raid_disks - conf->max_degraded);
-+
-       /* in order to support large I/Os */
--      blk_queue_max_sectors(mddev->queue, conf->chunk_size * conf->previous_raid_disks >> 9);
--      mddev->queue->max_phys_segments = conf->chunk_size * (conf->previous_raid_disks - conf->max_degraded) >> PAGE_SHIFT;
--      mddev->queue->max_hw_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;;
-+      blk_queue_max_sectors(mddev->queue, stripe_size >> 9);
-+      /* KTVM: set default max_sectors the same as the max_hw_sectors set above */
-+      mddev->queue->max_sectors = mddev->queue->max_hw_sectors;
-+      printk("%s: setting max_sectors = %d, max_hw_sectors = %d\n", mdname(mddev), mddev->queue->max_sectors, mddev->queue->max_hw_sectors);
-+
-+      mddev->queue->max_phys_segments = stripe_size >> PAGE_SHIFT;
-+      mddev->queue->max_hw_segments = stripe_size >> PAGE_SHIFT;;
-       /* raid5 device is able to do zcopy right now. */
-       mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONSTANT_WRITE;
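
The raid5-maxsectors patch above sizes the queue limits from a full data stripe (the earlier raid5-large-io patch used all member disks): conf->chunk_size times (raid_disks - max_degraded), shifted right by 9 for blk_queue_max_sectors() and by PAGE_SHIFT for the segment limits. A worked example of that arithmetic with assumed geometry (6-disk RAID-5, 128 KiB chunks; illustrative userspace sketch, not part of the patches):

    #include <stdio.h>

    int main(void)
    {
            int chunk_size   = 128 * 1024;   /* conf->chunk_size in bytes (assumed) */
            int raid_disks   = 6;            /* conf->previous_raid_disks (assumed) */
            int max_degraded = 1;            /* RAID-5 */
            int page_shift   = 12;           /* 4 KiB pages */

            int stripe_size = chunk_size * (raid_disks - max_degraded);

            printf("max_sectors  = %d\n", stripe_size >> 9);           /* 1280 */
            printf("max segments = %d\n", stripe_size >> page_shift);  /* 160 */
            return 0;
    }
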
diff --git a/lustre/kernel_patches/patches/raid5-merge-ios-rhel5.patch b/lustre/kernel_patches/patches/raid5-merge-ios-rhel5.patch
deleted file mode 100644 (file)
index 52da835..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-diff -pur b/drivers/md/raid5.c a/drivers/md/raid5.c
---- b/drivers/md/raid5.c       2009-02-20 15:56:36.000000000 +0800
-+++ a/drivers/md/raid5.c       2009-02-20 15:57:49.000000000 +0800
-@@ -1277,7 +1277,26 @@ static void compute_block_2(struct strip
-       }
- }
-+/*
-+ * The whole idea is to collect all bio's and then issue them
-+ * disk by disk to assist merging a bit -bzzz
-+ */
-+static void raid5_flush_bios(raid5_conf_t *conf, struct bio *bios[], int raid_disks)
-+{
-+      struct bio *bio, *nbio;
-+      int i;
-+      for (i = 0; i < raid_disks; i++) {
-+              bio = bios[i];
-+              while (bio) {
-+                      nbio = bio->bi_next;
-+                      bio->bi_next = NULL;
-+                      generic_make_request(bio);
-+                      bio = nbio;
-+              }
-+              bios[i] = NULL;
-+      }
-+}
- /*
-  * Each stripe/dev can have one or more bion attached.
-@@ -1392,7 +1411,7 @@ static int stripe_to_pdidx(sector_t stri
-  *
-  */
-  
--static void handle_stripe5(struct stripe_head *sh)
-+static void handle_stripe5(struct stripe_head *sh, struct bio *bios[])
- {
-       raid5_conf_t *conf = sh->raid_conf;
-       int disks = sh->disks;
-@@ -1939,7 +1958,11 @@ static void handle_stripe5(struct stripe
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
-                       atomic_inc(&conf->out_reqs_in_queue);
--                      generic_make_request(bi);
-+                      if (bios) {
-+                              bi->bi_next = bios[i];
-+                              bios[i] = bi;
-+                      } else
-+                              generic_make_request(bi);
-               } else {
-                       if (rw == 1)
-                               set_bit(STRIPE_DEGRADED, &sh->state);
-@@ -1951,7 +1974,7 @@ static void handle_stripe5(struct stripe
-       }
- }
--static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
-+static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page, struct bio *bios[])
- {
-       raid6_conf_t *conf = sh->raid_conf;
-       int disks = conf->raid_disks;
-@@ -2499,7 +2522,11 @@ static void handle_stripe6(struct stripe
-                       if (rw == WRITE &&
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
--                      generic_make_request(bi);
-+                      if (bios) {
-+                              bi->bi_next = bios[i];
-+                              bios[i] = bi;
-+                      } else
-+                              generic_make_request(bi);
-                       atomic_inc(&conf->out_reqs_in_queue);
-               } else {
-                       if (rw == 1)
-@@ -2512,12 +2539,12 @@ static void handle_stripe6(struct stripe
-       }
- }
--static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
-+static void handle_stripe(struct stripe_head *sh, struct page *tmp_page, struct bio *bios[])
- {
-       if (sh->raid_conf->level == 6)
--              handle_stripe6(sh, tmp_page);
-+              handle_stripe6(sh, tmp_page, bios);
-       else
--              handle_stripe5(sh);
-+              handle_stripe5(sh, bios);
- }
-@@ -2670,6 +2697,7 @@ static int make_request(request_queue_t 
-       int stripes_per_chunk, sectors_per_block;
-       int sectors_per_stripe;
-       int i, j;
-+      struct bio *bios[MD_SB_DISKS];
-       DEFINE_WAIT(w);
-       int disks, data_disks;
-@@ -2698,6 +2726,7 @@ static int make_request(request_queue_t 
-       sectors = bi->bi_size >> 9;
-       stripes_per_chunk = conf->chunk_size / STRIPE_SIZE;
-+      memset(&bios, 0, sizeof(bios));
- redo_bio:
-       /* stripe by stripe handle needs a stable raid layout, so if this
-        * request covers the expanding region, wait until it is over.
-@@ -2756,8 +2785,10 @@ retry:
-                                        * the raid layout has been changed, we have to redo the 
-                                        * whole bio because we don't know which sectors in it have been
-                                        * done, and which have not. -jay */
--                                      if (raid5_redo_bio(conf, bi, disks, logical_sector))
-+                                      if (raid5_redo_bio(conf, bi, disks, logical_sector)) {
-+                                              raid5_flush_bios(conf, bios, disks);
-                                               goto redo_bio;
-+                                      }
-                                       if (test_bit(STRIPE_EXPANDING, &sh->state)) {
-                                               /* Stripe is busy expanding or
-@@ -2766,6 +2797,7 @@ retry:
-                                                */
-                                               release_stripe(sh);
-                                               sh = NULL;
-+                                              raid5_flush_bios(conf, bios, disks);
-                                               raid5_unplug_device(mddev->queue);
-                                               schedule();
-                                               goto retry;
-@@ -2784,17 +2816,19 @@ retry:
-                        */
-                       if (r_sector >= mddev->suspend_lo &&
-                           r_sector < mddev->suspend_hi) {
--                              handle_stripe(sh, NULL);
-+                              handle_stripe(sh, NULL, NULL);
-                               release_stripe(sh);
-                               sh = NULL;
-+                              raid5_flush_bios(conf, bios, disks);
-                               schedule();
-                               goto retry;
-                       }
-                       if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
--                              handle_stripe(sh, NULL);
-+                              handle_stripe(sh, NULL, NULL);
-                               release_stripe(sh);
-                               sh = NULL;
-+                              raid5_flush_bios(conf, bios, disks);
-                               raid5_unplug_device(mddev->queue);
-                               schedule();
-                               goto retry;
-@@ -2810,7 +2844,7 @@ retry:
-                       r_sector += sectors_per_chunk;
-               }
-               if (sh) {
--                      handle_stripe(sh, NULL);
-+                      handle_stripe(sh, NULL, bios);
-                       release_stripe(sh);
-                       sh = NULL;
-               }
-@@ -2820,6 +2854,9 @@ retry:
-       if (sectors > 0)
-               goto repeat;
-+      /* flush all of the bios */
-+      raid5_flush_bios(conf, bios, disks);
-+
-       spin_lock_irq(&conf->device_lock);
-       remaining = --bi->bi_phys_segments;
-       spin_unlock_irq(&conf->device_lock);
-@@ -3035,7 +3072,7 @@ static inline sector_t sync_request(mdde
-       clear_bit(STRIPE_INSYNC, &sh->state);
-       spin_unlock(&sh->lock);
--      handle_stripe(sh, NULL);
-+      handle_stripe(sh, NULL, NULL);
-       release_stripe(sh);
-       return STRIPE_SECTORS;
-@@ -3091,7 +3128,7 @@ static void raid5d (mddev_t *mddev)
-               
-               handled++;
-               atomic_inc(&conf->handled_in_raid5d);
--              handle_stripe(sh, conf->spare_page);
-+              handle_stripe(sh, conf->spare_page, NULL);
-               release_stripe(sh);
-               cond_resched();
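
The comment in raid5_flush_bios() above states the idea: instead of calling generic_make_request() as each stripe is handled, make_request() collects the per-device bios on one list per member disk and submits them disk by disk afterwards, so the block layer sees adjacent requests back to back and can merge them. A minimal userspace sketch of that collect-then-flush pattern (illustrative only; the request struct and submit() stand in for struct bio and generic_make_request()):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_DISKS 4

    struct request {                         /* stand-in for struct bio */
            int sector;
            struct request *next;
    };

    static void submit(int disk, struct request *r)
    {
            printf("disk %d: sector %d\n", disk, r->sector);
    }

    /* queue a request on its disk's list instead of submitting it at once */
    static void defer(struct request **lists, int disk, struct request *r)
    {
            r->next = lists[disk];
            lists[disk] = r;
    }

    /* mirror raid5_flush_bios(): walk disk by disk, unlinking and submitting */
    static void flush(struct request **lists)
    {
            for (int i = 0; i < NR_DISKS; i++) {
                    struct request *r = lists[i];
                    while (r) {
                            struct request *next = r->next;
                            r->next = NULL;
                            submit(i, r);
                            free(r);
                            r = next;
                    }
                    lists[i] = NULL;
            }
    }

    int main(void)
    {
            struct request *lists[NR_DISKS] = { NULL };

            for (int s = 0; s < 8; s++) {    /* pretend handle_stripe() output */
                    struct request *r = malloc(sizeof(*r));
                    r->sector = s * 8;
                    defer(lists, s % NR_DISKS, r);
            }
            flush(lists);                    /* all of disk 0, then disk 1, ... */
            return 0;
    }
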
diff --git a/lustre/kernel_patches/patches/raid5-mmp-unplug-dev.patch b/lustre/kernel_patches/patches/raid5-mmp-unplug-dev.patch
deleted file mode 100644 (file)
index cc25153..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Index: linux-2.6.18-128.1.6/drivers/md/raid5.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/drivers/md/raid5.c       2009-06-02 23:24:55.000000000 -0600
-+++ linux-2.6.18-128.1.6/drivers/md/raid5.c    2009-06-02 23:27:21.000000000 -0600
-@@ -1456,6 +1456,8 @@
-               bi->bi_next = *bip;
-       *bip = bi;
-       bi->bi_phys_segments ++;
-+      if (bio_sync(bi) && !forwrite)
-+              clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-       spin_unlock_irq(&conf->device_lock);
-       spin_unlock(&sh->lock);
-@@ -3012,6 +3014,8 @@
-               bi->bi_size = 0;
-               bi->bi_end_io(bi, bytes, 0);
-       }
-+      if (bio_sync(bi))
-+              raid5_unplug_device(q);
-       return 0;
- }
diff --git a/lustre/kernel_patches/patches/raid5-rebuild-corrupt-bug.patch b/lustre/kernel_patches/patches/raid5-rebuild-corrupt-bug.patch
deleted file mode 100644 (file)
index c434498..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-While the stripe in-memory must be in-sync, the stripe on disk might not be
-because if we computed a block rather than reading it from an in-sync disk,
-the in-memory stripe can be different from the on-disk stripe.
-
-If this bug were still in mainline I would probably want a bigger patch which
-would leave this code but also set R5_LOCKED on all blocks that have been
-computed.  But as it is a stabilisation patch, the above is simple and more
-clearly correct.
-
-Thanks for your patience - I look forward to your success/failure report.
-
-NeilBrown
-
-diff -up /drivers/md/raid5.c
-===========================================
---- a/drivers/md/raid5.c
-+++ b/drivers/md/raid5.c
-@@ -2466,8 +2466,6 @@
-                                       locked++;
-                                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
-                               }
--                      /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
--                      set_bit(STRIPE_INSYNC, &sh->state);
-
-                       if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                               atomic_dec(&conf->preread_active_stripes);
diff --git a/lustre/kernel_patches/patches/raid5-stats-rhel5.patch b/lustre/kernel_patches/patches/raid5-stats-rhel5.patch
deleted file mode 100644 (file)
index b119334..0000000
+++ /dev/null
@@ -1,256 +0,0 @@
-diff -pru linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-06 17:15:22.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-06 17:17:30.000000000 +0800
-@@ -115,10 +115,12 @@ static void __release_stripe(raid5_conf_
-                       if (test_bit(STRIPE_DELAYED, &sh->state)) {
-                               list_add_tail(&sh->lru, &conf->delayed_list);
-                               blk_plug_device(conf->mddev->queue);
-+                              atomic_inc(&conf->delayed);
-                       } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
-                                  sh->bm_seq - conf->seq_write > 0) {
-                               list_add_tail(&sh->lru, &conf->bitmap_list);
-                               blk_plug_device(conf->mddev->queue);
-+                              atomic_inc(&conf->bit_delayed);
-                       } else {
-                               clear_bit(STRIPE_BIT_DELAY, &sh->state);
-                               list_add_tail(&sh->lru, &conf->handle_list);
-@@ -289,6 +291,7 @@ static struct stripe_head *get_active_st
-                       if (noblock && sh == NULL)
-                               break;
-                       if (!sh) {
-+                              atomic_inc(&conf->out_of_stripes);
-                               conf->inactive_blocked = 1;
-                               wait_event_lock_irq(conf->wait_for_stripe,
-                                                   !list_empty(&conf->inactive_list) &&
-@@ -311,6 +314,10 @@ static struct stripe_head *get_active_st
-                                   !test_bit(STRIPE_EXPANDING, &sh->state))
-                                       BUG();
-                               list_del_init(&sh->lru);
-+                              if (test_bit(STRIPE_DELAYED, &sh->state))
-+                                      atomic_dec(&conf->delayed);
-+                              if (test_bit(STRIPE_BIT_DELAY, &sh->state))
-+                                      atomic_dec(&conf->bit_delayed);
-                       }
-               }
-       } while (sh == NULL);
-@@ -529,6 +536,8 @@ static int raid5_end_read_request(struct
-       if (bi->bi_size)
-               return 1;
-+      atomic_dec(&conf->out_reqs_in_queue);
-+
-       for (i=0 ; i<disks; i++)
-               if (bi == &sh->dev[i].req)
-                       break;
-@@ -642,6 +651,8 @@ static int raid5_end_write_request (stru
-       if (bi->bi_size)
-               return 1;
-+      atomic_dec(&conf->out_reqs_in_queue);
-+
-       for (i=0 ; i<disks; i++)
-               if (bi == &sh->dev[i].req)
-                       break;
-@@ -1402,6 +1413,8 @@ static void handle_stripe5(struct stripe
-       clear_bit(STRIPE_HANDLE, &sh->state);
-       clear_bit(STRIPE_DELAYED, &sh->state);
-+      atomic_inc(&conf->handle_called);
-+
-       syncing = test_bit(STRIPE_SYNCING, &sh->state);
-       expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-       expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
-@@ -1684,6 +1697,7 @@ static void handle_stripe5(struct stripe
-                                               set_bit(R5_LOCKED, &dev->flags);
-                                               set_bit(R5_Wantread, &dev->flags);
-                                               locked++;
-+                                              atomic_inc(&conf->reads_for_rmw);
-                                       } else {
-                                               set_bit(STRIPE_DELAYED, &sh->state);
-                                               set_bit(STRIPE_HANDLE, &sh->state);
-@@ -1703,6 +1717,7 @@ static void handle_stripe5(struct stripe
-                                               set_bit(R5_LOCKED, &dev->flags);
-                                               set_bit(R5_Wantread, &dev->flags);
-                                               locked++;
-+                                              atomic_inc(&conf->reads_for_rcw);
-                                       } else {
-                                               set_bit(STRIPE_DELAYED, &sh->state);
-                                               set_bit(STRIPE_HANDLE, &sh->state);
-@@ -1870,6 +1885,7 @@ static void handle_stripe5(struct stripe
-               bi->bi_end_io(bi, bytes,
-                             test_bit(BIO_UPTODATE, &bi->bi_flags)
-                               ? 0 : -EIO);
-+              atomic_dec(&conf->in_reqs_in_queue);
-       }
-       for (i=disks; i-- ;) {
-               int rw;
-@@ -1885,10 +1901,13 @@ static void handle_stripe5(struct stripe
-               bi = &sh->dev[i].req;
-  
-               bi->bi_rw = rw;
--              if (rw)
-+              if (rw) {
-+                      atomic_inc(&conf->writes_out);
-                       bi->bi_end_io = raid5_end_write_request;
--              else
-+              } else {
-+                      atomic_inc(&conf->reads_out);
-                       bi->bi_end_io = raid5_end_read_request;
-+              }
-  
-               rcu_read_lock();
-               rdev = rcu_dereference(conf->disks[i].rdev);
-@@ -1919,6 +1938,7 @@ static void handle_stripe5(struct stripe
-                       if (rw == WRITE &&
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
-+                      atomic_inc(&conf->out_reqs_in_queue);
-                       generic_make_request(bi);
-               } else {
-                       if (rw == 1)
-@@ -1955,6 +1975,8 @@ static void handle_stripe6(struct stripe
-       clear_bit(STRIPE_HANDLE, &sh->state);
-       clear_bit(STRIPE_DELAYED, &sh->state);
-+      atomic_inc(&conf->handle_called);
-+
-       syncing = test_bit(STRIPE_SYNCING, &sh->state);
-       /* Now to look around and see what can be done */
-@@ -2255,6 +2277,7 @@ static void handle_stripe6(struct stripe
-                                               set_bit(R5_LOCKED, &dev->flags);
-                                               set_bit(R5_Wantread, &dev->flags);
-                                               locked++;
-+                                              atomic_inc(&conf->reads_for_rcw);
-                                       } else {
-                                               PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
-                                                      (unsigned long long)sh->sector, i);
-@@ -2423,6 +2446,7 @@ static void handle_stripe6(struct stripe
-               bi->bi_end_io(bi, bytes,
-                             test_bit(BIO_UPTODATE, &bi->bi_flags)
-                               ? 0 : -EIO);
-+              atomic_dec(&conf->in_reqs_in_queue);
-       }
-       for (i=disks; i-- ;) {
-               int rw;
-@@ -2438,10 +2462,13 @@ static void handle_stripe6(struct stripe
-               bi = &sh->dev[i].req;
-               bi->bi_rw = rw;
--              if (rw)
-+              if (rw) {
-+                      atomic_inc(&conf->writes_out);
-                       bi->bi_end_io = raid5_end_write_request;
--              else
-+              } else {
-+                      atomic_inc(&conf->reads_out);
-                       bi->bi_end_io = raid5_end_read_request;
-+              }
-               rcu_read_lock();
-               rdev = rcu_dereference(conf->disks[i].rdev);
-@@ -2473,6 +2500,7 @@ static void handle_stripe6(struct stripe
-                           test_bit(R5_ReWrite, &sh->dev[i].flags))
-                               atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
-                       generic_make_request(bi);
-+                      atomic_inc(&conf->out_reqs_in_queue);
-               } else {
-                       if (rw == 1)
-                               set_bit(STRIPE_DEGRADED, &sh->state);
-@@ -2506,6 +2534,7 @@ static void raid5_activate_delayed(raid5
-                       if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
-                               atomic_inc(&conf->preread_active_stripes);
-                       list_add_tail(&sh->lru, &conf->handle_list);
-+                      atomic_dec(&conf->delayed);
-               }
-       }
- }
-@@ -2608,6 +2637,8 @@ static int make_request(request_queue_t 
-       const int rw = bio_data_dir(bi);
-       int remaining;
-+      atomic_inc(&conf->in_reqs_in_queue);
-+
-       if (unlikely(bio_barrier(bi))) {
-               bio_endio(bi, bi->bi_size, -EOPNOTSUPP);
-               return 0;
-@@ -2617,6 +2648,11 @@ static int make_request(request_queue_t 
-       disk_stat_inc(mddev->gendisk, ios[rw]);
-       disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
-+      if (rw == WRITE)
-+              atomic_inc(&conf->writes_in);
-+      else
-+              atomic_inc(&conf->reads_in);
-+
-       logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-       last_sector = bi->bi_sector + (bi->bi_size>>9);
-@@ -2724,6 +2760,7 @@ static int make_request(request_queue_t 
-               if ( rw == WRITE )
-                       md_write_end(mddev);
-+              atomic_dec(&conf->in_reqs_in_queue);
-               bi->bi_size = 0;
-               bi->bi_end_io(bi, bytes, 0);
-       }
-@@ -2985,6 +3022,7 @@ static void raid5d (mddev_t *mddev)
-               spin_unlock_irq(&conf->device_lock);
-               
-               handled++;
-+              atomic_inc(&conf->handled_in_raid5d);
-               handle_stripe(sh, conf->spare_page);
-               release_stripe(sh);
-@@ -3381,6 +3419,21 @@ static void status (struct seq_file *seq
-                              conf->disks[i].rdev &&
-                              test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
-       seq_printf (seq, "]");
-+      seq_printf (seq, "\n\t\tin: %u reads, %u writes; out: %u reads, %u writes",
-+                      atomic_read(&conf->reads_in), atomic_read(&conf->writes_in),
-+                      atomic_read(&conf->reads_out), atomic_read(&conf->writes_out));
-+      seq_printf (seq, "\n\t\t%u in raid5d, %u out of stripes, %u handle called",
-+                      atomic_read(&conf->handled_in_raid5d),
-+                      atomic_read(&conf->out_of_stripes),
-+                      atomic_read(&conf->handle_called));
-+      seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
-+                      atomic_read(&conf->reads_for_rmw),
-+                      atomic_read(&conf->reads_for_rcw));
-+      seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n",
-+                      atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed),
-+                      atomic_read(&conf->active_stripes),
-+                      atomic_read(&conf->in_reqs_in_queue),
-+                      atomic_read(&conf->out_reqs_in_queue));
- #if RAID5_DEBUG
-       seq_printf (seq, "\n");
-       printall(seq, conf);
-diff -pru linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h
---- linux-2.6.18-53.orig/include/linux/raid/raid5.h    2007-12-06 17:15:22.000000000 +0800
-+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-06 17:15:32.000000000 +0800
-@@ -259,6 +259,25 @@ struct raid5_private_data {
-       int                     pool_size; /* number of disks in stripeheads in pool */
-       spinlock_t              device_lock;
-       struct disk_info        *disks;
-+
-+      /*
-+       * Stats
-+       */
-+      atomic_t                reads_in;
-+      atomic_t                writes_in;
-+      atomic_t                reads_out;
-+      atomic_t                writes_out;
-+      atomic_t                handled_in_raid5d;
-+      atomic_t                out_of_stripes;
-+      atomic_t                reads_for_rmw;
-+      atomic_t                reads_for_rcw;
-+      atomic_t                writes_zcopy;
-+      atomic_t                writes_copied;
-+      atomic_t                handle_called;
-+      atomic_t                delayed;
-+      atomic_t                bit_delayed;
-+      atomic_t                in_reqs_in_queue;
-+      atomic_t                out_reqs_in_queue;
- };
- typedef struct raid5_private_data raid5_conf_t;
-Only in linux-2.6.18-53.orig/include/linux/raid: .raid5.h.swp
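
The statistics patch above is a set of atomic_t counters bumped as requests enter and leave the driver (reads/writes in and out, reads issued for read-modify-write and reconstruct-write, delayed stripes, and so on) and printed by status() into /proc/mdstat. A userspace sketch of the same counter pattern using C11 atomics (illustrative only; the fields mirror a subset of those added to raid5_private_data):

    #include <stdatomic.h>
    #include <stdio.h>

    struct raid5_stats {
            atomic_int reads_in, writes_in;
            atomic_int reads_out, writes_out;
            atomic_int reads_for_rmw, reads_for_rcw;
    };

    static struct raid5_stats stats;

    static void account_request(int is_write)
    {
            if (is_write)
                    atomic_fetch_add(&stats.writes_in, 1);
            else
                    atomic_fetch_add(&stats.reads_in, 1);
    }

    static void show_stats(void)             /* analogue of the seq_printf() lines */
    {
            printf("in: %d reads, %d writes; out: %d reads, %d writes\n",
                   atomic_load(&stats.reads_in), atomic_load(&stats.writes_in),
                   atomic_load(&stats.reads_out), atomic_load(&stats.writes_out));
            printf("reads: %d for rmw, %d for rcw\n",
                   atomic_load(&stats.reads_for_rmw),
                   atomic_load(&stats.reads_for_rcw));
    }

    int main(void)
    {
            account_request(0);
            account_request(1);
            atomic_fetch_add(&stats.reads_for_rcw, 1);
            show_stats();
            return 0;
    }
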
diff --git a/lustre/kernel_patches/patches/raid5-stripe-by-stripe-handling-rhel5.patch b/lustre/kernel_patches/patches/raid5-stripe-by-stripe-handling-rhel5.patch
deleted file mode 100644 (file)
index 4b72d95..0000000
+++ /dev/null
@@ -1,284 +0,0 @@
-diff -pur linux-2.6.18-53.orig/drivers/md/raid5.c linux-2.6.18-53/drivers/md/raid5.c
---- linux-2.6.18-53.orig/drivers/md/raid5.c    2007-12-28 14:55:08.000000000 +0800
-+++ linux-2.6.18-53/drivers/md/raid5.c 2007-12-28 18:52:08.000000000 +0800
-@@ -2626,6 +2626,35 @@ static int raid5_issue_flush(request_que
-       return ret;
- }
-+static inline int raid5_expanding_overlap(raid5_conf_t *conf, struct bio *bi)
-+{
-+      sector_t first_sector, last_sector;
-+
-+      if (likely(conf->expand_progress == MaxSector))
-+              return 0;
-+
-+      first_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-+      last_sector = bi->bi_sector + (bi->bi_size>>9);
-+
-+      return (first_sector < conf->expand_progress &&
-+              last_sector >= conf->expand_lo);
-+}
-+
-+static inline int raid5_redo_bio(raid5_conf_t *conf, struct bio *bi, int disks, sector_t sector)
-+{
-+      int redo = 0;
-+
-+      if (likely(conf->expand_progress == MaxSector))
-+              return 0;
-+
-+      spin_lock_irq(&conf->device_lock);
-+      redo = (raid5_expanding_overlap(conf, bi) ||
-+              (unlikely(sector < conf->expand_progress) &&
-+              disks == conf->previous_raid_disks));
-+      spin_unlock_irq(&conf->device_lock);
-+      return redo;
-+}
-+
- static int make_request(request_queue_t *q, struct bio * bi)
- {
-       mddev_t *mddev = q->queuedata;
-@@ -2636,6 +2665,14 @@ static int make_request(request_queue_t 
-       struct stripe_head *sh;
-       const int rw = bio_data_dir(bi);
-       int remaining;
-+      sector_t stripe, sectors, block, r_sector, b_sector;
-+      int sectors_per_chunk = conf->chunk_size >> 9;
-+      int stripes_per_chunk, sectors_per_block;
-+      int sectors_per_stripe;
-+      int i, j;
-+
-+      DEFINE_WAIT(w);
-+      int disks, data_disks;
-       atomic_inc(&conf->in_reqs_in_queue);
-@@ -2653,105 +2690,136 @@ static int make_request(request_queue_t 
-       else
-               atomic_inc(&conf->reads_in);
--
-       logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-       last_sector = bi->bi_sector + (bi->bi_size>>9);
-       bi->bi_next = NULL;
-       bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
--      for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
--              DEFINE_WAIT(w);
--              int disks, data_disks;
--
--      retry:
--              prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
--              if (likely(conf->expand_progress == MaxSector))
--                      disks = conf->raid_disks;
--              else {
--                      /* spinlock is needed as expand_progress may be
--                       * 64bit on a 32bit platform, and so it might be
--                       * possible to see a half-updated value
--                       * Ofcourse expand_progress could change after
--                       * the lock is dropped, so once we get a reference
--                       * to the stripe that we think it is, we will have
--                       * to check again.
--                       */
--                      spin_lock_irq(&conf->device_lock);
--                      disks = conf->raid_disks;
--                      if (logical_sector >= conf->expand_progress)
--                              disks = conf->previous_raid_disks;
--                      else {
--                              if (logical_sector >= conf->expand_lo) {
--                                      spin_unlock_irq(&conf->device_lock);
--                                      schedule();
--                                      goto retry;
--                              }
--                      }
--                      spin_unlock_irq(&conf->device_lock);
--              }
--              data_disks = disks - conf->max_degraded;
-+      sectors = bi->bi_size >> 9;
-+      stripes_per_chunk = conf->chunk_size / STRIPE_SIZE;
--              new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
--                                                &dd_idx, &pd_idx, conf);
--              PRINTK("raid5: make_request, sector %llu logical %llu\n",
--                      (unsigned long long)new_sector, 
--                      (unsigned long long)logical_sector);
-+redo_bio:
-+      /* stripe by stripe handle needs a stable raid layout, so if this
-+       * reuqest covers the expanding region, wait it over. 
-+       * request covers the expanding region, wait until it is over.
-+       * wait for the bi_phys_segment to be 1 also. -jay */
-+      spin_lock_irq(&conf->device_lock);
-+      wait_event_lock_irq(conf->wait_for_overlap,
-+                      (bi->bi_phys_segments == 1) &&
-+                      !raid5_expanding_overlap(conf, bi),
-+                      conf->device_lock,
-+                      (unplug_slaves(conf->mddev), atomic_inc(&conf->expanding_overlap)));
-+
-+      disks = conf->raid_disks;
-+      if (unlikely(logical_sector >= conf->expand_progress))
-+              disks = conf->previous_raid_disks;
-+      data_disks = disks - conf->max_degraded;
-+      spin_unlock_irq(&conf->device_lock);
--              sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
--              if (sh) {
--                      if (unlikely(conf->expand_progress != MaxSector)) {
--                              /* expansion might have moved on while waiting for a
--                               * stripe, so we must do the range check again.
--                               * Expansion could still move past after this
--                               * test, but as we are holding a reference to
--                               * 'sh', we know that if that happens,
--                               *  STRIPE_EXPANDING will get set and the expansion
--                               * won't proceed until we finish with the stripe.
--                               */
--                              int must_retry = 0;
--                              spin_lock_irq(&conf->device_lock);
--                              if (logical_sector <  conf->expand_progress &&
--                                  disks == conf->previous_raid_disks)
--                                      /* mismatch, need to try again */
--                                      must_retry = 1;
--                              spin_unlock_irq(&conf->device_lock);
--                              if (must_retry) {
--                                      release_stripe(sh);
--                                      goto retry;
-+      /* compute the block # */
-+      sectors_per_stripe = STRIPE_SECTORS * data_disks;
-+      sectors_per_block = stripes_per_chunk * sectors_per_stripe;
-+
-+      block = logical_sector & ~((sector_t)sectors_per_block - 1);
-+      sector_div(block, sectors_per_block);
-+
-+repeat:
-+      stripe = block * (sectors_per_block / data_disks);
-+      b_sector = stripe * data_disks;
-+      /* iterate through all stripes in this block,
-+       * where block is a set of internal stripes
-+       * which covers chunk */
-+
-+      for (i = 0; i < stripes_per_chunk && sectors > 0; i++) {
-+              r_sector = b_sector + (i * STRIPE_SECTORS);
-+              sh = NULL;
-+              /* iterate through all pages in the stripe */
-+              for (j = 0; j < data_disks && sectors > 0; j++) {
-+                      DEFINE_WAIT(w);
-+
-+                      if (r_sector + STRIPE_SECTORS <= bi->bi_sector ||
-+                          r_sector >= last_sector) {
-+                              r_sector += sectors_per_chunk;
-+                              continue;
-+                      }
-+
-+retry:
-+                      prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
-+                      new_sector = raid5_compute_sector(r_sector, disks,
-+                                                      data_disks, &dd_idx,
-+                                                      &pd_idx, conf);
-+                      if (sh == NULL) {
-+                              sh = get_active_stripe(conf, new_sector, disks, pd_idx,
-+                                                      (bi->bi_rw&RWA_MASK));
-+                              if (sh) {
-+                                      /* we're handling the bio stripe by stripe, so when we found
-+                                       * the raid layout has been changed, we have to redo the 
-+                                       * whole bio because we don't know which sectors in it have been
-+                                       * done, and which have not. -jay */
-+                                      if (raid5_redo_bio(conf, bi, disks, logical_sector))
-+                                              goto redo_bio;
-+
-+                                      if (test_bit(STRIPE_EXPANDING, &sh->state)) {
-+                                              /* Stripe is busy expanding or
-+                                               * add failed due to overlap.  Flush everything
-+                                               * and wait a while
-+                                               */
-+                                              release_stripe(sh);
-+                                              sh = NULL;
-+                                              raid5_unplug_device(mddev->queue);
-+                                              schedule();
-+                                              goto retry;
-+                                      }
-+                              } else {
-+                                      /* cannot get stripe for read-ahead, just give-up */
-+                                      finish_wait(&conf->wait_for_overlap, &w);
-+                                      clear_bit(BIO_UPTODATE, &bi->bi_flags);
-+                                      sectors = 0;
-+                                      break;
-                               }
-                       }
-+
-                       /* FIXME what if we get a false positive because these
-                        * are being updated.
-                        */
--                      if (logical_sector >= mddev->suspend_lo &&
--                          logical_sector < mddev->suspend_hi) {
-+                      if (r_sector >= mddev->suspend_lo &&
-+                          r_sector < mddev->suspend_hi) {
-+                              handle_stripe(sh, NULL);
-                               release_stripe(sh);
-+                              sh = NULL;
-                               schedule();
-                               goto retry;
-                       }
--                      if (test_bit(STRIPE_EXPANDING, &sh->state) ||
--                          !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
--                              /* Stripe is busy expanding or
--                               * add failed due to overlap.  Flush everything
--                               * and wait a while
--                               */
--                              raid5_unplug_device(mddev->queue);
-+                      if (!add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) {
-+                              handle_stripe(sh, NULL);
-                               release_stripe(sh);
-+                              sh = NULL;
-+                              raid5_unplug_device(mddev->queue);
-                               schedule();
-                               goto retry;
-                       }
-                       finish_wait(&conf->wait_for_overlap, &w);
-+
-+                      BUG_ON (new_sector != stripe);
-+                      sectors -= STRIPE_SECTORS;
-+                      if (bi->bi_sector > r_sector)
-+                              sectors += bi->bi_sector - r_sector;
-+                      if (r_sector + STRIPE_SECTORS > last_sector)
-+                              sectors += r_sector + STRIPE_SECTORS - last_sector;
-+                      r_sector += sectors_per_chunk;
-+              }
-+              if (sh) {
-                       handle_stripe(sh, NULL);
-                       release_stripe(sh);
--              } else {
--                      /* cannot get stripe for read-ahead, just give-up */
--                      clear_bit(BIO_UPTODATE, &bi->bi_flags);
--                      finish_wait(&conf->wait_for_overlap, &w);
--                      break;
-+                      sh = NULL;
-               }
--                      
-+              stripe += STRIPE_SECTORS;
-       }
-+      block++;
-+      if (sectors > 0)
-+              goto repeat;
-+
-       spin_lock_irq(&conf->device_lock);
-       remaining = --bi->bi_phys_segments;
-       spin_unlock_irq(&conf->device_lock);
-@@ -3439,6 +3507,8 @@ static void status (struct seq_file *seq
-                       atomic_read(&conf->active_stripes),
-                       atomic_read(&conf->in_reqs_in_queue),
-                       atomic_read(&conf->out_reqs_in_queue));
-+      seq_printf (seq, "\t\t%u expanding overlap\n",
-+                      atomic_read(&conf->expanding_overlap));
- #if RAID5_DEBUG
-       seq_printf (seq, "\n");
-       printall(seq, conf);
-diff -pur linux-2.6.18-53.orig/include/linux/raid/raid5.h linux-2.6.18-53/include/linux/raid/raid5.h
---- linux-2.6.18-53.orig/include/linux/raid/raid5.h    2007-12-28 14:55:08.000000000 +0800
-+++ linux-2.6.18-53/include/linux/raid/raid5.h 2007-12-28 18:09:37.000000000 +0800
-@@ -278,6 +278,7 @@ struct raid5_private_data {
-       atomic_t                bit_delayed;
-       atomic_t                in_reqs_in_queue;
-       atomic_t                out_reqs_in_queue;
-+      atomic_t                expanding_overlap;
- };
- typedef struct raid5_private_data raid5_conf_t;
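
The rewritten make_request() above walks each bio one chunk-sized block of stripes at a time rather than STRIPE_SECTORS at a time, which is what lets the batching patch flush whole per-disk bio lists between blocks. The addressing it relies on boils down to: sectors_per_stripe = STRIPE_SECTORS * data_disks, sectors_per_block = stripes_per_chunk * sectors_per_stripe, and block = logical_sector / sectors_per_block. A worked example with assumed geometry (illustrative userspace sketch, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            long stripe_sectors    = 8;             /* STRIPE_SECTORS for 4 KiB pages */
            long data_disks        = 5;             /* 6-disk RAID-5, one parity disk */
            long chunk_size        = 128 * 1024;    /* bytes (assumed) */

            long stripes_per_chunk  = chunk_size / (stripe_sectors << 9);     /* 32 */
            long sectors_per_stripe = stripe_sectors * data_disks;            /* 40 */
            long sectors_per_block  = stripes_per_chunk * sectors_per_stripe; /* 1280 */

            long logical_sector = 100000;           /* example bio start sector */
            long block  = logical_sector / sectors_per_block;
            long stripe = block * (sectors_per_block / data_disks);

            printf("sector %ld falls in block %ld (data sectors %ld..%ld), "
                   "internal stripe %ld\n",
                   logical_sector, block,
                   block * sectors_per_block,
                   (block + 1) * sectors_per_block - 1, stripe);
            return 0;
    }
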
diff --git a/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch b/lustre/kernel_patches/patches/raid5-zerocopy-rhel5.patch
deleted file mode 100644 (file)
index 06db94d..0000000
+++ /dev/null
@@ -1,489 +0,0 @@
-Index: linux-2.6.18-128.1.6/drivers/md/raid5.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/drivers/md/raid5.c       2009-06-02 23:24:52.000000000 -0600
-+++ linux-2.6.18-128.1.6/drivers/md/raid5.c    2009-06-02 23:24:55.000000000 -0600
-@@ -633,6 +633,9 @@
-               clear_buffer_uptodate(bh);
-       }
- #endif
-+      /* Read on a Directing write is allowable */
-+      /* BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags)) */
-+      BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page != sh->dev[i].page);
-       clear_bit(R5_LOCKED, &sh->dev[i].flags);
-       set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
-@@ -669,6 +672,10 @@
-       rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
-       
-+      if (test_bit(R5_Direct, &sh->dev[i].flags)) {
-+              BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
-+              sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
-+      }
-       clear_bit(R5_LOCKED, &sh->dev[i].flags);
-       set_bit(STRIPE_HANDLE, &sh->state);
-       release_stripe(sh);
-@@ -910,7 +917,27 @@
-       return r_sector;
- }
-+static struct page *zero_copy_data(struct bio *bio, sector_t sector)
-+{
-+      sector_t bi_sector = bio->bi_sector;
-+      struct page *page = NULL;
-+      struct bio_vec *bvl;
-+      int i;
-+      bio_for_each_segment(bvl, bio, i) {
-+              if (sector == bi_sector)
-+                      page = bio_iovec_idx(bio, i)->bv_page;
-+              bi_sector += bio_iovec_idx(bio, i)->bv_len >> 9;
-+              if (bi_sector >= sector + STRIPE_SECTORS) {
-+                      /* check if the stripe is covered by one page */
-+                      if (page == bio_iovec_idx(bio, i)->bv_page &&
-+                          PageConstant(page))
-+                              return page;
-+                      return NULL;
-+              }
-+      }
-+      return NULL;
-+}
- /*
-  * Copy data between a page in the stripe cache, and one or more bion
-@@ -1002,8 +1029,9 @@
- {
-       raid5_conf_t *conf = sh->raid_conf;
-       int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
--      void *ptr[MAX_XOR_BLOCKS];
-+      void *ptr[MAX_XOR_BLOCKS], *h_ptr[2];
-       struct bio *chosen;
-+      struct page *page;
-       PRINTK("compute_parity5, stripe %llu, method %d\n",
-               (unsigned long long)sh->sector, method);
-@@ -1053,34 +1081,92 @@
-               count = 1;
-       }
-       
--      for (i = disks; i--;)
--              if (sh->dev[i].written) {
--                      sector_t sector = sh->dev[i].sector;
--                      struct bio *wbi = sh->dev[i].written;
--                      while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
--                              copy_data(1, wbi, sh->dev[i].page, sector);
--                              wbi = r5_next_bio(wbi, sector);
-+      for (i = disks; i--;) {
-+              struct r5dev *dev = &sh->dev[i];
-+              struct bio *wbi = dev->written;
-+              sector_t sector;
-+
-+              if (!wbi)
-+                      continue;
-+
-+              sector = dev->sector;
-+              set_bit(R5_LOCKED, &sh->dev[i].flags);
-+              BUG_ON(test_bit(R5_Direct, &dev->flags));
-+
-+              /* check if it's covered by a single page
-+                 and whole stripe is written at once.
-+               * in this case we can avoid memcpy() */
-+              if (!wbi->bi_next && test_bit(R5_OVERWRITE, &dev->flags) &&
-+                  test_bit(R5_Insync, &dev->flags)) {
-+                      page = zero_copy_data(wbi, sector);
-+                      if (page) {
-+                              atomic_inc(&conf->writes_zcopy);
-+                              /* The pointer must be restored whenever the LOCKED
-+                               * gets cleared. */
-+                              dev->req.bi_io_vec[0].bv_page = page;
-+                              set_bit(R5_Direct, &dev->flags);
-+                              clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-+                              clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
-+                              continue;
-                       }
-+              }
--                      set_bit(R5_LOCKED, &sh->dev[i].flags);
--                      set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+              /* do copy write */
-+              atomic_inc(&conf->writes_copied);
-+              clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
-+              set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+              while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-+                      copy_data(1, wbi, sh->dev[i].page, sector);
-+                      wbi = r5_next_bio(wbi, sector);
-               }
-+      }
-+      h_ptr[0] = ptr[0];
-       switch(method) {
-       case RECONSTRUCT_WRITE:
-       case CHECK_PARITY:
--              for (i=disks; i--;)
--                      if (i != pd_idx) {
--                              ptr[count++] = page_address(sh->dev[i].page);
--                              check_xor();
-+              for (i=disks; i--;) {
-+                      if (i == pd_idx)
-+                              continue;
-+                      if (test_bit(R5_Direct, &sh->dev[i].flags))
-+                              page = sh->dev[i].req.bi_io_vec[0].bv_page;
-+                      else
-+                              page = sh->dev[i].page;
-+
-+                      /* have to compute the parity immediately for
-+                       * a highmem page. it would happen for zerocopy. -jay
-+                       */
-+                      if (PageHighMem(page)) {
-+                              h_ptr[1] = kmap_atomic(page, KM_USER0);
-+                              xor_block(2, STRIPE_SIZE, h_ptr);
-+                              kunmap_atomic(page, KM_USER0);
-+                      } else {
-+                              ptr[count++] = page_address(page);
-                       }
-+                      check_xor();
-+              }
-               break;
-       case READ_MODIFY_WRITE:
--              for (i = disks; i--;)
--                      if (sh->dev[i].written) {
--                              ptr[count++] = page_address(sh->dev[i].page);
--                              check_xor();
-+              for (i = disks; i--;) {
-+                      if (!sh->dev[i].written)
-+                              continue;
-+                      if (test_bit(R5_Direct, &sh->dev[i].flags))
-+                              page = sh->dev[i].req.bi_io_vec[0].bv_page;
-+                      else
-+                              page = sh->dev[i].page;
-+
-+                      /* have to compute the parity immediately for
-+                       * a highmem page. it would happen for zerocopy. -jay
-+                       */
-+                      if (PageHighMem(page)) {
-+                              h_ptr[1] = kmap_atomic(page, KM_USER0);
-+                              xor_block(2, STRIPE_SIZE, h_ptr);
-+                              kunmap_atomic(page, KM_USER0);
-+                      } else {
-+                              ptr[count++] = page_address(page);
-                       }
-+                      check_xor();
-+              }
-       }
-       if (count != 1)
-               xor_block(count, STRIPE_SIZE, ptr);
-@@ -1097,6 +1183,7 @@
-       raid6_conf_t *conf = sh->raid_conf;
-       int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = conf->raid_disks, count;
-       struct bio *chosen;
-+      struct page *page;
-       /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-       void *ptrs[disks];
-@@ -1126,18 +1213,49 @@
-               BUG();          /* Not implemented yet */
-       }
--      for (i = disks; i--;)
--              if (sh->dev[i].written) {
--                      sector_t sector = sh->dev[i].sector;
--                      struct bio *wbi = sh->dev[i].written;
--                      while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
--                              copy_data(1, wbi, sh->dev[i].page, sector);
--                              wbi = r5_next_bio(wbi, sector);
-+      for (i = disks; i--;) {
-+              struct r5dev *dev = &sh->dev[i];
-+              struct bio *wbi = dev->written;
-+              sector_t sector;
-+
-+              if (!wbi)
-+                      continue;
-+
-+              sector = sh->dev[i].sector;
-+              set_bit(R5_LOCKED, &sh->dev[i].flags);
-+              BUG_ON(test_bit(R5_Direct, &sh->dev[i].flags));
-+
-+              /* check if it's covered by a single page
-+               * and whole stripe is written at once.
-+               * in this case we can avoid memcpy() */
-+              if (!wbi->bi_next && test_bit(R5_Insync, &sh->dev[i].flags) &&
-+                  test_bit(R5_OVERWRITE, &sh->dev[i].flags)) {
-+                      page = zero_copy_data(wbi, sector);
-+                      /* we don't do zerocopy on a HighMem page. Raid6 tend
-+                       * to prepare all of the pages' content to be accessed
-+                       * before computing PQ parity. If we need to support HighMem
-+                       * page also, we have to modify the gen_syndrome()
-+                       * algorithm. -jay */
-+                      if (page && !PageHighMem(page)) {
-+                              atomic_inc(&conf->writes_zcopy);
-+                              /* The pointer must be restored whenever the LOCKED
-+                               * gets cleared. */
-+                              sh->dev[i].req.bi_io_vec[0].bv_page = page;
-+                              set_bit(R5_Direct, &sh->dev[i].flags);
-+                              clear_bit(R5_UPTODATE, &sh->dev[i].flags);
-+                              clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
-+                              continue;
-                       }
-+              }
--                      set_bit(R5_LOCKED, &sh->dev[i].flags);
--                      set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+              atomic_inc(&conf->writes_copied);
-+              clear_bit(R5_OVERWRITE, &sh->dev[i].flags);
-+              set_bit(R5_UPTODATE, &sh->dev[i].flags);
-+              while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-+                      copy_data(1, wbi, sh->dev[i].page, sector);
-+                      wbi = r5_next_bio(wbi, sector);
-               }
-+      }
- //    switch(method) {
- //    case RECONSTRUCT_WRITE:
-@@ -1148,8 +1266,12 @@
-               count = 0;
-               i = d0_idx;
-               do {
--                      ptrs[count++] = page_address(sh->dev[i].page);
--                      if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))
-+                      if (test_bit(R5_Direct, &sh->dev[i].flags))
-+                              ptrs[count++] = page_address(sh->dev[i].req.bi_io_vec[0].bv_page);
-+                      else
-+                              ptrs[count++] = page_address(sh->dev[i].page);
-+                      if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags) &&
-+                          !test_bit(R5_Direct, &sh->dev[i].flags))
-                               printk("block %d/%d not uptodate on parity calc\n", i,count);
-                       i = raid6_next_disk(i, disks);
-               } while ( i != d0_idx );
-@@ -1596,7 +1718,8 @@
-               if (sh->dev[i].written) {
-                   dev = &sh->dev[i];
-                   if (!test_bit(R5_LOCKED, &dev->flags) &&
--                       test_bit(R5_UPTODATE, &dev->flags) ) {
-+                       (test_bit(R5_UPTODATE, &dev->flags) ||
-+                        test_bit(R5_Direct, &dev->flags)) ) {
-                       /* We can return any write requests */
-                           struct bio *wbi, *wbi2;
-                           int bitmap_end = 0;
-@@ -1604,6 +1727,7 @@
-                           spin_lock_irq(&conf->device_lock);
-                           wbi = dev->written;
-                           dev->written = NULL;
-+                          clear_bit(R5_Direct, &dev->flags);
-                           while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-                                   wbi2 = r5_next_bio(wbi, dev->sector);
-                                   if (--wbi->bi_phys_segments == 0) {
-@@ -1967,6 +2091,15 @@
-                               set_bit(STRIPE_DEGRADED, &sh->state);
-                       PRINTK("skip op %ld on disc %d for sector %llu\n",
-                               bi->bi_rw, i, (unsigned long long)sh->sector);
-+
-+                      if (test_bit(R5_Direct, &sh->dev[i].flags)) {
-+                              /* restore the page pointer of req, otherwise,
-+                               * no read would be permitted on this stripe, which is
-+                               * not what we want. -jay */
-+                              BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
-+                              sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
-+                      }
-+
-                       clear_bit(R5_LOCKED, &sh->dev[i].flags);
-                       set_bit(STRIPE_HANDLE, &sh->state);
-               }
-@@ -2172,7 +2305,8 @@
-                       if (sh->dev[i].written) {
-                               dev = &sh->dev[i];
-                               if (!test_bit(R5_LOCKED, &dev->flags) &&
--                                  test_bit(R5_UPTODATE, &dev->flags) ) {
-+                                  (test_bit(R5_UPTODATE, &dev->flags) ||
-+                                   test_bit(R5_Direct, &dev->flags)) ) {
-                                       /* We can return any write requests */
-                                       int bitmap_end = 0;
-                                       struct bio *wbi, *wbi2;
-@@ -2181,6 +2315,7 @@
-                                       spin_lock_irq(&conf->device_lock);
-                                       wbi = dev->written;
-                                       dev->written = NULL;
-+                                      clear_bit(R5_Direct, &dev->flags);
-                                       while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-                                               wbi2 = r5_next_bio(wbi, dev->sector);
-                                               if (--wbi->bi_phys_segments == 0) {
-@@ -2532,6 +2667,15 @@
-                               set_bit(STRIPE_DEGRADED, &sh->state);
-                       PRINTK("skip op %ld on disc %d for sector %llu\n",
-                               bi->bi_rw, i, (unsigned long long)sh->sector);
-+
-+                      if (test_bit(R5_Direct, &sh->dev[i].flags)) {
-+                              /* restore the page pointer of req; otherwise
-+                               * no read is permitted on this stripe, which is
-+                               * not what we want. -jay */
-+                              BUG_ON(sh->dev[i].req.bi_io_vec[0].bv_page == sh->dev[i].page);
-+                              sh->dev[i].req.bi_io_vec[0].bv_page = sh->dev[i].page;
-+                      }
-+
-                       clear_bit(R5_LOCKED, &sh->dev[i].flags);
-                       set_bit(STRIPE_HANDLE, &sh->state);
-               }
-@@ -3451,6 +3595,9 @@
-       mddev->queue->max_phys_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;
-       mddev->queue->max_hw_segments = conf->chunk_size * conf->previous_raid_disks >> PAGE_SHIFT;;
-+      /* raid5 device is able to do zcopy right now. */
-+      mddev->queue->backing_dev_info.capabilities |= BDI_CAP_PAGE_CONSTANT_WRITE;
-+
-       return 0;
- abort:
-       if (conf) {
-@@ -3537,9 +3684,11 @@
-                       atomic_read(&conf->handled_in_raid5d),
-                       atomic_read(&conf->out_of_stripes),
-                       atomic_read(&conf->handle_called));
--      seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw",
-+      seq_printf (seq, "\n\t\treads: %u for rmw, %u for rcw. zcopy writes: %u, copied writes: %u",
-                       atomic_read(&conf->reads_for_rmw),
--                      atomic_read(&conf->reads_for_rcw));
-+                      atomic_read(&conf->reads_for_rcw),
-+                      atomic_read(&conf->writes_zcopy),
-+                      atomic_read(&conf->writes_copied));
-       seq_printf (seq, "\n\t\t%u delayed, %u bit delayed, %u active, queues: %u in, %u out\n",
-                       atomic_read(&conf->delayed), atomic_read(&conf->bit_delayed),
-                       atomic_read(&conf->active_stripes),
-Index: linux-2.6.18-128.1.6/include/linux/backing-dev.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/backing-dev.h      2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/backing-dev.h   2009-06-02 23:24:55.000000000 -0600
-@@ -48,6 +48,7 @@
- #define BDI_CAP_READ_MAP      0x00000010      /* Can be mapped for reading */
- #define BDI_CAP_WRITE_MAP     0x00000020      /* Can be mapped for writing */
- #define BDI_CAP_EXEC_MAP      0x00000040      /* Can be mapped for execution */
-+#define BDI_CAP_PAGE_CONSTANT_WRITE   0x00000080      /* Zcopy write - for raid5 */
- #define BDI_CAP_VMFLAGS \
-       (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
-@@ -94,11 +95,18 @@
- #define bdi_cap_account_dirty(bdi) \
-       (!((bdi)->capabilities & BDI_CAP_NO_ACCT_DIRTY))
-+#define bdi_cap_page_constant_write(bdi) \
-+      ((bdi)->capabilities & BDI_CAP_PAGE_CONSTANT_WRITE)
-+
- #define mapping_cap_writeback_dirty(mapping) \
-       bdi_cap_writeback_dirty((mapping)->backing_dev_info)
- #define mapping_cap_account_dirty(mapping) \
-       bdi_cap_account_dirty((mapping)->backing_dev_info)
-+#define mapping_cap_page_constant_write(mapping) \
-+      bdi_cap_page_constant_write((mapping)->backing_dev_info)
-+      
-+
- #endif                /* _LINUX_BACKING_DEV_H */
-Index: linux-2.6.18-128.1.6/include/linux/page-flags.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/page-flags.h       2009-04-14 21:05:24.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/page-flags.h    2009-06-02 23:24:55.000000000 -0600
-@@ -86,6 +86,7 @@
- #define PG_reclaim            17      /* To be reclaimed asap */
- #define PG_nosave_free                18      /* Free, should not be written */
- #define PG_buddy              19      /* Page is free, on buddy lists */
-+#define PG_constant           21      /* To mark if the page is constant */
- #define PG_xpmem              27      /* Testing for xpmem. */
- /* PG_owner_priv_1 users should have descriptive aliases */
-@@ -283,6 +284,14 @@
- struct page;  /* forward declaration */
-+#define PageConstant(page)    test_bit(PG_constant, &(page)->flags)
-+#define SetPageConstant(page)         set_bit(PG_constant, &(page)->flags)
-+#define ClearPageConstant(page) clear_bit(PG_constant, &(page->flags))
-+#define TestSetPageConstant(page) test_and_set_bit(PG_constant, &(page)->flags)
-+
-+extern int set_page_constant(struct page *page);
-+extern void clear_page_constant(struct page *);
-+
- int test_clear_page_dirty(struct page *page);
- int test_clear_page_writeback(struct page *page);
- int test_set_page_writeback(struct page *page);
-Index: linux-2.6.18-128.1.6/include/linux/raid/raid5.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/raid/raid5.h       2009-06-02 23:24:50.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/raid/raid5.h    2009-06-02 23:24:55.000000000 -0600
-@@ -156,8 +156,9 @@
- #define       R5_Overlap      7       /* There is a pending overlapping request on this block */
- #define       R5_ReadError    8       /* seen a read error here recently */
- #define       R5_ReWrite      9       /* have tried to over-write the readerror */
--
- #define       R5_Expanded     10      /* This block now has post-expand data */
-+#define       R5_Direct       11      /* Use the pages in bio to do the write directly. */
-+
- /*
-  * Write method
-  */
-Index: linux-2.6.18-128.1.6/mm/filemap.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/mm/filemap.c     2009-04-14 21:05:46.000000000 -0600
-+++ linux-2.6.18-128.1.6/mm/filemap.c  2009-06-02 23:24:55.000000000 -0600
-@@ -30,6 +30,7 @@
- #include <linux/security.h>
- #include <linux/syscalls.h>
- #include <linux/cpuset.h>
-+#include <linux/rmap.h>
- #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
- #include <trace/mm.h>
- #include "internal.h"
-@@ -567,11 +568,55 @@
-               if (!test_clear_page_writeback(page))
-                       BUG();
-       }
-+      clear_page_constant(page);
-       smp_mb__after_clear_bit();
-       wake_up_page(page, PG_writeback);
- }
- EXPORT_SYMBOL(end_page_writeback);
-+/* Make a page constant: `constant' means any write to this page will
-+ * be blocked until clear_page_constant is called.
-+ * The page lock must be held.
-+ */
-+int set_page_constant(struct page *page)
-+{
-+      BUG_ON(!PageLocked(page));
-+
-+      /* If it's an anonymous page that hasn't been added to the swap cache,
-+       * return directly because we have no way to swap this page.
-+       */
-+      if (page_mapping(page) == NULL)
-+              return SWAP_FAIL;
-+
-+      BUG_ON(!PageUptodate(page));
-+
-+      /* I have to clear page uptodate before trying to remove
-+       * it from user's page table because otherwise, the page may be
-+       * reinstalled by a page access which happens between try_to_unmap()
-+       * and ClearPageUptodate(). -jay
-+       */
-+      ClearPageUptodate(page);
-+      if (page_mapped(page) && try_to_unmap(page, 0) != SWAP_SUCCESS) {
-+              SetPageUptodate(page);
-+              return SWAP_FAIL;
-+      }
-+      SetPageConstant(page);
-+      return SWAP_SUCCESS;
-+}
-+
-+void clear_page_constant(struct page *page)
-+{
-+      if (PageConstant(page)) {
-+              BUG_ON(!PageLocked(page));
-+              BUG_ON(PageUptodate(page));
-+              ClearPageConstant(page);
-+              SetPageUptodate(page);
-+              unlock_page(page);
-+      }
-+}
-+EXPORT_SYMBOL(set_page_constant);
-+EXPORT_SYMBOL(clear_page_constant);
-+
- /**
-  * __lock_page - get a lock on the page, assuming we need to sleep to get it
-  * @page: the page to lock
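
For orientation only: the zerocopy patch being removed here exported a small page-constant API (set_page_constant()/clear_page_constant() plus the BDI_CAP_PAGE_CONSTANT_WRITE capability) so a writer could hand its own pages to raid5 without an intermediate copy. The sketch below is a minimal, hypothetical caller of that API on the RHEL5-era kernel these patches targeted; my_prepare_zcopy_write() is an invented name, not part of the patch.

/* Illustrative sketch only; assumes the removed patch is applied.
 * my_prepare_zcopy_write() is a hypothetical caller of the API above. */
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>

/* Decide whether a locked, uptodate page may be written zero-copy. */
static int my_prepare_zcopy_write(struct page *page)
{
        struct address_space *mapping = page_mapping(page);

        /* Only devices advertising BDI_CAP_PAGE_CONSTANT_WRITE (raid5
         * with this patch) may keep referencing the caller's page while
         * the write is in flight. */
        if (mapping == NULL || !mapping_cap_page_constant_write(mapping))
                return 0;

        /* Pin the contents: unmap the page from user space and set
         * PG_constant so the data stays stable until writeback ends. */
        if (set_page_constant(page) != SWAP_SUCCESS)
                return 0;

        return 1;       /* safe to point the bio at this page directly */
}

On completion, end_page_writeback() calls clear_page_constant(), which restores PG_uptodate and unlocks the page, so the caller leaves the page locked for the duration of the zero-copy write.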
diff --git a/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch b/lustre/kernel_patches/patches/sd_iostats-2.6-rhel5.patch
deleted file mode 100644 (file)
index 2297f8c..0000000
+++ /dev/null
@@ -1,581 +0,0 @@
-Index: linux-2.6.16.60-0.37/drivers/scsi/Kconfig
-===================================================================
---- linux-2.6.16.60-0.37.orig/drivers/scsi/Kconfig     2009-03-24 05:46:32.000000000 -0700
-+++ linux-2.6.16.60-0.37/drivers/scsi/Kconfig  2009-06-02 23:33:14.000000000 -0600
-@@ -78,6 +78,14 @@
-         To compile this driver as a module, choose M here and read
-         <file:Documentation/scsi/scsi.txt>. The module will be called st.
-+config SD_IOSTATS
-+   bool "Enable SCSI disk I/O stats"
-+   depends on BLK_DEV_SD
-+   default y
-+   ---help---
-+     This enables SCSI disk I/O stats collection.  You must also enable
-+     /proc file system support if you want this feature.
-+
- config CHR_DEV_OSST
-       tristate "SCSI OnStream SC-x0 tape support"
-       depends on SCSI
-Index: linux-2.6.16.60-0.37/drivers/scsi/scsi_proc.c
-===================================================================
---- linux-2.6.16.60-0.37.orig/drivers/scsi/scsi_proc.c 2009-03-24 05:46:25.000000000 -0700
-+++ linux-2.6.16.60-0.37/drivers/scsi/scsi_proc.c      2009-06-02 23:33:14.000000000 -0600
-@@ -40,7 +40,8 @@
- /* 4K page size, but our output routines, use some slack for overruns */
- #define PROC_BLOCK_SIZE (3*1024)
--static struct proc_dir_entry *proc_scsi;
-+struct proc_dir_entry *proc_scsi;
-+EXPORT_SYMBOL(proc_scsi);
- /* Protect sht->present and sht->proc_dir */
- static DEFINE_MUTEX(global_host_template_mutex);
-Index: linux-2.6.16.60-0.37/drivers/scsi/sd.c
-===================================================================
---- linux-2.6.16.60-0.37.orig/drivers/scsi/sd.c        2009-03-24 05:46:25.000000000 -0700
-+++ linux-2.6.16.60-0.37/drivers/scsi/sd.c     2009-06-02 23:33:14.000000000 -0600
-@@ -63,6 +63,63 @@
- #include "scsi_logging.h"
-+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
-+# include <linux/proc_fs.h>
-+# include <linux/seq_file.h>
-+
-+typedef struct {
-+      unsigned long long iostat_size;
-+      unsigned long long iostat_count;
-+} iostat_counter_t;
-+
-+#define IOSTAT_NCOUNTERS 16
-+typedef struct {
-+      iostat_counter_t        iostat_read_histogram[IOSTAT_NCOUNTERS];
-+      iostat_counter_t        iostat_write_histogram[IOSTAT_NCOUNTERS];
-+      struct timeval          iostat_timeval;
-+
-+      /* queue depth: how well the pipe is filled up */
-+      unsigned long long      iostat_queue_ticks[IOSTAT_NCOUNTERS];
-+      unsigned long long      iostat_queue_ticks_sum;
-+      unsigned long           iostat_queue_depth;
-+      unsigned long           iostat_queue_stamp;
-+
-+      /* seeks: how linear the traffic is */
-+      unsigned long long      iostat_next_sector;
-+      unsigned long long      iostat_seek_sectors;
-+      unsigned long long      iostat_seeks;
-+      unsigned long long      iostat_sectors;
-+      unsigned long long      iostat_reqs;
-+      unsigned long           iostat_read_reqs;
-+      unsigned long           iostat_write_reqs;
-+
-+      /* process time: how long it takes to process requests */
-+      unsigned long           iostat_rtime[IOSTAT_NCOUNTERS];
-+      unsigned long           iostat_wtime[IOSTAT_NCOUNTERS];
-+
-+      /* queue time: how long process spent in elevator's queue */
-+      unsigned long           iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
-+      unsigned long           iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
-+
-+      /* must be the last field, as it's used to know size to be memset'ed */
-+      spinlock_t              iostat_lock;
-+} ____cacheline_aligned_in_smp iostat_stats_t;
-+
-+struct proc_dir_entry *sd_iostats_procdir = NULL;
-+char sd_iostats_procdir_name[] = "sd_iostats";
-+static struct file_operations sd_iostats_proc_fops;
-+
-+extern void sd_iostats_init(void);
-+extern void sd_iostats_fini(void);
-+void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
-+void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
-+#else
-+static inline void sd_iostats_init(void) {}
-+static inline void sd_iostats_fini(void) {}
-+static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
-+static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
-+#endif
-+
- /*
-  * More than enough for everybody ;)  The huge number of majors
-  * is a leftover from 16bit dev_t days, we don't really need that
-@@ -127,6 +184,9 @@
-       unsigned        WCE : 1;        /* state of disk WCE bit */
-       unsigned        RCD : 1;        /* state of disk RCD bit, unused */
-       unsigned        DPOFUA : 1;     /* state of disk DPOFUA bit */
-+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
-+      iostat_stats_t  *stats;         /* scsi disk statistics */
-+#endif
- };
- #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)
-@@ -520,6 +580,8 @@
-        */
-       SCpnt->done = sd_rw_intr;
-+      sd_iostats_start_req(SCpnt);
-+
-       /*
-        * This indicates that the command is ready from our end to be
-        * queued.
-@@ -1014,6 +1076,7 @@
-               break;
-       }
-  out:
-+      sd_iostats_finish_req(SCpnt);
-       scsi_io_completion(SCpnt, good_bytes);
- }
-@@ -1713,6 +1776,36 @@
-       if (sdp->removable)
-               gd->flags |= GENHD_FL_REMOVABLE;
-+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
-+      sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
-+      if (!sdkp->stats) {
-+              printk(KERN_WARNING "cannot allocate iostat structure for "
-+                                  "%s\n", gd->disk_name);
-+      } else {
-+              do_gettimeofday(&sdkp->stats->iostat_timeval);
-+              sdkp->stats->iostat_queue_stamp = jiffies;
-+              spin_lock_init(&sdkp->stats->iostat_lock);
-+              if (sd_iostats_procdir) {
-+                      struct proc_dir_entry *pde;
-+                      pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
-+                                              sd_iostats_procdir);
-+                      if (!pde) {
-+                              printk(KERN_WARNING "Can't create /proc/scsi/"
-+                                                  "%s/%s\n",
-+                                                  sd_iostats_procdir_name,
-+                                                  gd->disk_name);
-+                              kfree(sdkp->stats);
-+                              sdkp->stats = NULL;
-+                      } else {
-+                              pde->proc_fops = &sd_iostats_proc_fops;
-+                              pde->data = gd;
-+                      }
-+              } else {
-+                      kfree(sdkp->stats);
-+                      sdkp->stats = NULL;
-+              }
-+      }
-+#endif
-       dev_set_drvdata(dev, sdkp);
-       add_disk(gd);
-@@ -1756,6 +1849,366 @@
-       return 0;
- }
-+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
-+static int
-+sd_iostats_seq_show(struct seq_file *seq, void *v)
-+{
-+      struct timeval     now;
-+      struct gendisk *disk = seq->private;
-+      iostat_stats_t    *stats;
-+      unsigned long long read_len;
-+      unsigned long long read_len_tot;
-+      unsigned long      read_num;
-+      unsigned long      read_num_tot;
-+      unsigned long long write_len;
-+      unsigned long long write_len_tot;
-+      unsigned long      write_num;
-+      unsigned long      write_num_tot;
-+      int                i;
-+      int                maxi;
-+
-+      stats = scsi_disk(disk)->stats;
-+      if (stats == NULL) {
-+              printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
-+              BUG();
-+      }
-+
-+      do_gettimeofday(&now);
-+      now.tv_sec -= stats->iostat_timeval.tv_sec;
-+      now.tv_usec -= stats->iostat_timeval.tv_usec;
-+      if (now.tv_usec < 0) {
-+              now.tv_usec += 1000000;
-+              now.tv_sec--;
-+      }
-+
-+      /* this sampling races with updates */
-+      seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
-+                      (unsigned long) scsi_disk(disk)->index,
-+                      now.tv_sec, now.tv_usec);
-+
-+      for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
-+              if (stats->iostat_read_histogram[i].iostat_count != 0 ||
-+                              stats->iostat_write_histogram[i].iostat_count != 0)
-+                      break;
-+      maxi = i;
-+
-+      seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size", 
-+                      "reads", "total", "writes", "total");
-+
-+      read_len_tot = write_len_tot = 0;
-+      read_num_tot = write_num_tot = 0;
-+      for (i = 0; i <= maxi; i++) {
-+              read_len = stats->iostat_read_histogram[i].iostat_size;
-+              read_len_tot += read_len;
-+              read_num = stats->iostat_read_histogram[i].iostat_count;
-+              read_num_tot += read_num;
-+
-+              write_len = stats->iostat_write_histogram[i].iostat_size;
-+              write_len_tot += write_len;
-+              write_num = stats->iostat_write_histogram[i].iostat_count;
-+              write_num_tot += write_num;
-+
-+              seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n", 
-+                              512<<i, read_num, read_len, write_num, write_len);
-+      }
-+
-+      seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
-+                      read_num_tot, read_len_tot, 
-+                      write_num_tot, write_len_tot);
-+
-+      seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
-+      for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
-+              unsigned long long ticks, percent;
-+              ticks = stats->iostat_queue_ticks[i];
-+              if (ticks == 0)
-+                      continue;
-+              percent = stats->iostat_queue_ticks[i] * 100;
-+              do_div(percent, stats->iostat_queue_ticks_sum);
-+              seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
-+      }
-+
-+      if (stats->iostat_reqs != 0) {
-+              unsigned long long aveseek = 0, percent = 0;
-+
-+              if (stats->iostat_seeks) {
-+                      aveseek = stats->iostat_seek_sectors;
-+                      do_div(aveseek, stats->iostat_seeks);
-+                      percent = stats->iostat_seeks * 100;
-+                      do_div(percent, stats->iostat_reqs);
-+              }
-+
-+              seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
-+                              "%llu sectors in ave, %llu%% of all reqs\n",
-+                              stats->iostat_sectors, stats->iostat_reqs,
-+                              stats->iostat_seeks, aveseek, percent);
-+      }
-+
-+      seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
-+                      "%%", "writes", "%%");
-+      for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
-+              unsigned long read_percent = 0, write_percent = 0;
-+              if (stats->iostat_wtime[i] == 0 &&
-+                              stats->iostat_rtime[i] == 0)
-+                      continue;
-+              if (stats->iostat_read_reqs)
-+                      read_percent = stats->iostat_rtime[i] * 100 / 
-+                              stats->iostat_read_reqs;
-+              if (stats->iostat_write_reqs)
-+                      write_percent = stats->iostat_wtime[i] * 100 / 
-+                              stats->iostat_write_reqs;
-+              seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
-+                              jiffies_to_msecs(((1UL << i) >> 1) << 1),
-+                              stats->iostat_rtime[i], read_percent,
-+                              stats->iostat_wtime[i], write_percent);
-+      }
-+
-+      seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
-+                      "%%", "writes", "%%");
-+      for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
-+              unsigned long read_percent = 0, write_percent = 0;
-+              if (stats->iostat_wtime_in_queue[i] == 0 &&
-+                              stats->iostat_rtime_in_queue[i] == 0)
-+                      continue;
-+              if (stats->iostat_read_reqs)
-+                      read_percent = stats->iostat_rtime_in_queue[i] * 100 / 
-+                              stats->iostat_read_reqs;
-+              if (stats->iostat_write_reqs)
-+                      write_percent = stats->iostat_wtime_in_queue[i] * 100 / 
-+                              stats->iostat_write_reqs;
-+              seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
-+                              jiffies_to_msecs(((1UL << i) >> 1) << 1),
-+                              stats->iostat_rtime_in_queue[i],
-+                              read_percent,
-+                              stats->iostat_wtime_in_queue[i],
-+                              write_percent);
-+      }
-+
-+      return 0;
-+}
-+
-+static void *
-+sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
-+{
-+      return (*pos == 0) ? (void *)1 : NULL;
-+}
-+
-+static void *
-+sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
-+{
-+      ++*pos;
-+      return NULL;
-+}
-+
-+static void
-+sd_iostats_seq_stop(struct seq_file *p, void *v)
-+{
-+}
-+
-+static struct seq_operations sd_iostats_seqops = {
-+      .start = sd_iostats_seq_start,
-+      .stop  = sd_iostats_seq_stop,
-+      .next  = sd_iostats_seq_next,
-+      .show  = sd_iostats_seq_show,
-+};
-+
-+static int
-+sd_iostats_seq_open (struct inode *inode, struct file *file)
-+{
-+      int rc;
-+
-+      rc = seq_open(file, &sd_iostats_seqops);
-+      if (rc != 0)
-+              return rc;
-+
-+      ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
-+      return 0;
-+}
-+
-+static ssize_t
-+sd_iostats_seq_write(struct file *file, const char *buffer,
-+                   size_t len, loff_t *off)
-+{
-+      struct seq_file   *seq = file->private_data;
-+      struct gendisk *disk = seq->private;
-+      iostat_stats_t    *stats = scsi_disk(disk)->stats;
-+      unsigned long      flags;
-+      unsigned long      qdepth;
-+
-+
-+      spin_lock_irqsave (&stats->iostat_lock, flags);
-+      qdepth = stats->iostat_queue_depth;
-+      memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
-+      do_gettimeofday(&stats->iostat_timeval);
-+      stats->iostat_queue_stamp = jiffies;
-+      stats->iostat_queue_depth = qdepth;
-+      spin_unlock_irqrestore (&stats->iostat_lock, flags);
-+
-+      return len;
-+}
-+
-+static struct file_operations sd_iostats_proc_fops = {
-+      .owner   = THIS_MODULE,
-+      .open    = sd_iostats_seq_open,
-+      .read    = seq_read,
-+      .write   = sd_iostats_seq_write,
-+      .llseek  = seq_lseek,
-+      .release = seq_release,
-+};
-+
-+extern struct proc_dir_entry *proc_scsi;
-+
-+void
-+sd_iostats_init(void)
-+{
-+      if (proc_scsi == NULL) {
-+              printk(KERN_WARNING "No access to sd iostats: "
-+                      "proc_scsi is NULL\n");
-+              return;
-+      }
-+
-+      sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
-+                                             S_IFDIR | S_IRUGO | S_IXUGO,
-+                                              proc_scsi);
-+      if (sd_iostats_procdir == NULL) {
-+              printk(KERN_WARNING "No access to sd iostats: "
-+                      "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
-+              return;
-+      }
-+}
-+
-+void sd_iostats_fini(void)
-+{
-+      if (proc_scsi != NULL && sd_iostats_procdir != NULL)
-+              remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
-+
-+      sd_iostats_procdir = NULL;
-+}
-+
-+void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
-+{
-+      struct request          *rq = SCpnt->request;
-+      iostat_stats_t          *stats;
-+      unsigned long           *tcounter;
-+      int                     tbucket;
-+      int                     tmp;
-+      unsigned long           irqflags;
-+      unsigned long           i;
-+
-+      stats = scsi_disk(rq->rq_disk)->stats;
-+      if (stats == NULL)
-+              return;
-+
-+      tmp = jiffies - rq->start_time;
-+      for (tbucket = 0; tmp > 1; tbucket++)
-+              tmp >>= 1;
-+      if (tbucket >= IOSTAT_NCOUNTERS)
-+              tbucket = IOSTAT_NCOUNTERS - 1;
-+      //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
-+
-+      tcounter = rq_data_dir(rq) == WRITE ?
-+              &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
-+
-+      spin_lock_irqsave(&stats->iostat_lock, irqflags);
-+
-+      /* update delay stats */
-+      (*tcounter)++;
-+
-+      /* update queue depth stats */
-+      i = stats->iostat_queue_depth;
-+      if (i >= IOSTAT_NCOUNTERS)
-+              i = IOSTAT_NCOUNTERS - 1;
-+      stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
-+      stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
-+      BUG_ON(stats->iostat_queue_depth == 0);
-+      stats->iostat_queue_depth--;
-+
-+      /* update seek stats. XXX: not sure about nr_sectors */
-+      stats->iostat_sectors += rq->nr_sectors;
-+      stats->iostat_reqs++;
-+      if (rq->sector != stats->iostat_next_sector) {
-+              stats->iostat_seek_sectors +=
-+                      rq->sector > stats->iostat_next_sector ?
-+                      rq->sector - stats->iostat_next_sector :
-+                      stats->iostat_next_sector - rq->sector;
-+              stats->iostat_seeks++;
-+      }
-+      stats->iostat_next_sector = rq->sector + rq->nr_sectors;
-+
-+      stats->iostat_queue_stamp = jiffies;
-+
-+      spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
-+}
-+
-+void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
-+{
-+      struct request          *rq = SCpnt->request;
-+      iostat_stats_t          *stats;
-+      iostat_counter_t        *counter;
-+      int                     bucket;
-+      int                     tbucket;
-+      int                     tmp;
-+      unsigned long           irqflags;
-+      unsigned long           i;
-+      int                     nsect;
-+
-+      stats = scsi_disk(rq->rq_disk)->stats;
-+      if (stats == NULL)
-+              return;
-+
-+      nsect = SCpnt->request_bufflen >> 9;
-+      for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
-+              tmp >>= 1;
-+
-+      if (bucket >= IOSTAT_NCOUNTERS) {
-+              printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
-+              BUG();
-+      }
-+
-+      counter = rq_data_dir(rq) == WRITE ?
-+              &stats->iostat_write_histogram[bucket] :
-+              &stats->iostat_read_histogram[bucket];
-+
-+      tmp = jiffies - rq->start_time;
-+      for (tbucket = 0; tmp > 1; tbucket++)
-+              tmp >>= 1;
-+      if (tbucket >= IOSTAT_NCOUNTERS)
-+              tbucket = IOSTAT_NCOUNTERS - 1;
-+      //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
-+
-+      /* An ugly hack to know the exact processing time.  The right
-+       * solution is to add one more field to struct request;
-+       * hopefully this breaks nothing ... */
-+      rq->start_time = jiffies;
-+
-+      spin_lock_irqsave(&stats->iostat_lock, irqflags);
-+
-+      /* update queue depth stats */
-+      i = stats->iostat_queue_depth;
-+      if (i >= IOSTAT_NCOUNTERS)
-+              i = IOSTAT_NCOUNTERS - 1;
-+      stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
-+      stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
-+      stats->iostat_queue_depth++;
-+
-+      /* update delay stats */
-+      if (rq_data_dir(rq) == WRITE) {
-+              stats->iostat_wtime_in_queue[tbucket]++;
-+              stats->iostat_write_reqs++;
-+      } else {
-+              stats->iostat_rtime_in_queue[tbucket]++;
-+              stats->iostat_read_reqs++;
-+      }
-+
-+      /* update size stats */
-+      counter->iostat_size += nsect;
-+      counter->iostat_count++;
-+
-+      stats->iostat_queue_stamp = jiffies;
-+
-+      spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
-+}
-+#endif
-+
- /**
-  *    scsi_disk_release - Called to free the scsi_disk structure
-  *    @cdev: pointer to embedded class device
-@@ -1774,10 +2227,16 @@
-       idr_remove(&sd_index_idr, sdkp->index);
-       spin_unlock(&sd_index_lock);
-+#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
-+      if (sdkp->stats) {
-+              remove_proc_entry(disk->disk_name, sd_iostats_procdir);
-+              kfree(sdkp->stats);
-+              sdkp->stats = NULL;
-+      }
-+#endif
-       disk->private_data = NULL;
-       put_disk(disk);
-       put_device(&sdkp->device->sdev_gendev);
--
-       kfree(sdkp);
- }
-@@ -1844,6 +2303,7 @@
- static int __init init_sd(void)
- {
-       int majors = 0, i;
-+      int rc = 0;
-       SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
-@@ -1854,9 +2314,13 @@
-       if (!majors)
-               return -ENODEV;
-+      sd_iostats_init();
-       class_register(&sd_disk_class);
--      return scsi_register_driver(&sd_template.gendrv);
-+      rc = scsi_register_driver(&sd_template.gendrv);
-+      if (rc)
-+              sd_iostats_fini();
-+      return rc;
- }
- /**
-@@ -1875,6 +2339,7 @@
-               unregister_blkdev(sd_major(i), "sd");
-       class_unregister(&sd_disk_class);
-+      sd_iostats_fini();
- }
- module_init(init_sd);
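
The sd_iostats patch being removed above buckets request sizes and latencies into power-of-two histogram slots, and its seq_show() prints each size row as 512<<i bytes. Below is a minimal user-space sketch of that bucketing, useful when sanity-checking the /proc/scsi/sd_iostats output; size_bucket() is an invented helper name, not something the patch provides.

/* Illustrative only: reproduces the power-of-two size bucketing of
 * sd_iostats_start_req() so the "size" rows (512<<i bytes) can be
 * checked by hand.  The patch BUG()s on bucket overflow; this clamps. */
#include <stdio.h>

#define IOSTAT_NCOUNTERS 16

static int size_bucket(unsigned int nsect)      /* nsect = 512-byte sectors */
{
        int bucket = 0;

        while (nsect > 1) {                     /* same shift loop as the patch */
                nsect >>= 1;
                bucket++;
        }
        return bucket < IOSTAT_NCOUNTERS ? bucket : IOSTAT_NCOUNTERS - 1;
}

int main(void)
{
        int b = size_bucket(256);               /* 128 KiB request = 256 sectors */

        printf("bucket=%d size=%u bytes\n", b, 512u << b);     /* bucket=8, 131072 */
        return 0;
}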
diff --git a/lustre/kernel_patches/patches/small-fixes-about-jbd.patch b/lustre/kernel_patches/patches/small-fixes-about-jbd.patch
deleted file mode 100644 (file)
index d39a174..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-diff -pur linux-2.6.18-128.orig/fs/jbd/commit.c linux-2.6.18-128/fs/jbd/commit.c
---- linux-2.6.18-128.orig/fs/jbd/commit.c      2009-04-10 16:31:40.000000000 +0800
-+++ linux-2.6.18-128/fs/jbd/commit.c   2009-04-10 16:33:14.000000000 +0800
-@@ -862,7 +862,8 @@ wait_for_iobuf:
-               if (err)
-                       __journal_abort_hard(journal);
-       }
--      err = journal_wait_on_commit_record(cbh);
-+      if (!err && !is_journal_aborted(journal))
-+              err = journal_wait_on_commit_record(cbh);
-       if (err)
-               journal_abort(journal, err);
diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series
deleted file mode 100644 (file)
index e3803f7..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-lustre_version.patch
-jbd-jcberr-2.6.18-vanilla.patch
-export_symbols-2.6.12.patch
-dev_read_only-2.6.18-vanilla.patch
-export-2.6.18-vanilla.patch
-sd_iostats-2.6-rhel5.patch
-export_symbol_numa-2.6-fc5.patch
-blkdev_tunables-2.6-rhel5.patch
-jbd-stats-2.6-rhel5.patch
-raid5-stats-rhel5.patch
-raid5-configurable-cachesize-rhel5.patch
-raid5-large-io-rhel5.patch
-raid5-stripe-by-stripe-handling-rhel5.patch
-raid5-merge-ios-rhel5.patch
-raid5-zerocopy-rhel5.patch
-raid5-maxsectors-rhel5.patch
-raid5-rebuild-corrupt-bug.patch
-md-rebuild-policy.patch
-jbd-journal-chksum-2.6.18-vanilla.patch
-quota-large-limits-rhel5.patch
-raid5-mmp-unplug-dev.patch
-small-fixes-about-jbd.patch
-mpt-fusion-max-sge.patch
-prune-icache-use-trylock-rhel5.patch
-jbd2-jcberr-2.6-rhel5.patch
-jbd2-commit-timer-no-jiffies-rounding.diff
-md-avoid-bug_on-when-bmc-overflow.patch
-jbd2_stats_proc_init-wrong-place.patch
-lustre_iser_max_sectors_tuning_lustre2.0.patch
-fix-forever-in-do_get_write_access.patch
index e5534ea..c1bcb70 100644 (file)
@@ -1,7 +1,6 @@
 SERIES                VERSION                  COMMENT
 
 SUPPORTED KERNELS:
-2.6-rhel5             RHEL5: 2.6.18-238.19.1.el5
 2.6-rhel6             RHEL6: 2.6.32-279.14.1.el6
 
 CLIENT SUPPORT FOR UNPATCHED KERNELS: