Whamcloud - gitweb
LU-73 RHEL6 support.
author: Bobi Jam <bobijam@whamcloud.com>
Tue, 15 Mar 2011 01:19:12 +0000 (09:19 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Thu, 24 Mar 2011 15:40:31 +0000 (08:40 -0700)
Include client, ldiskfs, kernel patches.

Change-Id: Ice16b8bf40c2e37df9af9f399316917097e8ee8f
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/307
Tested-by: Hudson
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Brian J. Murrell <brian@whamcloud.com>
43 files changed:
ldiskfs/configure.ac
ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-dynlocks-common-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-extents-mount-option-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-force_over_16tb-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-inode-version-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-kill-dx_root-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-map_inode_page-2.6-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-misc-rhel5.patch
ldiskfs/kernel_patches/patches/ext4-misc-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-mmp-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-osd-iam-exports-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-osd-iop-common-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-pdir-fix-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-prealloc-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4_data_in_dirent-rhel6.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series [new file with mode: 0644]
ldiskfs/ldiskfs/Makefile.in
ldiskfs/ldiskfs/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/include/linux/lustre_compat25.h
lustre/include/lustre_disk.h
lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/dev_read_only-2.6.32-rhel6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/export-2.6.32-vanilla.patch [new file with mode: 0644]
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/mpt-fusion-max-sge-rhel6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/raid5-mmp-unplug-dev-rhel6.patch [new file with mode: 0644]
lustre/kernel_patches/series/2.6-rhel6.series [new file with mode: 0644]
lustre/ldlm/ldlm_pool.c
lustre/llite/llite_lib.c
lustre/lvfs/fsfilt_ext3.c
lustre/obdclass/lu_object.c
lustre/ptlrpc/sec_bulk.c

index 959bc11..adb68fa 100644 (file)
@@ -136,6 +136,7 @@ case $LINUXRELEASE in
        ;;
 2.6.22*) LDISKFS_SERIES="2.6.22-vanilla.series";;
 2.6.27*) LDISKFS_SERIES="2.6-sles11.series";;
+2.6.32*) LDISKFS_SERIES="2.6-rhel6.series";;
 *) AC_MSG_WARN([Unknown kernel version $LINUXRELEASE, fix ldiskfs/configure.ac])
 esac
 AC_MSG_RESULT([$LDISKFS_SERIES])
diff --git a/ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel6.patch b/ldiskfs/kernel_patches/patches/export-ext4-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..c3411d2
--- /dev/null
@@ -0,0 +1,81 @@
+Index: linux-2.6.32.i386/fs/ext4/super.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/super.c     2010-04-07 14:18:32.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/super.c  2010-04-07 14:19:47.000000000 +0530
+@@ -291,6 +291,8 @@
+       jbd2_journal_abort_handle(handle);
+ }
++EXPORT_SYMBOL(ext4_journal_abort_handle);
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+@@ -3030,6 +3032,8 @@
+       return ret;
+ }
++EXPORT_SYMBOL(ext4_force_commit);
++
+ /*
+  * Setup any per-fs journal parameters now.  We'll do this both on
+  * initial mount, once the journal has been initialised but before we've
+@@ -4088,6 +4092,12 @@
+                       unsigned long *blocks, int *created, int create);
+ EXPORT_SYMBOL(ext4_map_inode_page);
++EXPORT_SYMBOL(ext4_xattr_get);
++EXPORT_SYMBOL(ext4_xattr_set_handle);
++EXPORT_SYMBOL(ext4_bread);
++EXPORT_SYMBOL(ext4_journal_start_sb);
++EXPORT_SYMBOL(__ext4_journal_stop);
++
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Fourth Extended Filesystem");
+ MODULE_LICENSE("GPL");
+Index: linux-2.6.32.i386/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/ext4.h      2010-04-07 14:17:04.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/ext4.h   2010-04-07 14:20:34.000000000 +0530
+@@ -1385,6 +1385,8 @@
+                                      struct buffer_head *bh,
+                                      ext4_group_t group,
+                                      struct ext4_group_desc *desc);
++extern struct buffer_head *ext4_read_inode_bitmap(struct super_block *sb,
++                                                ext4_group_t block_group);
+ extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+ /* mballoc.c */
+Index: linux-2.6.32.i386/fs/ext4/ialloc.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/ialloc.c    2009-12-03 09:21:21.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/ialloc.c 2010-04-07 14:19:47.000000000 +0530
+@@ -98,7 +98,7 @@
+  *
+  * Return buffer_head of bitmap on success or NULL.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ {
+       struct ext4_group_desc *desc;
+@@ -161,6 +161,7 @@
+       }
+       return bh;
+ }
++EXPORT_SYMBOL(ext4_read_inode_bitmap);
+ /*
+  * NOTE! When we get the inode, we're the only people
+Index: linux-2.6.32.i386/fs/ext4/balloc.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/balloc.c    2010-03-19 15:43:37.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/balloc.c 2010-04-07 14:19:47.000000000 +0530
+@@ -235,6 +235,7 @@
+               *bh = sbi->s_group_desc[group_desc];
+       return desc;
+ }
++EXPORT_SYMBOL(ext4_get_group_desc);
+ static int ext4_valid_block_bitmap(struct super_block *sb,
+                                       struct ext4_group_desc *desc,
diff --git a/ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-back-dquot-to-rhel6.patch
new file mode 100644 (file)
index 0000000..bf8826c
--- /dev/null
@@ -0,0 +1,54 @@
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-11 15:46:27.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-11 15:53:05.016701579 +0800
+@@ -1400,9 +1400,47 @@
+ static ssize_t ext4_quota_write(struct super_block *sb, int type,
+                               const char *data, size_t len, loff_t off);
++static int ext4_dquot_initialize(struct inode *inode, int type)
++{
++      handle_t *handle;
++      int ret, err;
++
++      /* We may create quota structure so we need to reserve enough blocks */
++      handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++      ret = dquot_initialize(inode, type);
++      err = ext4_journal_stop(handle);
++      if (!ret)
++              ret = err;
++      return ret;
++}
++
++static int ext4_dquot_drop(struct inode *inode)
++{
++      handle_t *handle;
++      int ret, err;
++
++      /* We may delete quota structure so we need to reserve enough blocks */
++      handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
++      if (IS_ERR(handle)) {
++              /*
++               * We call dquot_drop() anyway to at least release references
++               * to quota structures so that umount does not hang.
++               */
++              dquot_drop(inode);
++              return PTR_ERR(handle);
++      }
++      ret = dquot_drop(inode);
++      err = ext4_journal_stop(handle);
++      if (!ret)
++              ret = err;
++      return ret;
++}
++
+ static const struct dquot_operations ext4_quota_operations = {
+-      .initialize     = dquot_initialize,
+-      .drop           = dquot_drop,
++      .initialize     = ext4_dquot_initialize,
++      .drop           = ext4_dquot_drop,
+       .alloc_space    = dquot_alloc_space,
+       .reserve_space  = dquot_reserve_space,
+       .claim_space    = dquot_claim_space,
diff --git a/ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-big-endian-check-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..6a1ef25
--- /dev/null
@@ -0,0 +1,57 @@
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-11 15:27:08.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-11 15:29:41.023089829 +0800
+@@ -72,6 +72,8 @@
+ static int ext4_freeze(struct super_block *sb);
++static int bigendian_extents;
++
+ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
+                              struct ext4_group_desc *bg)
+ {
+@@ -1492,7 +1494,7 @@
+       Opt_block_validity, Opt_noblock_validity,
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
+-      Opt_mballoc,
++      Opt_mballoc, Opt_bigendian_extents,
+ };
+ static const match_table_t tokens = {
+@@ -1559,6 +1561,7 @@
+       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
++      {Opt_bigendian_extents, "bigendian_extents"},
+       {Opt_mballoc, "mballoc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
+@@ -1996,6 +1999,9 @@
+                       break;
+               case Opt_mballoc:
+                       break;
++              case Opt_bigendian_extents:
++                      bigendian_extents = 1;
++                      break;
+               case Opt_discard:
+                       set_opt(sbi->s_mount_opt, DISCARD);
+                       break;
+@@ -3073,6 +3079,16 @@
+               goto failed_mount;
+       }
++#ifdef __BIG_ENDIAN
++      if (bigendian_extents == 0) {
++              printk(KERN_ERR "EXT4-fs: extents feature is not guaranteed to "
++                     "work on big-endian systems. Use \"bigendian_extents\" "
++                     "mount option to override.\n");
++              goto failed_mount;
++      }
++#endif
++
++
+ #ifdef CONFIG_PROC_FS
+       if (ext4_proc_root)
+               sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
diff --git a/ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-disable-mb-cache-rhel6.patch
new file mode 100644 (file)
index 0000000..8c98c62
--- /dev/null
@@ -0,0 +1,154 @@
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-14 16:16:45.000000000 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-14 16:17:08.732676431 +0800
+@@ -758,7 +758,8 @@
+ /*
+  * Mount flags
+  */
+-#define EXT4_MOUNT_OLDALLOC           0x00002  /* Don't use the new Orlov allocator */
++#define EXT4_MOUNT_NO_MBCACHE         0x00001 /* Disable mbcache */
++#define EXT4_MOUNT_OLDALLOC           0x00002 /* Don't use the new Orlov allocator */
+ #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
+ #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
+ #define EXT4_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-14 16:16:45.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-14 16:18:13.831956469 +0800
+@@ -1502,6 +1502,7 @@
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
+       Opt_mballoc, Opt_bigendian_extents, Opt_force_over_16tb,
++      Opt_no_mbcache,
+       Opt_extents, Opt_noextents,
+ };
+@@ -1574,6 +1575,7 @@
+       {Opt_mballoc, "mballoc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
++      {Opt_no_mbcache, "no_mbcache"},
+       {Opt_extents, "extents"},
+       {Opt_noextents, "noextents"},
+       {Opt_err, NULL},
+@@ -2049,6 +2051,9 @@
+                       }
+                       clear_opt(sbi->s_mount_opt, EXTENTS);
+                       break;
++              case Opt_no_mbcache:
++                      set_opt(sbi->s_mount_opt, NO_MBCACHE);
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+Index: linux-stage/fs/ext4/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext4/xattr.c   2011-03-14 16:16:43.000000000 +0800
++++ linux-stage/fs/ext4/xattr.c        2011-03-14 16:17:08.806677883 +0800
+@@ -86,7 +86,8 @@
+ # define ea_bdebug(f...)
+ #endif
+-static void ext4_xattr_cache_insert(struct buffer_head *);
++static void ext4_xattr_cache_insert(struct super_block *,
++                                  struct buffer_head *);
+ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
+                                                struct ext4_xattr_header *,
+                                                struct mb_cache_entry **);
+@@ -234,7 +235,7 @@
+               error = -EIO;
+               goto cleanup;
+       }
+-      ext4_xattr_cache_insert(bh);
++      ext4_xattr_cache_insert(inode->i_sb, bh);
+       entry = BFIRST(bh);
+       error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
+       if (error == -EIO)
+@@ -376,7 +377,7 @@
+               error = -EIO;
+               goto cleanup;
+       }
+-      ext4_xattr_cache_insert(bh);
++      ext4_xattr_cache_insert(inode->i_sb, bh);
+       error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+ cleanup:
+@@ -473,7 +474,9 @@
+       struct mb_cache_entry *ce = NULL;
+       int error = 0;
+-      ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
++      if (!test_opt(inode->i_sb, NO_MBCACHE))
++              ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev,
++                                      bh->b_blocknr);
+       error = ext4_journal_get_write_access(handle, bh);
+       if (error)
+               goto out;
+@@ -700,8 +703,10 @@
+       if (i->value && i->value_len > sb->s_blocksize)
+               return -ENOSPC;
+       if (s->base) {
+-              ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
+-                                      bs->bh->b_blocknr);
++              if (!test_opt(inode->i_sb, NO_MBCACHE))
++                      ce = mb_cache_entry_get(ext4_xattr_cache,
++                                              bs->bh->b_bdev,
++                                              bs->bh->b_blocknr);
+               error = ext4_journal_get_write_access(handle, bs->bh);
+               if (error)
+                       goto cleanup;
+@@ -718,7 +723,7 @@
+                               if (!IS_LAST_ENTRY(s->first))
+                                       ext4_xattr_rehash(header(s->base),
+                                                         s->here);
+-                              ext4_xattr_cache_insert(bs->bh);
++                              ext4_xattr_cache_insert(sb, bs->bh);
+                       }
+                       unlock_buffer(bs->bh);
+                       if (error == -EIO)
+@@ -801,7 +806,8 @@
+                               if (error)
+                                       goto cleanup_dquot;
+                       }
+-                      mb_cache_entry_release(ce);
++                      if (ce)
++                              mb_cache_entry_release(ce);
+                       ce = NULL;
+               } else if (bs->bh && s->base == bs->bh->b_data) {
+                       /* We were modifying this block in-place. */
+@@ -845,7 +851,7 @@
+                       memcpy(new_bh->b_data, s->base, new_bh->b_size);
+                       set_buffer_uptodate(new_bh);
+                       unlock_buffer(new_bh);
+-                      ext4_xattr_cache_insert(new_bh);
++                      ext4_xattr_cache_insert(sb, new_bh);
+                       error = ext4_handle_dirty_metadata(handle,
+                                                          inode, new_bh);
+                       if (error)
+@@ -1403,12 +1409,15 @@
+  * Returns 0, or a negative error number on failure.
+  */
+ static void
+-ext4_xattr_cache_insert(struct buffer_head *bh)
++ext4_xattr_cache_insert(struct super_block *sb, struct buffer_head *bh)
+ {
+       __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+       struct mb_cache_entry *ce;
+       int error;
++      if (test_opt(sb, NO_MBCACHE))
++              return;
++
+       ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS);
+       if (!ce) {
+               ea_bdebug(bh, "out of memory");
+@@ -1482,6 +1491,8 @@
+       __u32 hash = le32_to_cpu(header->h_hash);
+       struct mb_cache_entry *ce;
++      if (test_opt(inode->i_sb, NO_MBCACHE))
++              return NULL;
+       if (!header->h_hash)
+               return NULL;  /* never share */
+       ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
diff --git a/ldiskfs/kernel_patches/patches/ext4-dynlocks-common-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-dynlocks-common-rhel6.patch
new file mode 100644 (file)
index 0000000..0a66c86
--- /dev/null
@@ -0,0 +1,352 @@
+Index: linux-stage/fs/ext4/dynlocks.c
+===================================================================
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ linux-stage/fs/ext4/dynlocks.c     2011-03-03 15:25:04.025526781 +0800
+@@ -0,0 +1,236 @@
++/*
++ * Dynamic Locks
++ *
++ * struct dynlock is lockspace
++ * one may request lock (exclusive or shared) for some value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++#define DYNLOCK_HANDLE_MAGIC  0xd19a10c
++#define DYNLOCK_HANDLE_DEAD   0xd1956ee
++#define DYNLOCK_LIST_MAGIC    0x11ee91e6
++
++static struct kmem_cache * dynlock_cachep = NULL;
++
++struct dynlock_handle {
++      unsigned                dh_magic;
++      struct list_head        dh_list;
++      unsigned long           dh_value;       /* lock value */
++      int                     dh_refcount;    /* number of users */
++      int                     dh_readers;
++      int                     dh_writers;
++      int                     dh_pid;         /* holder of the lock */
++      wait_queue_head_t       dh_wait;
++};
++
++int __init dynlock_cache_init(void)
++{
++      int rc = 0;
++
++      printk(KERN_INFO "init dynlocks cache\n");
++      dynlock_cachep = kmem_cache_create("dynlock_cache",
++                                       sizeof(struct dynlock_handle),
++                                       0,
++                                       SLAB_HWCACHE_ALIGN,
++                                       NULL);
++      if (dynlock_cachep == NULL) {
++              printk(KERN_ERR "Not able to create dynlock cache");
++              rc = -ENOMEM;
++      }
++      return rc;
++}
++
++void __exit dynlock_cache_exit(void)
++{
++      printk(KERN_INFO "exit dynlocks cache\n");
++      kmem_cache_destroy(dynlock_cachep);
++}
++
++/*
++ * dynlock_init
++ *
++ * initialize lockspace
++ *
++ */
++void dynlock_init(struct dynlock *dl)
++{
++      spin_lock_init(&dl->dl_list_lock);
++      INIT_LIST_HEAD(&dl->dl_list);
++      dl->dl_magic = DYNLOCK_LIST_MAGIC;
++}
++EXPORT_SYMBOL(dynlock_init);
++
++/*
++ * dynlock_lock
++ *
++ * acquires a lock (exclusive or shared) in the specified lockspace.
++ * each lock in the lockspace is allocated separately, so the user has
++ * to specify GFP flags.
++ * routine returns pointer to lock. this pointer is intended to
++ * be passed to dynlock_unlock
++ *
++ */
++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value,
++                                  enum dynlock_type lt, gfp_t gfp)
++{
++      struct dynlock_handle *nhl = NULL;
++      struct dynlock_handle *hl;
++
++      BUG_ON(dl == NULL);
++      BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC);
++
++repeat:
++      /* find requested lock in lockspace */
++      spin_lock(&dl->dl_list_lock);
++      BUG_ON(dl->dl_list.next == NULL);
++      BUG_ON(dl->dl_list.prev == NULL);
++      list_for_each_entry(hl, &dl->dl_list, dh_list) {
++              BUG_ON(hl->dh_list.next == NULL);
++              BUG_ON(hl->dh_list.prev == NULL);
++              BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
++              if (hl->dh_value == value) {
++                      /* lock is found */
++                      if (nhl) {
++                              /* someone else just allocated the
++                               * lock we didn't find earlier and have
++                               * just created, so we drop our copy
++                               */
++                              kmem_cache_free(dynlock_cachep, nhl);
++                              nhl = NULL;
++                      }
++                      hl->dh_refcount++;
++                      goto found;
++              }
++      }
++      /* lock not found */
++      if (nhl) {
++              /* we already have allocated lock. use it */
++              hl = nhl;
++              nhl = NULL;
++              list_add(&hl->dh_list, &dl->dl_list);
++              goto found;
++      }
++      spin_unlock(&dl->dl_list_lock);
++      
++      /* lock not found and we haven't allocated lock yet. allocate it */
++      nhl = kmem_cache_alloc(dynlock_cachep, gfp);
++      if (nhl == NULL)
++              return NULL;
++      nhl->dh_refcount = 1;
++      nhl->dh_value = value;
++      nhl->dh_readers = 0;
++      nhl->dh_writers = 0;
++      nhl->dh_magic = DYNLOCK_HANDLE_MAGIC;
++      init_waitqueue_head(&nhl->dh_wait);
++
++      /* while the lock was being allocated, someone else may have
++       * allocated it and put it onto the list. check for this situation
++       */
++      goto repeat;
++
++found:
++      if (lt == DLT_WRITE) {
++              /* exclusive lock: user doesn't want to share the lock at all
++               * NOTE: one process may take the same lock several times;
++               * this functionality is useful for rename operations */
++              while ((hl->dh_writers && hl->dh_pid != current->pid) ||
++                              hl->dh_readers) {
++                      spin_unlock(&dl->dl_list_lock);
++                      wait_event(hl->dh_wait,
++                              hl->dh_writers == 0 && hl->dh_readers == 0);
++                      spin_lock(&dl->dl_list_lock);
++              }
++              hl->dh_writers++;
++      } else {
++              /* shared lock: user does not want to share the lock with a writer */
++              while (hl->dh_writers) {
++                      spin_unlock(&dl->dl_list_lock);
++                      wait_event(hl->dh_wait, hl->dh_writers == 0);
++                      spin_lock(&dl->dl_list_lock);
++              }
++              hl->dh_readers++;
++      }
++      hl->dh_pid = current->pid;
++      spin_unlock(&dl->dl_list_lock);
++
++      return hl;
++}
++EXPORT_SYMBOL(dynlock_lock);
++
++
++/*
++ * dynlock_unlock
++ *
++ * user have to specify lockspace (dl) and pointer to lock structure
++ * returned by dynlock_lock()
++ *
++ */
++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl)
++{
++      int wakeup = 0;
++      
++      BUG_ON(dl == NULL);
++      BUG_ON(hl == NULL);
++      BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC);
++
++      if (hl->dh_magic != DYNLOCK_HANDLE_MAGIC)
++              printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dh_magic);
++
++      BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
++      BUG_ON(hl->dh_writers != 0 && current->pid != hl->dh_pid);
++
++      spin_lock(&dl->dl_list_lock);
++      if (hl->dh_writers) {
++              BUG_ON(hl->dh_readers != 0);
++              hl->dh_writers--;
++              if (hl->dh_writers == 0)
++                      wakeup = 1;
++      } else if (hl->dh_readers) {
++              hl->dh_readers--;
++              if (hl->dh_readers == 0)
++                      wakeup = 1;
++      } else {
++              BUG();
++      }
++      if (wakeup) {
++              hl->dh_pid = 0;
++              wake_up(&hl->dh_wait);
++      }
++      if (--(hl->dh_refcount) == 0) {
++              hl->dh_magic = DYNLOCK_HANDLE_DEAD;
++              list_del(&hl->dh_list);
++              kmem_cache_free(dynlock_cachep, hl);
++      }
++      spin_unlock(&dl->dl_list_lock);
++}
++EXPORT_SYMBOL(dynlock_unlock);
++
++int dynlock_is_locked(struct dynlock *dl, unsigned long value)
++{
++      struct dynlock_handle *hl;
++      int result = 0;
++
++      /* find requested lock in lockspace */
++      spin_lock(&dl->dl_list_lock);
++      BUG_ON(dl->dl_list.next == NULL);
++      BUG_ON(dl->dl_list.prev == NULL);
++      list_for_each_entry(hl, &dl->dl_list, dh_list) {
++              BUG_ON(hl->dh_list.next == NULL);
++              BUG_ON(hl->dh_list.prev == NULL);
++              BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC);
++              if (hl->dh_value == value && hl->dh_pid == current->pid) {
++                      /* lock is found */
++                      result = 1;
++                      break;
++              }
++      }
++      spin_unlock(&dl->dl_list_lock);
++      return result;
++}
++EXPORT_SYMBOL(dynlock_is_locked);
+Index: linux-stage/include/linux/dynlocks.h
+===================================================================
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ linux-stage/include/linux/dynlocks.h       2011-03-03 15:25:04.055526552 +0800
+@@ -0,0 +1,34 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_handle;
++
++/*
++ * lock's namespace:
++ *   - list of locks
++ *   - lock to protect this list
++ */
++struct dynlock {
++      unsigned                dl_magic;
++      struct list_head        dl_list;
++      spinlock_t              dl_list_lock;
++};
++
++enum dynlock_type {
++      DLT_WRITE,
++      DLT_READ
++};
++
++int dynlock_cache_init(void);
++void dynlock_cache_exit(void);
++void dynlock_init(struct dynlock *dl);
++struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value,
++                                  enum dynlock_type lt, gfp_t gfp);
++void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock);
++int dynlock_is_locked(struct dynlock *dl, unsigned long value);
++
++#endif
++
+Index: linux-stage/fs/ext4/Makefile
+===================================================================
+--- linux-stage.orig/fs/ext4/Makefile  2011-03-05 11:50:43.000000000 +0800
++++ linux-stage/fs/ext4/Makefile       2011-03-05 11:52:42.349154982 +0800
+@@ -6,7 +6,8 @@
+ ext4-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+               ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
+-              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
++              ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
++              dynlocks.o
+ ext4-$(CONFIG_EXT4_FS_XATTR)          += xattr.o xattr_user.o xattr_trusted.o
+ ext4-$(CONFIG_EXT4_FS_POSIX_ACL)      += acl.o
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-05 11:50:43.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-05 11:57:33.632869451 +0800
+@@ -4457,17 +4457,20 @@
+               return err;
+       ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
+       if (!ext4_kset)
+-              goto out4;
++              goto out5;
+       ext4_proc_root = proc_mkdir("fs/ext4", NULL);
+       err = init_ext4_mballoc();
+       if (err)
+-              goto out3;
++              goto out4;
+       err = init_ext4_xattr();
+       if (err)
+-              goto out2;
++              goto out3;
+       err = init_inodecache();
+       if (err)
++              goto out2;
++      err = dynlock_cache_init();
++      if (err)
+               goto out1;
+       err = register_filesystem(&ext4_fs_type);
+       if (err)
+@@ -4477,15 +4480,17 @@
+       return 0;
+ out:
+-      destroy_inodecache();
++      dynlock_cache_exit();
+ out1:
+-      exit_ext4_xattr();
++      destroy_inodecache();
+ out2:
+-      exit_ext4_mballoc();
++      exit_ext4_xattr();
+ out3:
++      exit_ext4_mballoc();
++out4:
+       remove_proc_entry("fs/ext4", NULL);
+       kset_unregister(ext4_kset);
+-out4:
++out5:
+       exit_ext4_system_zone();
+       return err;
+ }
+@@ -4493,6 +4498,7 @@
+ static void __exit exit_ext4_fs(void)
+ {
+       unregister_filesystem(&ext4_fs_type);
++      dynlock_cache_exit();
+       destroy_inodecache();
+       exit_ext4_xattr();
+       exit_ext4_mballoc();
diff --git a/ldiskfs/kernel_patches/patches/ext4-extents-mount-option-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-extents-mount-option-rhel6.patch
new file mode 100644 (file)
index 0000000..c4cc531
--- /dev/null
@@ -0,0 +1,174 @@
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-14 15:57:13.613674482 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-14 15:57:22.031906980 +0800
+@@ -780,6 +780,7 @@
+ #define EXT4_MOUNT_QUOTA              0x80000 /* Some quota option set */
+ #define EXT4_MOUNT_USRQUOTA           0x100000 /* "old" user quota */
+ #define EXT4_MOUNT_GRPQUOTA           0x200000 /* "old" group quota */
++#define EXT4_MOUNT_EXTENTS            0x400000 /* Extents support */
+ #define EXT4_MOUNT_JOURNAL_CHECKSUM   0x800000 /* Journal checksums */
+ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT       0x1000000 /* Journal Async Commit */
+ #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h       2011-03-14 15:57:12.000000000 +0800
++++ linux-stage/fs/ext4/ext4_jbd2.h    2011-03-14 15:58:55.957499110 +0800
+@@ -33,7 +33,7 @@
+ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb)                              \
+       (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
+-       ? 27U : 8U)
++       || test_opt(sb, EXTENTS) ? 27U : 8U)
+ #define ext4_journal_dirty_metadata(handle, bh)  \
+                 ext4_handle_dirty_metadata(handle, NULL, bh)
+Index: linux-stage/fs/ext4/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext4/extents.c 2011-03-14 15:57:12.000000000 +0800
++++ linux-stage/fs/ext4/extents.c      2011-03-14 16:14:14.246265207 +0800
+@@ -2553,7 +2553,7 @@
+        * possible initialization would be here
+        */
+-      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
++      if (test_opt(sb, EXTENTS)) {
+ #if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
+               printk(KERN_INFO "EXT4-fs: file extents enabled");
+ #ifdef AGGRESSIVE_TEST
+@@ -2580,7 +2580,7 @@
+  */
+ void ext4_ext_release(struct super_block *sb)
+ {
+-      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
++      if (!test_opt(sb, EXTENTS))
+               return;
+ #ifdef EXTENTS_STATS
+Index: linux-stage/fs/ext4/ialloc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/ialloc.c  2011-03-14 15:57:13.000000000 +0800
++++ linux-stage/fs/ext4/ialloc.c       2011-03-14 16:02:03.334308846 +0800
+@@ -1049,7 +1049,7 @@
+       if (err)
+               goto fail_free_drop;
+-      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
++      if (test_opt(sb, EXTENTS)) {
+               /* set extent flag only for directory, file and normal symlink*/
+               if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
+                       EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+Index: linux-stage/fs/ext4/migrate.c
+===================================================================
+--- linux-stage.orig/fs/ext4/migrate.c 2011-03-14 15:36:15.000000000 +0800
++++ linux-stage/fs/ext4/migrate.c      2011-03-14 16:05:39.083369164 +0800
+@@ -459,13 +459,13 @@
+       unsigned long max_entries;
+       __u32 goal;
+-      /*
+-       * If the filesystem does not support extents, or the inode
+-       * already is extent-based, error out.
+-       */
+-      if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
+-                                     EXT4_FEATURE_INCOMPAT_EXTENTS) ||
+-          (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
++      if (!test_opt(inode->i_sb, EXTENTS))
++              /*
++               * if mounted with noextents we don't allow the migrate
++               */
++              return -EINVAL;
++
++      if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+               return -EINVAL;
+       if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-14 15:57:18.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-14 16:11:58.234626200 +0800
+@@ -942,6 +942,8 @@
+               seq_puts(seq, ",journal_async_commit");
+       if (test_opt(sb, NOBH))
+               seq_puts(seq, ",nobh");
++      if (!test_opt(sb, EXTENTS))
++              seq_puts(seq, ",noextents");
+       if (test_opt(sb, I_VERSION))
+               seq_puts(seq, ",i_version");
+       if (!test_opt(sb, DELALLOC))
+@@ -1500,6 +1502,7 @@
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
+       Opt_mballoc, Opt_bigendian_extents, Opt_force_over_16tb,
++      Opt_extents, Opt_noextents,
+ };
+ static const match_table_t tokens = {
+@@ -1571,6 +1574,8 @@
+       {Opt_mballoc, "mballoc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
++      {Opt_extents, "extents"},
++      {Opt_noextents, "noextents"},
+       {Opt_err, NULL},
+ };
+@@ -1613,6 +1618,7 @@
+       int qtype, qfmt;
+       char *qname;
+ #endif
++      ext4_fsblk_t last_block;
+       if (!options)
+               return 1;
+@@ -2017,6 +2023,32 @@
+               case Opt_force_over_16tb:
+                       force_over_16tb = 1;
+                       break;
++              case Opt_extents:
++                      if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
++                                      EXT4_FEATURE_INCOMPAT_EXTENTS)) {
++                              ext4_warning(sb, "extents feature not enabled "
++                                               "on this filesystem, use tune2fs");
++                              return 0;
++                      }
++                      set_opt(sbi->s_mount_opt, EXTENTS);
++                      break;
++              case Opt_noextents:
++                      /*
++                       * When e2fsprogs support resizing an already existing
++                       * ext4 file system to greater than 2**32 we need to
++                       * add support to block allocator to handle growing
++                       * already existing block  mapped inode so that blocks
++                       * allocated for them fall within 2**32
++                       */
++                      last_block = ext4_blocks_count(sbi->s_es) - 1;
++                      if (last_block  > 0xffffffffULL) {
++                              printk(KERN_ERR "EXT4-fs: Filesystem too "
++                                              "large to mount with "
++                                              "-o noextents options\n");
++                              return 0;
++                      }
++                      clear_opt(sbi->s_mount_opt, EXTENTS);
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+@@ -2879,6 +2911,17 @@
+       set_opt(sbi->s_mount_opt, BARRIER);
+       /*
++       * turn on extents feature by default in ext4 filesystem
++       * only if feature flag already set by mkfs or tune2fs.
++       * Use -o noextents to turn it off
++       */
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
++              set_opt(sbi->s_mount_opt, EXTENTS);
++      else
++              ext4_warning(sb, "extents feature not enabled on this filesystem, "
++                               "use tune2fs.");
++
++      /*
+        * enable delayed allocation by default
+        * Use -o nodelalloc to turn it off
+        */
diff --git a/ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-fiemap-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..fb4690b
--- /dev/null
@@ -0,0 +1,111 @@
+This patch adds direct EXT4_IOC_FIEMAP support to ldiskfs, for Lustre to call
+without having to go through do_vfs_ioctl() (which isn't exported, and has a
+number of other ioctls which are not suitable for Lustre). The actual FIEMAP
+support is already in the kernel/ext4 for normal usage.
+
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-05 12:34:16.458850451 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-05 12:35:25.338882364 +0800
+@@ -405,7 +405,7 @@
+ #define EXT4_IOC_GROUP_ADD            _IOW('f', 8, struct ext4_new_group_input)
+ #define EXT4_IOC_MIGRATE              _IO('f', 9)
+  /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
+- /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
++#define EXT4_IOC_FIEMAP                       _IOWR('f', 11, struct fiemap)
+ #define EXT4_IOC_ALLOC_DA_BLKS                _IO('f', 12)
+ #define EXT4_IOC_MOVE_EXT             _IOWR('f', 15, struct move_extent)
+Index: linux-stage/fs/ext4/ioctl.c
+===================================================================
+--- linux-stage.orig/fs/ext4/ioctl.c   2011-03-05 12:34:11.299779163 +0800
++++ linux-stage/fs/ext4/ioctl.c        2011-03-05 12:34:16.862856069 +0800
+@@ -18,6 +18,71 @@
+ #include "ext4_jbd2.h"
+ #include "ext4.h"
++/* So that the fiemap access checks can't overflow on 32 bit machines. */
++#define FIEMAP_MAX_EXTENTS     (UINT_MAX / sizeof(struct fiemap_extent))
++
++static int fiemap_check_ranges(struct super_block *sb,
++                             u64 start, u64 len, u64 *new_len)
++{
++      *new_len = len;
++
++      if (len == 0)
++              return -EINVAL;
++
++      if (start > sb->s_maxbytes)
++              return -EFBIG;
++
++      /*
++       * Shrink request scope to what the fs can actually handle.
++       */
++      if ((len > sb->s_maxbytes) ||
++          (sb->s_maxbytes - len) < start)
++              *new_len = sb->s_maxbytes - start;
++
++      return 0;
++}
++
++int ioctl_fiemap(struct inode *inode, struct file *filp, unsigned long arg)
++{
++      struct fiemap fiemap;
++      u64 len;
++      struct fiemap_extent_info fieinfo = {0, };
++      struct super_block *sb = inode->i_sb;
++      int error = 0;
++
++      if (copy_from_user(&fiemap, (struct fiemap __user *) arg,
++                         sizeof(struct fiemap)))
++               return -EFAULT;
++
++      if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
++              return -EINVAL;
++
++      error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length,
++                                  &len);
++      if (error)
++              return error;
++
++      fieinfo.fi_flags = fiemap.fm_flags;
++      fieinfo.fi_extents_max = fiemap.fm_extent_count;
++      fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap));
++
++      if (fiemap.fm_extent_count != 0 &&
++          !access_ok(VERIFY_WRITE, (void *)arg,
++                     offsetof(typeof(fiemap), fm_extents[fiemap.fm_extent_count])))
++              return -EFAULT;
++
++      if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
++              filemap_write_and_wait(inode->i_mapping);
++
++      error = ext4_fiemap(inode, &fieinfo, fiemap.fm_start, len);
++      fiemap.fm_flags = fieinfo.fi_flags;
++      fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
++      if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap)))
++              error = -EFAULT;
++
++      return error;
++}
++
+ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ {
+       struct inode *inode = filp->f_dentry->d_inode;
+@@ -330,6 +395,9 @@
+               mnt_drop_write(filp->f_path.mnt);
+               return err;
+       }
++      case EXT4_IOC_FIEMAP: {
++              return ioctl_fiemap(inode, filp, arg);
++      }
+       default:
+               return -ENOTTY;
+Index: linux-stage/fs/ext4/fiemap.h
+===================================================================
+--- /dev/null  1970-01-01 00:00:00.000000000 +0000
++++ linux-stage/fs/ext4/fiemap.h       2011-03-05 12:36:24.606879702 +0800
+@@ -0,0 +1,2 @@
++
++#include_next <fiemap.h>
diff --git a/ldiskfs/kernel_patches/patches/ext4-force_over_16tb-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-force_over_16tb-rhel6.patch
new file mode 100644 (file)
index 0000000..8f99774
--- /dev/null
@@ -0,0 +1,64 @@
+Refuse to mount a filesystem whose block count is 2^32 or more unless the
+"force_over_16tb" mount option is given.
+
+NOTE(review): force_over_16tb is a file-scope static that this patch only
+ever sets to 1, so it appears to stay set for every later mount once one
+mount has used the option -- confirm this is intended.
+
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-11 15:35:15.680343230 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-11 15:44:45.037632078 +0800
+@@ -55,6 +55,8 @@
+ struct proc_dir_entry *ext4_proc_root;
+ static struct kset *ext4_kset;
++static int force_over_16tb;
++
+ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
+                            unsigned long journal_devnum);
+ static int ext4_commit_super(struct super_block *sb, int sync);
+@@ -1494,7 +1496,7 @@
+       Opt_block_validity, Opt_noblock_validity,
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
+-      Opt_mballoc, Opt_bigendian_extents,
++      Opt_mballoc, Opt_bigendian_extents, Opt_force_over_16tb,
+ };
+ static const match_table_t tokens = {
+@@ -1562,6 +1564,7 @@
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
+       {Opt_bigendian_extents, "bigendian_extents"},
++      {Opt_force_over_16tb, "force_over_16tb"},
+       {Opt_mballoc, "mballoc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
+@@ -2008,6 +2011,9 @@
+                       break;
+               case Opt_mballoc:
+                       break;
++              case Opt_force_over_16tb:
++                      force_over_16tb = 1;
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+@@ -3031,6 +3037,16 @@
+               goto failed_mount;
+       }
++      if (ext4_blocks_count(es) >= (1ULL << 32)) {
++              if (force_over_16tb == 0) {
++                      printk(KERN_ERR "EXT4-fs does not support filesystems "
++                             "greater than 16TB and can cause data corruption. "
++                             "Use \"force_over_16tb\" mount option to override."
++                             "\n");
++                      goto failed_mount;
++              }
++      }
++
+       if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
+               goto cantfind_ext4;
diff --git a/ldiskfs/kernel_patches/patches/ext4-inode-version-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-inode-version-rhel6.patch
new file mode 100644 (file)
index 0000000..a104bed
--- /dev/null
@@ -0,0 +1,63 @@
+Index: linux-2.6.32-el6-beta/fs/ext4/inode.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/inode.c
++++ linux-2.6.32-el6-beta/fs/ext4/inode.c
+@@ -4920,11 +4920,11 @@ struct inode *ext4_iget(struct super_blo
+       EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
+       EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
+-      inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
++      ei->i_fs_version = le32_to_cpu(raw_inode->i_disk_version);
+       if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
+-                      inode->i_version |=
+-                      (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
++                      ei->i_fs_version |= (__u64)(le32_to_cpu(raw_inode->i_version_hi))
++                                                                       << 32;
+       }
+       ret = 0;
+@@ -5134,11 +5134,11 @@ static int ext4_do_update_inode(handle_t
+               for (block = 0; block < EXT4_N_BLOCKS; block++)
+                       raw_inode->i_block[block] = ei->i_data[block];
+-      raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
++      raw_inode->i_disk_version = cpu_to_le32(ei->i_fs_version);
+       if (ei->i_extra_isize) {
+               if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
+-                      raw_inode->i_version_hi =
+-                      cpu_to_le32(inode->i_version >> 32);
++                      raw_inode->i_version_hi = cpu_to_le32(ei->i_fs_version
++                                                            >> 32);
+               raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
+       }
+Index: linux-2.6.32-el6-beta/fs/ext4/ialloc.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/ialloc.c
++++ linux-2.6.32-el6-beta/fs/ext4/ialloc.c
+@@ -1018,6 +1018,7 @@ got:
+       ei->i_dtime = 0;
+       ei->i_block_group = group;
+       ei->i_last_alloc_group = ~0;
++      ei->i_fs_version = 0;
+       ext4_set_inode_flags(inode);
+       if (IS_DIRSYNC(inode))
+Index: linux-2.6.32-el6-beta/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/ext4.h
++++ linux-2.6.32-el6-beta/fs/ext4/ext4.h
+@@ -714,8 +714,12 @@ struct ext4_inode_info {
+        */
+       tid_t i_sync_tid;
+       tid_t i_datasync_tid;
++
++      __u64 i_fs_version;
+ };
++#define HAVE_DISK_INODE_VERSION
++
+ /*
+  * File system states
+  */
diff --git a/ldiskfs/kernel_patches/patches/ext4-kill-dx_root-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-kill-dx_root-rhel6.patch
new file mode 100644 (file)
index 0000000..6631dde
--- /dev/null
@@ -0,0 +1,236 @@
+Removes the static definition of struct dx_root so that the "." and ".."
+dirents can carry extra data. This patch does not change any functionality,
+but it is required by the ext4_data_in_dirent patch.
+Index: linux-2.6.32.i386/fs/ext4/namei.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/namei.c     2010-04-16 05:35:06.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/namei.c  2010-04-16 05:47:41.000000000 +0530
+@@ -115,22 +115,13 @@
+  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
+  */
+-struct dx_root
++struct dx_root_info
+ {
+-      struct fake_dirent dot;
+-      char dot_name[4];
+-      struct fake_dirent dotdot;
+-      char dotdot_name[4];
+-      struct dx_root_info
+-      {
+-              __le32 reserved_zero;
+-              u8 hash_version;
+-              u8 info_length; /* 8 */
+-              u8 indirect_levels;
+-              u8 unused_flags;
+-      }
+-      info;
+-      struct dx_entry entries[0];
++      __le32 reserved_zero;
++      u8 hash_version;
++      u8 info_length; /* 8 */
++      u8 indirect_levels;
++      u8 unused_flags;
+ };
+ struct dx_node
+@@ -244,6 +235,16 @@
+  * Future: use high four bits of block for coalesce-on-delete flags
+  * Mask them off for now.
+  */
++struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
++{
++       /* get dotdot first */
++       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
++
++       /* dx root info is after dotdot entry */
++       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
++
++       return (struct dx_root_info *) de;
++}
+ static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
+ {
+@@ -398,7 +399,7 @@
+ {
+       unsigned count, indirect;
+       struct dx_entry *at, *entries, *p, *q, *m;
+-      struct dx_root *root;
++      struct dx_root_info * info;
+       struct buffer_head *bh;
+       struct dx_frame *frame = frame_in;
+       u32 hash;
+@@ -406,17 +407,18 @@
+       frame->bh = NULL;
+       if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
+               goto fail;
+-      root = (struct dx_root *) bh->b_data;
+-      if (root->info.hash_version != DX_HASH_TEA &&
+-          root->info.hash_version != DX_HASH_HALF_MD4 &&
+-          root->info.hash_version != DX_HASH_LEGACY) {
++
++      info = dx_get_dx_info((struct ext4_dir_entry_2*)bh->b_data);
++      if (info->hash_version != DX_HASH_TEA &&
++          info->hash_version != DX_HASH_HALF_MD4 &&
++          info->hash_version != DX_HASH_LEGACY) {
+               ext4_warning(dir->i_sb, "Unrecognised inode hash code %d for directory "
+-                             "#%lu", root->info.hash_version, dir->i_ino);
++                             "#%lu", info->hash_version, dir->i_ino);
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+-      hinfo->hash_version = root->info.hash_version;
++      hinfo->hash_version = info->hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
+       hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+@@ -425,27 +427,26 @@
+               ext4fs_dirhash(d_name->name, d_name->len, hinfo);
+       hash = hinfo->hash;
+-      if (root->info.unused_flags & 1) {
++      if (info->unused_flags & 1) {
+               ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
+-                           root->info.unused_flags);
++                           info->unused_flags);
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+-      if ((indirect = root->info.indirect_levels) > 1) {
++      if ((indirect = info->indirect_levels) > 1) {
+               ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
+-                           root->info.indirect_levels);
++                           info->indirect_levels);
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+       }
+-      entries = (struct dx_entry *) (((char *)&root->info) +
+-                                     root->info.info_length);
++      entries = (struct dx_entry *) (((char *)info) + info->info_length);
+       if (dx_get_limit(entries) != dx_root_limit(dir,
+-                                                 root->info.info_length)) {
++                                                 info->info_length)) {
+               ext4_warning(dir->i_sb, "dx entry: limit != root limit");
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+@@ -525,10 +526,12 @@ fail:
+ static void dx_release (struct dx_frame *frames)
+ {
++      struct dx_root_info *info;
+       if (frames[0].bh == NULL)
+               return;
+-      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++      info = dx_get_dx_info((struct ext4_dir_entry_2*)frames[0].bh->b_data);
++      if (info->indirect_levels)
+               brelse(frames[1].bh);
+       brelse(frames[0].bh);
+ }
+@@ -1447,17 +1450,16 @@
+       const char      *name = dentry->d_name.name;
+       int             namelen = dentry->d_name.len;
+       struct buffer_head *bh2;
+-      struct dx_root  *root;
+       struct dx_frame frames[2], *frame;
+       struct dx_entry *entries;
+-      struct ext4_dir_entry_2 *de, *de2;
++      struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
+       char            *data1, *top;
+       unsigned        len;
+       int             retval;
+       unsigned        blocksize;
+       struct dx_hash_info hinfo;
+       ext4_lblk_t  block;
+-      struct fake_dirent *fde;
++      struct dx_root_info *dx_info;
+       blocksize =  dir->i_sb->s_blocksize;
+       dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
+@@ -1467,20 +1469,21 @@
+               brelse(bh);
+               return retval;
+       }
+-      root = (struct dx_root *) bh->b_data;
++
++      dot_de = (struct ext4_dir_entry_2 *) bh->b_data;
++      dotdot_de = ext4_next_entry(dot_de, blocksize);
+       /* The 0th block becomes the root, move the dirents out */
+-      fde = &root->dotdot;
+-      de = (struct ext4_dir_entry_2 *)((char *)fde +
+-              ext4_rec_len_from_disk(fde->rec_len, blocksize));
+-      if ((char *) de >= (((char *) root) + blocksize)) {
++      de = (struct ext4_dir_entry_2 *)((char *)dotdot_de +
++              ext4_rec_len_from_disk(dotdot_de->rec_len, blocksize));
++      if ((char *) de >= (((char *) dot_de) + blocksize)) {
+               ext4_error(dir->i_sb,
+                          "invalid rec_len for '..' in inode %lu",
+                          dir->i_ino);
+               brelse(bh);
+               return -EIO;
+       }
+-      len = ((char *) root) + blocksize - (char *) de;
++      len = ((char *) dot_de) + blocksize - (char *) de;
+       /* Allocate new block for the 0th block's dirents */
+       bh2 = ext4_append(handle, dir, &block, &retval);
+@@ -1499,19 +1502,23 @@
+       de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
+                                          blocksize);
+       /* Initialize the root; the dot dirents already exist */
+-      de = (struct ext4_dir_entry_2 *) (&root->dotdot);
+-      de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
+-                                         blocksize);
+-      memset (&root->info, 0, sizeof(root->info));
+-      root->info.info_length = sizeof(root->info);
+-      root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+-      entries = root->entries;
++      dotdot_de->rec_len = ext4_rec_len_to_disk(blocksize -
++                      le16_to_cpu(dot_de->rec_len), blocksize);
++
++      /* initialize hashing info */
++      dx_info = dx_get_dx_info(dot_de);
++      memset (dx_info, 0, sizeof(*dx_info));
++      dx_info->info_length = sizeof(*dx_info);
++      dx_info->hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
++
++      entries = (void *)dx_info + sizeof(*dx_info);
++
+       dx_set_block(entries, 1);
+       dx_set_count(entries, 1);
+-      dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
++      dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
+       /* Initialize as for dx_probe */
+-      hinfo.hash_version = root->info.hash_version;
++      hinfo.hash_version = dx_info->hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
+       hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+@@ -1759,6 +1766,7 @@
+                               goto journal_error;
+                       brelse (bh2);
+               } else {
++                      struct dx_root_info * info;
+                       dxtrace(printk(KERN_DEBUG
+                                      "Creating second level index...\n"));
+                       memcpy((char *) entries2, (char *) entries,
+@@ -1768,7 +1776,9 @@
+                       /* Set up root */
+                       dx_set_count(entries, 1);
+                       dx_set_block(entries + 0, newblock);
+-                      ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++                      info = dx_get_dx_info((struct ext4_dir_entry_2*)
++                                      frames[0].bh->b_data);
++                      info->indirect_levels = 1;
+                       /* Add new access path frame */
+                       frame = frames + 1;
diff --git a/ldiskfs/kernel_patches/patches/ext4-map_inode_page-2.6-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-map_inode_page-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..ced4af6
--- /dev/null
@@ -0,0 +1,87 @@
+Index: linux-2.6.32-el6-beta/fs/ext4/inode.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/inode.c
++++ linux-2.6.32-el6-beta/fs/ext4/inode.c
+@@ -5834,3 +5834,67 @@ out_unlock:
+       up_read(&inode->i_alloc_sem);
+       return ret;
+ }
++
++int ext4_map_inode_page(struct inode *inode, struct page *page,
++                      unsigned long *blocks, int *created, int create)
++{
++      unsigned int blocksize, blocks_per_page;
++      unsigned long iblock;
++      struct buffer_head dummy;
++      void *handle;
++      int i, rc = 0, failed = 0, needed_blocks;
++
++      blocksize = inode->i_sb->s_blocksize;
++      blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
++      iblock = page->index * blocks_per_page;
++
++      for (i = 0; i < blocks_per_page; i++, iblock++) {
++              blocks[i] = ext4_bmap(inode->i_mapping, iblock);
++              if (blocks[i] == 0) {
++                      failed++;
++                      if (created)
++                              created[i] = -1;
++              } else if (created) {
++                      created[i] = 0;
++              }
++      }
++
++      if (failed == 0 || create == 0)
++              return 0;
++
++      needed_blocks = ext4_writepage_trans_blocks(inode);
++      handle = ext4_journal_start(inode, needed_blocks);
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++
++      iblock = page->index * blocks_per_page;
++      for (i = 0; i < blocks_per_page; i++, iblock++) {
++              if (blocks[i] != 0)
++                      continue;
++
++              rc = ext4_get_blocks(handle, inode, iblock, 1, &dummy, 1);
++              if (rc < 0) {
++                      printk(KERN_INFO "ext4_map_inode_page: error reading "
++                                      "block %ld\n", iblock);
++                      goto out;
++              } else {
++                      if (rc > 1)
++                              WARN_ON(1);
++
++                      rc = 0;
++              }
++              /* Unmap any metadata buffers from the block mapping, to avoid
++               * data corruption due to direct-write from Lustre being
++               * clobbered by a later flush of the blockdev metadata buffer.*/
++              if (buffer_new(&dummy))
++                      unmap_underlying_metadata(dummy.b_bdev,
++                                      dummy.b_blocknr);
++              blocks[i] = dummy.b_blocknr;
++              if (created)
++                      created[i] = 1;
++      }
++
++out:
++      ext4_journal_stop(handle);
++      return rc;
++}
+Index: linux-2.6.32-el6-beta/fs/ext4/super.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/super.c
++++ linux-2.6.32-el6-beta/fs/ext4/super.c
+@@ -4084,6 +4084,10 @@ static void __exit exit_ext4_fs(void)
+       exit_ext4_system_zone();
+ }
++int ext4_map_inode_page(struct inode *inode, struct page *page,
++                      unsigned long *blocks, int *created, int create);
++EXPORT_SYMBOL(ext4_map_inode_page);
++
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Fourth Extended Filesystem");
+ MODULE_LICENSE("GPL");
diff --git a/ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-max-dir-size-rhel6.patch
new file mode 100644 (file)
index 0000000..8352d02
--- /dev/null
@@ -0,0 +1,67 @@
+Index: linux-2.6.32-el6-beta/fs/ext4/ialloc.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/ialloc.c
++++ linux-2.6.32-el6-beta/fs/ext4/ialloc.c
+@@ -825,11 +825,15 @@ struct inode *ext4_new_inode(handle_t *h
+       sb = dir->i_sb;
+       ngroups = ext4_get_groups_count(sb);
+       trace_ext4_request_inode(dir, mode);
++
++      sbi = EXT4_SB(sb);
++      if (sbi->s_max_dir_size > 0 && i_size_read(dir) >= sbi->s_max_dir_size)
++              return ERR_PTR(-EFBIG);
++
+       inode = new_inode(sb);
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+       ei = EXT4_I(inode);
+-      sbi = EXT4_SB(sb);
+       if (!goal)
+               goal = sbi->s_inode_goal;
+Index: linux-2.6.32-el6-beta/fs/ext4/super.c
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/super.c
++++ linux-2.6.32-el6-beta/fs/ext4/super.c
+@@ -2601,6 +2601,7 @@ EXT4_RO_ATTR(lifetime_write_kbytes);
+ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
+                inode_readahead_blks_store, s_inode_readahead_blks);
+ EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
++EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size);
+ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+@@ -2615,6 +2616,7 @@ static struct attribute *ext4_attrs[] = 
+       ATTR_LIST(lifetime_write_kbytes),
+       ATTR_LIST(inode_readahead_blks),
+       ATTR_LIST(inode_goal),
++      ATTR_LIST(max_dir_size),
+       ATTR_LIST(mb_stats),
+       ATTR_LIST(mb_max_to_scan),
+       ATTR_LIST(mb_min_to_scan),
+Index: linux-2.6.32-el6-beta/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32-el6-beta.orig/fs/ext4/ext4.h
++++ linux-2.6.32-el6-beta/fs/ext4/ext4.h
+@@ -1029,6 +1029,8 @@ struct ext4_sb_info {
+       /* Kernel thread for multiple mount protection */
+       struct task_struct *s_mmp_tsk;
++
++      unsigned long s_max_dir_size;
+ };
+ static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
+@@ -1353,6 +1355,12 @@ struct mmp_struct {
+ #define EXT4_MMP_MIN_CHECK_INTERVAL   5
+ /*
++ * max directory size tunable
++ */
++#define EXT4_DEFAULT_MAX_DIR_SIZE     0
++#define EXT4_MAX_DIR_SIZE_NAME                "max_dir_size"
++
++/*
+  * Function prototypes
+  */
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel6.patch
new file mode 100644 (file)
index 0000000..c0b59f0
--- /dev/null
@@ -0,0 +1,317 @@
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-14 16:18:28.300241437 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-14 16:33:17.056087375 +0800
+@@ -1770,6 +1770,7 @@
+       ext4_grpblk_t   bb_free;        /* total free blocks */
+       ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
+       struct          list_head bb_prealloc_list;
++      unsigned long   bb_prealloc_nr;
+ #ifdef DOUBLE_CHECK
+       void            *bb_bitmap;
+ #endif
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c 2011-03-14 16:18:28.336242149 +0800
++++ linux-stage/fs/ext4/mballoc.c      2011-03-14 16:33:27.072292006 +0800
+@@ -337,7 +337,7 @@
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+ static struct kmem_cache *ext4_free_ext_cachep;
+-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+                                               ext4_group_t group);
+@@ -659,7 +659,7 @@
+ }
+ static noinline_for_stack
+-void ext4_mb_generate_buddy(struct super_block *sb,
++int ext4_mb_generate_buddy(struct super_block *sb,
+                               void *buddy, void *bitmap, ext4_group_t group)
+ {
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+@@ -691,14 +691,13 @@
+       grp->bb_fragments = fragments;
+       if (free != grp->bb_free) {
+-              ext4_grp_locked_error(sb, group,  __func__,
+-                      "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
+-                      group, free, grp->bb_free);
+-              /*
+-               * If we intent to continue, we consider group descritor
+-               * corrupt and update bb_free using bitmap value
+-               */
+-              grp->bb_free = free;
++              struct ext4_group_desc *gdp;
++              gdp = ext4_get_group_desc (sb, group, NULL);
++              ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
++                      "%u in gd, %lu pa's\n", (long unsigned int)group,
++                      free, grp->bb_free, ext4_free_blks_count(sb, gdp),
++                      grp->bb_prealloc_nr);
++              return -EIO;
+       }
+       clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
+@@ -708,6 +707,8 @@
+       EXT4_SB(sb)->s_mb_buddies_generated++;
+       EXT4_SB(sb)->s_mb_generation_time += period;
+       spin_unlock(&EXT4_SB(sb)->s_bal_lock);
++
++      return 0;
+ }
+ /* The buddy information is attached the buddy cache inode
+@@ -839,7 +840,7 @@
+       first_block = page->index * blocks_per_page;
+       /* init the page  */
+       memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
+-      for (i = 0; i < blocks_per_page; i++) {
++      for (i = 0; i < blocks_per_page && err == 0; i++) {
+               int group;
+               struct ext4_group_info *grinfo;
+@@ -874,7 +875,7 @@
+                        * incore got set to the group block bitmap below
+                        */
+                       ext4_lock_group(sb, group);
+-                      ext4_mb_generate_buddy(sb, data, incore, group);
++                      err = ext4_mb_generate_buddy(sb, data, incore, group);
+                       ext4_unlock_group(sb, group);
+                       incore = NULL;
+               } else {
+@@ -888,7 +889,7 @@
+                       memcpy(data, bitmap, blocksize);
+                       /* mark all preallocated blks used in in-core bitmap */
+-                      ext4_mb_generate_from_pa(sb, data, group);
++                      err = ext4_mb_generate_from_pa(sb, data, group);
+                       ext4_mb_generate_from_freelist(sb, data, group);
+                       ext4_unlock_group(sb, group);
+@@ -898,7 +899,8 @@
+                       incore = data;
+               }
+       }
+-      SetPageUptodate(page);
++      if (likely(err == 0))
++              SetPageUptodate(page);
+ out:
+       if (bh) {
+@@ -2142,9 +2144,11 @@
+ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ {
+       struct super_block *sb = seq->private;
++      struct ext4_group_desc *gdp;
+       ext4_group_t group = (ext4_group_t) ((unsigned long) v);
+       int i;
+       int err;
++      int free = 0;
+       struct ext4_buddy e4b;
+       struct sg {
+               struct ext4_group_info info;
+@@ -2153,10 +2157,10 @@
+       group--;
+       if (group == 0)
+-              seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
+                               "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
+                                 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
+-                         "group", "free", "frags", "first",
++                         "group", "free", "frags", "first", "first", "pa",
+                          "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
+                          "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
+@@ -2167,13 +2171,20 @@
+               seq_printf(seq, "#%-5u: I/O error\n", group);
+               return 0;
+       }
++
++      gdp = ext4_get_group_desc(sb, group, NULL);
++      if (gdp != NULL)
++              free = ext4_free_blks_count(sb, gdp);
++
+       ext4_lock_group(sb, group);
+       memcpy(&sg, ext4_get_group_info(sb, group), i);
+       ext4_unlock_group(sb, group);
+       ext4_mb_release_desc(&e4b);
+-      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
+-                      sg.info.bb_fragments, sg.info.bb_first_free);
++      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
++                      (long unsigned int)group, sg.info.bb_free, free,
++                      sg.info.bb_fragments, sg.info.bb_first_free,
++                      sg.info.bb_prealloc_nr);
+       for (i = 0; i <= 13; i++)
+               seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
+                               sg.info.bb_counters[i] : 0);
+@@ -3354,23 +3365,68 @@
+ }
+ /*
++ * Count the free (zero) bits in @bitmap and compare the total with the
++ * free-block count of @gdp, before preallocations are applied, so that
++ * on-disk corruption is detected. Returns 0 on match; otherwise reports
++ * via ext4_error() and returns -EIO. Caller must hold the group lock.
++ */
++int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
++                              struct ext4_group_desc *gdp, int group)
++{
++      unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
++      unsigned short i, first, free = 0;
++
++      i = mb_find_next_zero_bit(bitmap, max, 0);
++
++      while (i < max) {
++              first = i;      /* start of a run of free bits */
++              i = mb_find_next_bit(bitmap, max, i);
++              if (i > max)
++                      i = max;
++              free += i - first;      /* length of that run */
++              if (i < max)
++                      i = mb_find_next_zero_bit(bitmap, max, i);
++      }
++
++      if (free != ext4_free_blks_count(sb, gdp)) {
++              ext4_error(sb, "on-disk bitmap for group %d "
++                      "corrupted: %u blocks free in bitmap, %u - in gd\n",
++                      group, free, ext4_free_blks_count(sb, gdp));
++              return -EIO;
++      }
++      return 0;
++}
++
++/*
+  * the function goes through all preallocation in this group and marks them
+  * used in in-core bitmap. buddy must be generated from this bitmap
+  * Need to be called with ext4 group lock held
+  */
+ static noinline_for_stack
+-void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+       struct ext4_prealloc_space *pa;
++      struct ext4_group_desc *gdp;
+       struct list_head *cur;
+       ext4_group_t groupnr;
+       ext4_grpblk_t start;
+       int preallocated = 0;
+       int count = 0;
++      int skip = 0;
++      int err;
+       int len;
++      gdp = ext4_get_group_desc (sb, group, NULL);
++      if (gdp == NULL)
++              return -EIO;
++
++      /* before applying preallocations, check bitmap consistency */
++      err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
++      if (err)
++              return err;
++
+       /* all form of preallocation discards first load group,
+        * so the only competing code is preallocation use.
+        * we don't need any locking here
+@@ -3386,14 +3442,23 @@
+                                            &groupnr, &start);
+               len = pa->pa_len;
+               spin_unlock(&pa->pa_lock);
+-              if (unlikely(len == 0))
++              if (unlikely(len == 0)) {
++                      skip++;
+                       continue;
++              }
+               BUG_ON(groupnr != group);
+               mb_set_bits(bitmap, start, len);
+               preallocated += len;
+               count++;
+       }
++      if (count + skip != grp->bb_prealloc_nr) {
++              ext4_error(sb, "lost preallocations: "
++                         "count %d, bb_prealloc_nr %lu, skip %d\n",
++                         count, grp->bb_prealloc_nr, skip);
++              return -EIO;
++      }
+       mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
++      return 0;
+ }
+ static void ext4_mb_pa_callback(struct rcu_head *head)
+@@ -3452,6 +3517,7 @@
+        */
+       ext4_lock_group(sb, grp);
+       list_del(&pa->pa_group_list);
++      ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
+       ext4_unlock_group(sb, grp);
+       spin_lock(pa->pa_obj_lock);
+@@ -3543,6 +3609,7 @@
+       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++      grp->bb_prealloc_nr++;
+       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+       spin_lock(pa->pa_obj_lock);
+@@ -3604,6 +3671,7 @@
+       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++      grp->bb_prealloc_nr++;
+       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+       /*
+@@ -3802,6 +3870,8 @@
+               spin_unlock(&pa->pa_lock);
++              BUG_ON(grp->bb_prealloc_nr == 0);
++              grp->bb_prealloc_nr--;
+               list_del(&pa->pa_group_list);
+               list_add(&pa->u.pa_tmp_list, &list);
+       }
+@@ -3942,7 +4012,7 @@
+               if (err) {
+                       ext4_error(sb, "Error loading buddy information for %u",
+                                       group);
+-                      continue;
++                      return;
+               }
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
+@@ -3954,6 +4024,8 @@
+               }
+               ext4_lock_group(sb, group);
++              BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
++              e4b.bd_info->bb_prealloc_nr--;
+               list_del(&pa->pa_group_list);
+               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+               ext4_unlock_group(sb, group);
+@@ -4227,6 +4299,7 @@
+               }
+               ext4_lock_group(sb, group);
+               list_del(&pa->pa_group_list);
++              ext4_get_group_info(sb, group)->bb_prealloc_nr--;
+               ext4_mb_release_group_pa(&e4b, pa, ac);
+               ext4_unlock_group(sb, group);
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h 2011-03-14 16:18:26.670209322 +0800
++++ linux-stage/fs/ext4/mballoc.h      2011-03-14 16:32:50.859552482 +0800
+@@ -88,7 +88,7 @@
+ /*
+  * for which requests use 2^N search using buddies
+  */
+-#define MB_DEFAULT_ORDER2_REQS                2
++#define MB_DEFAULT_ORDER2_REQS                8
+ /*
+  * default group prealloc size 512 blocks
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-pa_free-mismatch-rhel6.patch
new file mode 100644 (file)
index 0000000..faf7fce
--- /dev/null
@@ -0,0 +1,152 @@
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c 2011-03-14 16:34:39.790758415 +0800
++++ linux-stage/fs/ext4/mballoc.c      2011-03-14 16:38:36.211681104 +0800
+@@ -3593,6 +3593,7 @@
+       INIT_LIST_HEAD(&pa->pa_group_list);
+       pa->pa_deleted = 0;
+       pa->pa_type = MB_INODE_PA;
++      pa->pa_error = 0;
+       mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
+                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3654,6 +3655,7 @@
+       INIT_LIST_HEAD(&pa->pa_group_list);
+       pa->pa_deleted = 0;
+       pa->pa_type = MB_GROUP_PA;
++      pa->pa_error = 0;
+       mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
+                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+@@ -3716,7 +3718,9 @@
+       int err = 0;
+       int free = 0;
++      assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
+       BUG_ON(pa->pa_deleted == 0);
++      BUG_ON(pa->pa_inode == NULL);
+       ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+       grp_blk_start = pa->pa_pstart - bit;
+       BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+@@ -3752,19 +3756,27 @@
+               mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
+               bit = next + 1;
+       }
+-      if (free != pa->pa_free) {
+-              printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
+-                      pa, (unsigned long) pa->pa_lstart,
+-                      (unsigned long) pa->pa_pstart,
+-                      (unsigned long) pa->pa_len);
++
++      /* "free < pa->pa_free" means the same blocks may have been allocated
++       * twice; otherwise some free blocks stay unavailable. No need to BUG. */
++      if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
++              ext4_error(sb, "pa free mismatch: [pa %p] "
++                              "[phy %lu] [logic %lu] [len %u] [free %u] "
++                              "[error %u] [inode %lu] [freed %u]", pa,
++                              (unsigned long)pa->pa_pstart,
++                              (unsigned long)pa->pa_lstart,
++                              (unsigned)pa->pa_len, (unsigned)pa->pa_free,
++                              (unsigned)pa->pa_error, pa->pa_inode->i_ino,
++                              free);
+               ext4_grp_locked_error(sb, group,
+-                                      __func__, "free %u, pa_free %u",
+-                                      free, pa->pa_free);
++                              __func__, "free %u, pa_free %u",
++                              free, pa->pa_free);
+               /*
+                * pa is already deleted so we use the value obtained
+                * from the bitmap and continue.
+                */
+       }
++      BUG_ON(pa->pa_free != free);
+       atomic_add(free, &sbi->s_mb_discarded);
+       return err;
+@@ -4450,6 +4462,24 @@
+       trace_ext4_request_blocks(ar);
++
++      if (dev_check_rdonly(sb->s_bdev)) {
++              struct block_device *bdev = sb->s_bdev;
++
++              printk(KERN_WARNING "Alloc from readonly device %s (%#x): "
++                      "[inode %lu] [logic %llu] [goal %llu] [ll %llu] "
++                      "[pl %llu] [lr %llu] [pr %llu] [len %u] [flags %u]\n",
++                      bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++                      bdev->bd_dev, ar->inode->i_ino,
++                      (unsigned long long)ar->logical,
++                      (unsigned long long)ar->goal,
++                      (unsigned long long)ar->lleft,
++                      (unsigned long long)ar->pleft,
++                      (unsigned long long)ar->lright,
++                      (unsigned long long)ar->pright,
++                      ar->len, ar->flags);
++      }
++
+       /*
+        * For delayed allocation, we could skip the ENOSPC and
+        * EDQUOT check, as blocks and quotas have been already
+@@ -4529,6 +4559,25 @@
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
++                      if (ac->ac_pa) {
++                              struct ext4_prealloc_space *pa = ac->ac_pa;
++
++                              /* On failure we cannot tell whether the bitmap
++                               * has been updated or not, so pa_free cannot be
++                               * reverted; just mark pa_error instead. */
++                              pa->pa_error++;
++                              ext4_error(sb,
++                                      "Updating bitmap error: [err %d] "
++                                      "[pa %p] [phy %lu] [logic %lu] "
++                                      "[len %u] [free %u] [error %u] "
++                                      "[inode %lu]", *errp, pa,
++                                      (unsigned long)pa->pa_pstart,
++                                      (unsigned long)pa->pa_lstart,
++                                      (unsigned)pa->pa_len,
++                                      (unsigned)pa->pa_free,
++                                      (unsigned)pa->pa_error,
++                                      pa->pa_inode ? pa->pa_inode->i_ino : 0);
++                      }
+               } else {
+                       block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+                       ar->len = ac->ac_b_ex.fe_len;
+@@ -4691,6 +4740,15 @@
+               goto error_return;
+       }
++      if (dev_check_rdonly(sb->s_bdev)) {
++              struct block_device *bdev = sb->s_bdev;
++
++              printk(KERN_WARNING "Release to readonly device %s (%#x): "
++                      "[inode %lu] [block %llu] [count %lu] [is_meta %d]\n",
++                      bdev->bd_disk ? bdev->bd_disk->disk_name : "",
++                      bdev->bd_dev, inode->i_ino, block, count, metadata);
++      }
++
+       ext4_debug("freeing block %llu\n", block);
+       trace_ext4_free_blocks(inode, block, count, metadata);
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h 2011-03-14 16:32:50.859552482 +0800
++++ linux-stage/fs/ext4/mballoc.h      2011-03-14 16:39:20.928429776 +0800
+@@ -20,6 +20,7 @@
+ #include <linux/version.h>
+ #include <linux/blkdev.h>
+ #include <linux/mutex.h>
++#include <linux/genhd.h>
+ #include "ext4_jbd2.h"
+ #include "ext4.h"
+@@ -130,6 +131,7 @@
+       ext4_grpblk_t           pa_free;        /* how many blocks are free */
+       unsigned short          pa_type;        /* pa type. inode or group */
+       spinlock_t              *pa_obj_lock;
++      unsigned short          pa_error;
+       struct inode            *pa_inode;      /* hack, for history only */
+ };
index 18e15c2..43cc3bc 100644 (file)
@@ -1,24 +1,21 @@
-Index: linux-2.6.18.i386/fs/ext4/ext4_jbd2.h
+Index: linux-stage/fs/ext4/ext4_jbd2.h
 ===================================================================
 ===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/ext4_jbd2.h
-+++ linux-2.6.18.i386/fs/ext4/ext4_jbd2.h
-@@ -35,6 +35,11 @@
-       (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
-               || test_opt(sb, EXTENTS) ? 27U : 8U)
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h       2011-03-14 17:17:57.962614294 +0800
++++ linux-stage/fs/ext4/ext4_jbd2.h    2011-03-14 17:26:00.570661921 +0800
+@@ -35,6 +35,8 @@
+       (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
+        ? 27U : 8U)
  
  
-+/* Indicate that EXT4_SINGLEDATA_TRANS_BLOCKS takes the sb as argument */
-+#define EXT4_SINGLEDATA_TRANS_BLOCKS_HAS_SB
-+
 +#define ext4_journal_dirty_metadata(handle, bh)  \
 +                ext4_handle_dirty_metadata(handle, NULL, bh)
  /* Extended attribute operations touch at most two data buffers,
   * two bitmap buffers, and two group summaries, in addition to the inode
   * and the superblock, which are already accounted for. */
 +#define ext4_journal_dirty_metadata(handle, bh)  \
 +                ext4_handle_dirty_metadata(handle, NULL, bh)
  /* Extended attribute operations touch at most two data buffers,
   * two bitmap buffers, and two group summaries, in addition to the inode
   * and the superblock, which are already accounted for. */
-Index: linux-2.6.18.i386/fs/ext4/extents.c
+Index: linux-stage/fs/ext4/extents.c
 ===================================================================
 ===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/extents.c
-+++ linux-2.6.18.i386/fs/ext4/extents.c
-@@ -60,6 +60,17 @@ static ext4_fsblk_t ext_pblock(struct ex
+--- linux-stage.orig/fs/ext4/extents.c 2011-03-14 17:17:57.491605523 +0800
++++ linux-stage/fs/ext4/extents.c      2011-03-14 17:25:23.230957562 +0800
+@@ -59,6 +59,17 @@ static ext4_fsblk_t ext_pblock(struct ex
  }
  
  /*
  }
  
  /*
@@ -36,7 +33,7 @@ Index: linux-2.6.18.i386/fs/ext4/extents.c
   * idx_pblock:
   * combine low and high parts of a leaf physical block number into ext4_fsblk_t
   */
   * idx_pblock:
   * combine low and high parts of a leaf physical block number into ext4_fsblk_t
   */
-@@ -73,17 +84,6 @@ ext4_fsblk_t idx_pblock(struct ext4_exte
+@@ -72,17 +83,6 @@ ext4_fsblk_t idx_pblock(struct ext4_exte
  }
  
  /*
  }
  
  /*
@@ -54,7 +51,7 @@ Index: linux-2.6.18.i386/fs/ext4/extents.c
   * ext4_idx_store_pblock:
   * stores a large physical block number into an index struct,
   * breaking it into parts
   * ext4_idx_store_pblock:
   * stores a large physical block number into an index struct,
   * breaking it into parts
-@@ -1826,6 +1826,56 @@ static int ext4_ext_rm_idx(handle_t *han
+@@ -1980,6 +1980,56 @@ static int ext4_ext_rm_idx(handle_t *han
  }
  
  /*
  }
  
  /*
@@ -111,11 +108,10 @@ Index: linux-2.6.18.i386/fs/ext4/extents.c
   * ext4_ext_calc_credits_for_single_extent:
   * This routine returns max. credits that needed to insert an extent
   * to the extent tree.
   * ext4_ext_calc_credits_for_single_extent:
   * This routine returns max. credits that needed to insert an extent
   * to the extent tree.
-@@ -3157,4 +3207,14 @@ int ext4_fiemap(struct inode *inode, str
+@@ -3731,3 +3781,13 @@ int ext4_fiemap(struct inode *inode, str
        return error;
  }
        return error;
  }
-+
 +EXPORT_SYMBOL(ext4_ext_store_pblock);
 +EXPORT_SYMBOL(ext4_ext_search_right);
 +EXPORT_SYMBOL(ext4_ext_search_left);
 +EXPORT_SYMBOL(ext4_ext_store_pblock);
 +EXPORT_SYMBOL(ext4_ext_search_right);
 +EXPORT_SYMBOL(ext4_ext_search_left);
@@ -125,12 +121,12 @@ Index: linux-2.6.18.i386/fs/ext4/extents.c
 +EXPORT_SYMBOL(ext4_ext_walk_space);
 +EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
 +EXPORT_SYMBOL(ext4_mark_inode_dirty);
 +EXPORT_SYMBOL(ext4_ext_walk_space);
 +EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
 +EXPORT_SYMBOL(ext4_mark_inode_dirty);
-Index: linux-2.6.18.i386/fs/ext4/ext4_extents.h
++
+Index: linux-stage/fs/ext4/ext4_extents.h
 ===================================================================
 ===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/ext4_extents.h
-+++ linux-2.6.18.i386/fs/ext4/ext4_extents.h
-@@ -59,6 +59,12 @@
+--- linux-stage.orig/fs/ext4/ext4_extents.h    2011-03-14 17:17:57.928613657 +0800
++++ linux-stage/fs/ext4/ext4_extents.h 2011-03-14 17:27:23.673232962 +0800
+@@ -58,6 +58,12 @@
   */
  #define EXT_STATS_
  
   */
  #define EXT_STATS_
  
@@ -143,16 +139,15 @@ Index: linux-2.6.18.i386/fs/ext4/ext4_extents.h
  
  /*
   * ext4_inode has i_block array (60 bytes total).
  
  /*
   * ext4_inode has i_block array (60 bytes total).
-@@ -124,6 +129,8 @@ struct ext4_ext_path {
- #define EXT4_EXT_CACHE_GAP    1
- #define EXT4_EXT_CACHE_EXTENT 2
+@@ -160,6 +166,7 @@ struct ext4_ext_path {
+ #define EXT_INIT_MAX_LEN      (1UL << 15)
+ #define EXT_UNINIT_MAX_LEN    (EXT_INIT_MAX_LEN - 1)
  
 +#define EXT4_EXT_HAS_NO_TREE  /* ext4_extents_tree struct is not used*/
  
 +#define EXT4_EXT_HAS_NO_TREE  /* ext4_extents_tree struct is not used*/
-+#define EXT_INSERT_EXTENT_WITH_5ARGS
- #define EXT_MAX_BLOCK 0xffffffff
  
  
-@@ -228,6 +234,8 @@ static inline int ext4_ext_get_actual_le
+ #define EXT_FIRST_EXTENT(__hdr__) \
+       ((struct ext4_extent *) (((char *) (__hdr__)) +         \
+@@ -230,6 +237,8 @@
  extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
  extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
  extern int ext4_extent_tree_init(handle_t *, struct inode *);
  extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
  extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
  extern int ext4_extent_tree_init(handle_t *, struct inode *);
@@ -161,11 +156,11 @@ Index: linux-2.6.18.i386/fs/ext4/ext4_extents.h
  extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
                                                   int num,
                                                   struct ext4_ext_path *path);
  extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
                                                   int num,
                                                   struct ext4_ext_path *path);
-Index: linux-2.6.18.i386/fs/ext4/mballoc.c
+Index: linux-stage/fs/ext4/mballoc.c
 ===================================================================
 ===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/mballoc.c
-+++ linux-2.6.18.i386/fs/ext4/mballoc.c
-@@ -4355,6 +4355,13 @@
+--- linux-stage.orig/fs/ext4/mballoc.c 2011-03-14 17:17:59.872649833 +0800
++++ linux-stage/fs/ext4/mballoc.c      2011-03-14 17:25:20.373903681 +0800
+@@ -4302,6 +4302,13 @@
                kmem_cache_free(ext4_ac_cachep, ac);
  }
  
                kmem_cache_free(ext4_ac_cachep, ac);
  }
  
@@ -179,35 +174,35 @@ Index: linux-2.6.18.i386/fs/ext4/mballoc.c
  /*
   * finds all preallocated spaces and return blocks being freed to them
   * if preallocated space becomes full (no block is used from the space)
  /*
   * finds all preallocated spaces and return blocks being freed to them
   * if preallocated space becomes full (no block is used from the space)
-@@ -4965,3 +4965,6 @@ error_return:
+@@ -5111,3 +5118,6 @@ error_return:
                kmem_cache_free(ext4_ac_cachep, ac);
        return;
  }
 +
 +EXPORT_SYMBOL(ext4_free_blocks);
 +
                kmem_cache_free(ext4_ac_cachep, ac);
        return;
  }
 +
 +EXPORT_SYMBOL(ext4_free_blocks);
 +
-Index: linux-2.6.18.i386/fs/ext4/ext4_jbd2.c
+Index: linux-stage/fs/ext4/ext4_jbd2.c
 ===================================================================
 ===================================================================
---- linux-2.6.18.i386.orig/fs/ext4/ext4_jbd2.c
-+++ linux-2.6.18.i386/fs/ext4/ext4_jbd2.c
-@@ -21,6 +21,7 @@ int __ext4_journal_get_write_access(cons
-               ext4_journal_abort_handle(where, __func__, bh, handle, err);
+--- linux-stage.orig/fs/ext4/ext4_jbd2.c       2011-03-14 17:17:57.463605024 +0800
++++ linux-stage/fs/ext4/ext4_jbd2.c    2011-03-14 17:18:00.157655139 +0800
+@@ -31,6 +31,7 @@ int __ext4_journal_get_write_access(cons
+       }
        return err;
  }
 +EXPORT_SYMBOL(__ext4_journal_get_write_access);
  
  int __ext4_journal_forget(const char *where, handle_t *handle,
                                struct buffer_head *bh)
        return err;
  }
 +EXPORT_SYMBOL(__ext4_journal_get_write_access);
  
  int __ext4_journal_forget(const char *where, handle_t *handle,
                                struct buffer_head *bh)
-@@ -57,3 +58,4 @@ int __ext4_journal_dirty_metadata(const 
-               ext4_journal_abort_handle(where, __func__, bh, handle, err);
+@@ -107,3 +108,4 @@ int __ext4_journal_dirty_metadata(const
+       }
        return err;
  }
 +EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
        return err;
  }
 +EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
-Index: linux-2.6.27.21-0.1/fs/ext4/ext4.h
+Index: linux-stage/fs/ext4/ext4.h
 ===================================================================
 ===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/ext4.h    2009-07-07 14:47:19.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/ext4.h 2009-07-07 14:47:22.000000000 +0530
-@@ -1123,6 +1128,8 @@
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-14 17:17:59.916650654 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-14 17:25:30.236089694 +0800
+@@ -1448,6 +1448,8 @@
  extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
  extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
                                                ext4_group_t, int);
  extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
  extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
                                                ext4_group_t, int);
@@ -216,11 +211,11 @@ Index: linux-2.6.27.21-0.1/fs/ext4/ext4.h
  /* inode.c */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
  /* inode.c */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
-Index: linux-2.6.27.21-0.1/fs/ext4/inode.c
+Index: linux-stage/fs/ext4/inode.c
 ===================================================================
 ===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c   2009-07-07 14:47:19.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/inode.c        2009-07-07 14:47:22.000000000 +0530
-@@ -4240,6 +4240,7 @@
+--- linux-stage.orig/fs/ext4/inode.c   2011-03-14 17:17:59.745647471 +0800
++++ linux-stage/fs/ext4/inode.c        2011-03-14 17:18:00.219656294 +0800
+@@ -4882,6 +4882,7 @@
        iget_failed(inode);
        return ERR_PTR(ret);
  }
        iget_failed(inode);
        return ERR_PTR(ret);
  }
@@ -228,54 +223,54 @@ Index: linux-2.6.27.21-0.1/fs/ext4/inode.c
  
  static int ext4_inode_blocks_set(handle_t *handle,
                                struct ext4_inode *raw_inode,
  
  static int ext4_inode_blocks_set(handle_t *handle,
                                struct ext4_inode *raw_inode,
-Index: linux-2.6.27.21-0.1/fs/ext4/super.c
+Index: linux-stage/fs/ext4/super.c
 ===================================================================
 ===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/super.c   2009-07-07 14:47:19.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/super.c        2009-07-07 14:48:53.000000000 +0530
-@@ -91,6 +91,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct su
+--- linux-stage.orig/fs/ext4/super.c   2011-03-14 17:17:59.659645870 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-14 17:25:31.027104616 +0800
+@@ -90,6 +90,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct su
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
                (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-               (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
+                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
  }
 +EXPORT_SYMBOL(ext4_inode_bitmap);
  
  ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                              struct ext4_group_desc *bg)
  }
 +EXPORT_SYMBOL(ext4_inode_bitmap);
  
  ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                              struct ext4_group_desc *bg)
-@@ -113,6 +118,7 @@
-               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
-               (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+@@ -114,6 +115,7 @@
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+                (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
  }
 +EXPORT_SYMBOL(ext4_itable_unused_count);
  }
 +EXPORT_SYMBOL(ext4_itable_unused_count);
-
void ext4_block_bitmap_set(struct super_block *sb,
-                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
-@@ -1286,9 +1287,11 @@
-       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
-       Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
-       Opt_usrquota, Opt_grpquota, Opt_i_version,
__u32 ext4_used_dirs_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+@@ -1434,9 +1436,11 @@
+       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
+       Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
+       Opt_usrquota, Opt_grpquota, Opt_i_version,
 +      Opt_mballoc, Opt_extents,
 +      Opt_mballoc, Opt_extents,
-       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_block_validity, Opt_noblock_validity,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+       Opt_block_validity, Opt_noblock_validity,
 -      Opt_inode_readahead_blks, Opt_journal_ioprio
 +      Opt_inode_readahead_blks, Opt_journal_ioprio,
 +      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
  static match_table_t tokens = {
 -      Opt_inode_readahead_blks, Opt_journal_ioprio
 +      Opt_inode_readahead_blks, Opt_journal_ioprio,
 +      Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
  };
  
  static match_table_t tokens = {
-@@ -1346,6 +1348,11 @@
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_i_version, "i_version"},
+@@ -1491,6 +1495,11 @@
+       {Opt_barrier, "barrier"},
+       {Opt_nobarrier, "nobarrier"},
+       {Opt_i_version, "i_version"},
 +      {Opt_mballoc, "mballoc"},
 +      {Opt_extents, "extents"},
 +      {Opt_iopen, "iopen"},
 +      {Opt_noiopen, "noiopen"},
 +      {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_mballoc, "mballoc"},
 +      {Opt_extents, "extents"},
 +      {Opt_iopen, "iopen"},
 +      {Opt_noiopen, "noiopen"},
 +      {Opt_iopen_nopriv, "iopen_nopriv"},
-       {Opt_stripe, "stripe=%u"},
-       {Opt_resize, "resize"},
-       {Opt_delalloc, "delalloc"},
-@@ -1768,6 +1771,12 @@
-               case Opt_bigendian_extents:
-                       bigendian_extents = 1;
+       {Opt_stripe, "stripe=%u"},
+       {Opt_resize, "resize"},
+       {Opt_delalloc, "delalloc"},
+@@ -1930,6 +1939,12 @@
+                       else
+                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
                        break;
 +              case Opt_mballoc:
 +              case Opt_extents:
                        break;
 +              case Opt_mballoc:
 +              case Opt_extents:
@@ -284,64 +279,64 @@ Index: linux-2.6.27.21-0.1/fs/ext4/super.c
 +              case Opt_iopen_nopriv:
 +                      break;
                default:
 +              case Opt_iopen_nopriv:
 +                      break;
                default:
-                       printk(KERN_ERR
-                              "EXT4-fs: Unrecognized mount option \"%s\" "
-@@ -2768,7 +2771,7 @@
-                                             char *buf)
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+@@ -2480,7 +2495,7 @@
+                                             char *buf)
  {
  {
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
 -                      (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
 +                      (unsigned long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
  }
  
  static ssize_t session_write_kbytes_show(struct ext4_attr *a,
 -                      (s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
 +                      (unsigned long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
  }
  
  static ssize_t session_write_kbytes_show(struct ext4_attr *a,
-@@ -2868,11 +2871,11 @@
-       struct super_block *sb = sbi->s_buddy_cache->i_sb;
-
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
+@@ -2501,11 +2516,11 @@
+       struct super_block *sb = sbi->s_buddy_cache->i_sb;
+       return snprintf(buf, PAGE_SIZE, "%llu\n",
 -                      sbi->s_kbytes_written + 
 +                      (unsigned long long)(sbi->s_kbytes_written + 
 -                      sbi->s_kbytes_written + 
 +                      (unsigned long long)(sbi->s_kbytes_written + 
-                       (sb->s_bdev->bd_part ?
-                       (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
-                         EXT4_SB(sb)->s_sectors_written_start) >> 1
+                       (sb->s_bdev->bd_part ?
+                       (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+                         EXT4_SB(sb)->s_sectors_written_start) >> 1
 -                      : 0));
 +                      : 0)));
  }
  
  static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 -                      : 0));
 +                      : 0)));
  }
  
  static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
-@@ -3868,7 +3871,7 @@
-       if (blocks_count && ext4_blocks_count(es) > blocks_count) {
-               ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
-                      "exceeds size of device (%llu blocks)",
+@@ -2972,7 +2987,7 @@
+       if (blocks_count && ext4_blocks_count(es) > blocks_count) {
+               ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
+                      "exceeds size of device (%llu blocks)",
 -                     ext4_blocks_count(es), blocks_count);
 +                     ext4_blocks_count(es), (unsigned long long)blocks_count);
 -                     ext4_blocks_count(es), blocks_count);
 +                     ext4_blocks_count(es), (unsigned long long)blocks_count);
-               goto failed_mount;
-       }
-
-Index: linux-2.6.27.21-0.1/fs/ext4/fsync.c
+               goto failed_mount;
+       }
+Index: linux-stage/fs/ext4/fsync.c
 ===================================================================
 ===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/fsync.c   2009-07-07 14:47:19.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/fsync.c        2009-07-07 14:48:53.000000000 +0530
-@@ -1768,7 +1771,7 @@
+--- linux-stage.orig/fs/ext4/fsync.c   2011-03-14 17:17:57.533606303 +0800
++++ linux-stage/fs/ext4/fsync.c        2011-03-14 17:18:00.266657168 +0800
+@@ -56,7 +56,7 @@
  
  
-       trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
-                  inode->i_sb->s_id, datasync, inode->i_ino,
+       trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
+                  inode->i_sb->s_id, datasync, inode->i_ino,
 -                 dentry->d_parent->d_inode->i_ino);
 +                 0L);
 -                 dentry->d_parent->d_inode->i_ino);
 +                 0L);
-
-       /*
-        * data=writeback:
-Index: linux-2.6.27.21-0.1/fs/ext4/move_extent.c
+       ret = flush_aio_dio_completed_IO(inode);
+       if (ret < 0)
+Index: linux-stage/fs/ext4/move_extent.c
 ===================================================================
 ===================================================================
---- linux-2.6.27.21-0.1.orig/fs/ext4/move_extent.c     2009-07-07 14:47:19.000000000 +0530
-+++ linux-2.6.27.21-0.1/fs/ext4/move_extent.c  2009-07-07 14:48:53.000000000 +0530
-@@ -1768,7 +1771,8 @@
-                               ext4_error(orig_inode->i_sb, __func__,
-                                       "We replaced blocks too much! "
-                                       "sum of replaced: %llu requested: %llu",
+--- linux-stage.orig/fs/ext4/move_extent.c     2011-03-14 17:17:57.742610199 +0800
++++ linux-stage/fs/ext4/move_extent.c  2011-03-14 17:18:00.284657501 +0800
+@@ -1388,7 +1388,8 @@
+                               ext4_error(orig_inode->i_sb, __func__,
+                                       "We replaced blocks too much! "
+                                       "sum of replaced: %llu requested: %llu",
 -                                      *moved_len, len);
 +                                      (unsigned long long)(*moved_len),
 +                                      (unsigned long long)(len));
 -                                      *moved_len, len);
 +                                      (unsigned long long)(*moved_len),
 +                                      (unsigned long long)(len));
-                               ret1 = -EIO;
-                               goto out;
-                       }
+                               ret1 = -EIO;
+                               goto out;
+                       }
diff --git a/ldiskfs/kernel_patches/patches/ext4-misc-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-misc-rhel6.patch
new file mode 100644 (file)
index 0000000..126e659
--- /dev/null
@@ -0,0 +1,255 @@
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h       2011-03-14 16:33:17.087088010 +0800
++++ linux-stage/fs/ext4/ext4_jbd2.h    2011-03-14 16:42:28.416591789 +0800
+@@ -35,6 +35,8 @@
+       (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)   \
+        ? 27U : 8U)
++#define ext4_journal_dirty_metadata(handle, bh)  \
++              ext4_handle_dirty_metadata(handle, NULL, bh)
+ /* Extended attribute operations touch at most two data buffers,
+  * two bitmap buffers, and two group summaries, in addition to the inode
+  * and the superblock, which are already accounted for. */
+Index: linux-stage/fs/ext4/ext4_extents.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_extents.h    2011-03-14 16:33:17.076087785 +0800
++++ linux-stage/fs/ext4/ext4_extents.h 2011-03-14 16:43:08.254267525 +0800
+@@ -58,6 +58,12 @@
+  */
+ #define EXT_STATS_
++/*
++ * define EXT4_ALLOC_NEEDED to 0 since block bitmap, group desc. and sb
++ * are now accounted in ext4_ext_calc_credits_for_insert()
++ */
++#define EXT4_ALLOC_NEEDED 0
++#define HAVE_EXT_PREPARE_CB_EXTENT
+ /*
+  * ext4_inode has i_block array (60 bytes total).
+@@ -160,6 +166,7 @@
+ #define EXT_INIT_MAX_LEN      (1UL << 15)
+ #define EXT_UNINIT_MAX_LEN    (EXT_INIT_MAX_LEN - 1)
++#define EXT4_EXT_HAS_NO_TREE  /* ext4_extents_tree struct is not used*/
+ #define EXT_FIRST_EXTENT(__hdr__) \
+       ((struct ext4_extent *) (((char *) (__hdr__)) +         \
+@@ -239,6 +246,8 @@
+ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
+                                                  int num,
+                                                  struct ext4_ext_path *path);
++extern int ext4_ext_calc_credits_for_insert(struct inode *,
++                                          struct ext4_ext_path *);
+ extern int ext4_can_extents_be_merged(struct inode *inode,
+                                     struct ext4_extent *ex1,
+                                     struct ext4_extent *ex2);
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c 2011-03-14 16:33:27.072292006 +0800
++++ linux-stage/fs/ext4/mballoc.c      2011-03-14 16:41:02.500138039 +0800
+@@ -4039,6 +4039,7 @@
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
+ }
++EXPORT_SYMBOL(ext4_discard_preallocations);
+ /*
+  * finds all preallocated spaces and return blocks being freed to them
+@@ -4831,3 +4832,6 @@
+               kmem_cache_free(ext4_ac_cachep, ac);
+       return;
+ }
++
++EXPORT_SYMBOL(ext4_free_blocks);
++
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-14 16:33:17.036086967 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-14 16:41:14.964348396 +0800
+@@ -127,6 +127,7 @@
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+                (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+ }
++EXPORT_SYMBOL(ext4_itable_unused_count);
+ void ext4_block_bitmap_set(struct super_block *sb,
+                          struct ext4_group_desc *bg, ext4_fsblk_t blk)
+@@ -1491,6 +1492,7 @@
+       Opt_block_validity, Opt_noblock_validity,
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_discard, Opt_nodiscard,
++      Opt_mballoc,
+ };
+ static const match_table_t tokens = {
+@@ -1557,6 +1559,7 @@
+       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
+       {Opt_auto_da_alloc, "auto_da_alloc"},
+       {Opt_noauto_da_alloc, "noauto_da_alloc"},
++      {Opt_mballoc, "mballoc"},
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
+       {Opt_err, NULL},
+@@ -1997,6 +2000,8 @@
+               case Opt_nodiscard:
+                       clear_opt(sbi->s_mount_opt, DISCARD);
+                       break;
++              case Opt_mballoc:
++                      break;
+               default:
+                       ext4_msg(sb, KERN_ERR,
+                              "Unrecognized mount option \"%s\" "
+Index: linux-stage/fs/ext4/ext4_jbd2.c
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.c       2011-03-14 16:33:17.049087232 +0800
++++ linux-stage/fs/ext4/ext4_jbd2.c    2011-03-14 16:34:39.849759386 +0800
+@@ -31,6 +31,7 @@
+       }
+       return err;
+ }
++EXPORT_SYMBOL(__ext4_journal_get_write_access);
+ int __ext4_journal_forget(const char *where, handle_t *handle,
+                               struct buffer_head *bh)
+@@ -107,3 +108,4 @@
+       }
+       return err;
+ }
++EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-14 16:33:17.056087375 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-14 16:45:40.754870806 +0800
+@@ -1110,6 +1110,9 @@
+ #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
++/* Has been moved to linux/magic.h but we need it for Lustre */
++#define EXT4_SUPER_MAGIC      0xEF53
++
+ /*
+  * Codes for operating systems
+  */
+@@ -1528,6 +1531,8 @@
+ extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
+ extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
+                                               ext4_group_t, int);
++extern void ext4_mb_discard_inode_preallocations(struct inode *);
++
+ /* inode.c */
+ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
+               struct buffer_head *bh, ext4_fsblk_t blocknr);
+Index: linux-stage/fs/ext4/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext4/inode.c   2011-03-14 16:33:17.063087519 +0800
++++ linux-stage/fs/ext4/inode.c        2011-03-14 16:34:39.913760434 +0800
+@@ -5199,6 +5199,7 @@
+       iget_failed(inode);
+       return ERR_PTR(ret);
+ }
++EXPORT_SYMBOL(ext4_iget);
+ static int ext4_inode_blocks_set(handle_t *handle,
+                               struct ext4_inode *raw_inode,
+Index: linux-stage/fs/ext4/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext4/extents.c 2011-03-14 16:33:17.070087661 +0800
++++ linux-stage/fs/ext4/extents.c      2011-03-14 16:41:04.894178430 +0800
+@@ -1866,9 +1866,7 @@
+       while (block < last && block != EXT_MAX_BLOCK) {
+               num = last - block;
+               /* find extent for this block */
+-              down_read(&EXT4_I(inode)->i_data_sem);
+               path = ext4_ext_find_extent(inode, block, path);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+               if (IS_ERR(path)) {
+                       err = PTR_ERR(path);
+                       path = NULL;
+@@ -1965,6 +1963,7 @@
+       return err;
+ }
++EXPORT_SYMBOL(ext4_ext_walk_space);
+ static void
+ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
+@@ -2133,6 +2132,55 @@
+ }
+ /*
++ * This routine returns max. credits extent tree can consume.
++ * It should be OK for low-performance paths like ->writepage()
++ * To allow many writing process to fit a single transaction,
++ * caller should calculate credits under truncate_mutex and
++ * pass actual path.
++ */
++int ext4_ext_calc_credits_for_insert(struct inode *inode,
++                                   struct ext4_ext_path *path)
++{
++      int levels, credits;
++
++      if (path != NULL) {
++              /* fast path: a leaf with a free slot costs a single credit */
++              levels = ext_depth(inode);
++              if (le16_to_cpu(path[levels].p_hdr->eh_max)
++                              > le16_to_cpu(path[levels].p_hdr->eh_entries))
++                      return 1;
++      }
++
++      /*
++       * worst case: a 32bit logical block space (4294967296 blocks)
++       * fits in a 4-level tree -- 4 * 340^4 == 53453440000;
++       * budget one extra level for an unbalanced tree.
++       */
++      levels = 5;
++
++      /* new data block(s) being allocated */
++      credits = 2;
++
++      /*
++       * a full tree has to grow in depth:
++       * one credit covers the old root, the new root is
++       * charged by the split accounting below
++       */
++      credits += 1;
++      /*
++       * possible index split:
++       *    intermediate index allocation (bitmap + group)
++       *  + two changed blocks per level, root excluded (counted above)
++       */
++      credits += (levels * 2) + (levels * 2);
++
++      /* every allocation touches the superblock */
++      credits += 1;
++
++      return credits;
++}
++
++/*
+  * How many index/leaf blocks need to change/allocate to modify nrblocks?
+  *
+  * if nrblocks are fit in a single extent (chunk flag is 1), then
+@@ -3934,10 +3982,21 @@
+                * Walk the extent tree gathering extent information.
+                * ext4_ext_fiemap_cb will push extents back to user.
+                */
++              down_read(&EXT4_I(inode)->i_data_sem);
+               error = ext4_ext_walk_space(inode, start_blk, len_blks,
+                                         ext4_ext_fiemap_cb, fieinfo);
++              up_read(&EXT4_I(inode)->i_data_sem);
+       }
+       return error;
+ }
++EXPORT_SYMBOL(ext4_ext_store_pblock);
++EXPORT_SYMBOL(ext4_ext_search_right);
++EXPORT_SYMBOL(ext4_ext_search_left);
++EXPORT_SYMBOL(ext_pblock);
++EXPORT_SYMBOL(ext4_ext_insert_extent);
++EXPORT_SYMBOL(ext4_mb_new_blocks);
++EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
++EXPORT_SYMBOL(ext4_mark_inode_dirty);
++
diff --git a/ldiskfs/kernel_patches/patches/ext4-mmp-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-mmp-rhel6.patch
new file mode 100644 (file)
index 0000000..83777e3
--- /dev/null
@@ -0,0 +1,479 @@
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-03 15:25:02.376539424 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-05 12:24:02.918774335 +0800
+@@ -40,6 +40,8 @@
+ #include <linux/log2.h>
+ #include <linux/crc16.h>
+ #include <asm/uaccess.h>
++#include <linux/kthread.h>
++#include <linux/utsname.h>
+ #include "ext4.h"
+ #include "ext4_jbd2.h"
+@@ -700,6 +702,8 @@
+               invalidate_bdev(sbi->journal_bdev);
+               ext4_blkdev_remove(sbi);
+       }
++      if (sbi->s_mmp_tsk)
++              kthread_stop(sbi->s_mmp_tsk);
+       sb->s_fs_info = NULL;
+       /*
+        * Now that we are completely done shutting down the
+@@ -970,6 +974,344 @@
+       return 0;
+ }
++/*
++ * Write the MMP buffer @bh using WRITE_SYNC to try to get the block on-disk
++ * faster, and wait for it. Returns 0, or 1 if @bh is not uptodate afterwards.
++ */
++static int write_mmp_block(struct buffer_head *bh)
++{
++      mark_buffer_dirty(bh);
++      lock_buffer(bh);
++      bh->b_end_io = end_buffer_write_sync;
++      get_bh(bh);     /* extra ref for the I/O; presumably dropped by b_end_io -- confirm */
++      submit_bh(WRITE_SYNC, bh);
++      wait_on_buffer(bh);     /* synchronous: block until the write completes */
++      if (unlikely(!buffer_uptodate(bh)))
++              return 1;       /* note: positive 1, not a negative errno */
++
++      return 0;
++}
++
++/*
++ * Read MMP block @mmp_block from disk into *@bh (uptodate is cleared first).
++ * Returns 0, -EIO (*@bh left NULL) or -EINVAL (bad magic; *@bh stays held).
++ */
++static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
++                        unsigned long mmp_block)
++{
++      struct mmp_struct *mmp;
++
++      if (*bh)
++              clear_buffer_uptodate(*bh);
++
++#if 0  /* compiled-out alternative: release the buffer and sb_bread() it again */
++      brelse(*bh);
++
++      *bh = sb_bread(sb, mmp_block);
++#else
++      if (!*bh)       /* caller passed no buffer yet: look the block up */
++              *bh = sb_getblk(sb, mmp_block);
++      if (*bh) {
++              get_bh(*bh);    /* extra ref for the I/O; presumably dropped by b_end_io -- confirm */
++              lock_buffer(*bh);
++              (*bh)->b_end_io = end_buffer_read_sync;
++              submit_bh(READ_SYNC, *bh);
++              wait_on_buffer(*bh);
++              if (!buffer_uptodate(*bh)) {
++                      brelse(*bh);    /* read failed: give the buffer up */
++                      *bh = NULL;
++              }
++      }
++#endif
++      if (!*bh) {     /* either sb_getblk() returned NULL or the read failed */
++              ext4_warning(sb,
++                           "Error while reading MMP block %lu", mmp_block);
++              return -EIO;
++      }
++
++      mmp = (struct mmp_struct *)((*bh)->b_data);
++      if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
++              return -EINVAL; /* *bh is NOT released here; caller must do it */
++
++      return 0;
++}
++
++/*
++ * Warn with @msg on behalf of @function, then dump @mmp's time/node/device.
++ */
++static void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
++                       const char *function, const char *msg)
++{
++      __ext4_warning(sb, function, "%s", msg); /* never use @msg as a format */
++      __ext4_warning(sb, function, "MMP failure info: last update time: %llu, "
++                   "last update node: %s, last update device: %s\n",
++                   (long long unsigned int)le64_to_cpu(mmp->mmp_time),
++                   mmp->mmp_nodename, mmp->mmp_bdevname);
++}
++
++/*
++ * kmmpd thread (@data: super_block): rewrite the MMP seq/time every interval
++ */
++static int kmmpd(void *data)
++{
++      struct super_block *sb = (struct super_block *) data;
++      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++      struct buffer_head *bh = NULL;
++      struct mmp_struct *mmp;
++      unsigned long mmp_block;
++      u32 seq = 0;
++      unsigned long failed_writes = 0;
++      int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
++      unsigned mmp_check_interval;
++      unsigned long last_update_time;
++      unsigned long diff;
++      int retval;
++
++      mmp_block = le64_to_cpu(es->s_mmp_block);
++      retval = read_mmp_block(sb, &bh, mmp_block);
++      if (retval)
++              goto failed;
++
++      mmp = (struct mmp_struct *)(bh->b_data);
++      mmp->mmp_time = cpu_to_le64(get_seconds());
++      /*
++       * Start with the higher mmp_check_interval and reduce it if
++       * the MMP block is being updated on time.
++       */
++      mmp_check_interval = max(5 * mmp_update_interval,
++                               EXT4_MMP_MIN_CHECK_INTERVAL);
++      mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++      bdevname(bh->b_bdev, mmp->mmp_bdevname);
++      /* record this host's name (nodename); sysname is only the OS name */
++      memcpy(mmp->mmp_nodename, init_utsname()->nodename,
++             sizeof(mmp->mmp_nodename));
++
++      while (!kthread_should_stop()) {
++              if (++seq > EXT4_MMP_SEQ_MAX)
++                      seq = 1;
++
++              mmp->mmp_seq = cpu_to_le32(seq);
++              mmp->mmp_time = cpu_to_le64(get_seconds());
++              last_update_time = jiffies;
++
++              retval = write_mmp_block(bh);
++              /*
++               * Don't spew too many error messages: count every failed
++               * write, but report only one failure out of every 60.
++               */
++              if (retval) {
++                      if ((failed_writes % 60) == 0)
++                              ext4_error(sb, "Error writing to MMP block");
++                      failed_writes++;
++              }
++
++              if (!(le32_to_cpu(es->s_feature_incompat) &
++                  EXT4_FEATURE_INCOMPAT_MMP)) {
++                      ext4_warning(sb, "kmmpd being stopped "
++                                   "since MMP feature has been disabled.");
++                      EXT4_SB(sb)->s_mmp_tsk = NULL;
++                      goto failed;
++              }
++
++              if (sb->s_flags & MS_RDONLY) {
++                      ext4_warning(sb, "kmmpd being stopped "
++                                   "since filesystem has been remounted as "
++                                   "readonly.");
++                      EXT4_SB(sb)->s_mmp_tsk = NULL;
++                      goto failed;
++              }
++
++              diff = jiffies - last_update_time;
++              if (diff < mmp_update_interval * HZ)
++                      schedule_timeout_interruptible(mmp_update_interval *
++                                                     HZ - diff);
++
++              /*
++               * We need to make sure that more than mmp_check_interval
++               * seconds have not passed since writing. If that has happened
++               * we need to check if the MMP block is as we left it.
++               */
++              diff = jiffies - last_update_time;
++              if (diff > mmp_check_interval * HZ) {
++                      struct buffer_head *bh_check = NULL;
++                      struct mmp_struct *mmp_check;
++
++                      retval = read_mmp_block(sb, &bh_check, mmp_block);
++                      if (retval) {
++                              brelse(bh_check); /* still held on -EINVAL */
++                              EXT4_SB(sb)->s_mmp_tsk = NULL;
++                              goto failed;
++                      }
++                      mmp_check = (struct mmp_struct *)(bh_check->b_data);
++                      if (mmp->mmp_time != mmp_check->mmp_time ||
++                          memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
++                                 sizeof(mmp->mmp_nodename)))
++                              dump_mmp_msg(sb, mmp_check, __func__,
++                                           "Error while updating MMP info. "
++                                           "The filesystem seems to have "
++                                           "been multiply mounted.");
++
++                      put_bh(bh_check);
++              }
++
++              /*
++               * Adjust the mmp_check_interval depending on how much time
++               * it took for the MMP block to be written.
++               */
++              mmp_check_interval = max(5 * diff / HZ,
++                               (unsigned long) EXT4_MMP_MIN_CHECK_INTERVAL);
++              mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
++      }
++
++      /*
++       * Unmount seems to be clean.
++       */
++      mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
++      mmp->mmp_time = cpu_to_le64(get_seconds());
++
++      retval = write_mmp_block(bh);
++
++failed:
++      brelse(bh);     /* bh may still be NULL if the very first read failed */
++      return retval;  /* status of the last MMP block read or write */
++}
++
++/*
++ * Draw random 32-bit values until one does not exceed EXT4_MMP_SEQ_MAX,
++ * and hand that value back as the new MMP sequence number.
++ */
++static unsigned int mmp_new_seq(void)
++{
++      u32 candidate;
++
++      for (;;) {
++              get_random_bytes(&candidate, sizeof(candidate));
++              if (candidate <= EXT4_MMP_SEQ_MAX)
++                      return candidate;
++      }
++}
++
++/*
++ * Protect the filesystem from being mounted more than once.
++ *
++ * @sb:        superblock being mounted (or remounted read-write)
++ * @mmp_block: block number of the MMP block, taken from the superblock
++ *
++ * Reads the MMP block and, unless it carries the clean-unmount marker,
++ * waits and re-reads it to see whether somebody else changed the sequence
++ * number in the meantime.  Then writes a fresh random sequence number,
++ * waits again, verifies that the number is still ours and finally starts
++ * the kmmpd thread.
++ *
++ * Returns 0 on success and 1 on any failure (invalid block number, I/O
++ * error, fsck marker present, interrupted wait, sequence changed by
++ * someone else, or kmmpd could not be started).
++ */
++static int ext4_multi_mount_protect(struct super_block *sb,
++                                  unsigned long mmp_block)
++{
++      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++      struct buffer_head *bh = NULL;
++      struct mmp_struct *mmp = NULL;
++      u32 seq;        /* always kept in CPU byte order */
++      unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
++      unsigned int wait_time = 0;
++      int retval;
++
++      if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
++          mmp_block >= ext4_blocks_count(es)) {
++              ext4_warning(sb,
++                           "Invalid MMP block in superblock");
++              goto failed;
++      }
++
++      retval = read_mmp_block(sb, &bh, mmp_block);
++      if (retval)
++              goto failed;
++
++      mmp = (struct mmp_struct *)(bh->b_data);
++
++      if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
++              mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
++
++      /*
++       * If check_interval in MMP block is larger, use that instead of
++       * update_interval from the superblock.  The on-disk field is
++       * little-endian, so convert it before comparing or using it.
++       */
++      if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
++              mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
++
++      seq = le32_to_cpu(mmp->mmp_seq);
++      /*
++       * NOTE(review): on this path wait_time is still 0, so the wait
++       * after writing the new sequence number below does not sleep.
++       */
++      if (seq == EXT4_MMP_SEQ_CLEAN)
++              goto skip;
++
++      if (seq == EXT4_MMP_SEQ_FSCK) {
++              dump_mmp_msg(sb, mmp, __func__,
++                           "fsck is running on the filesystem");
++              goto failed;
++      }
++
++      wait_time = min(mmp_check_interval * 2 + 1,
++              mmp_check_interval + 60);
++
++      /* Print MMP interval if more than 20 secs. */
++      if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
++              ext4_warning(sb, "MMP interval %u higher than "
++                           "expected, please wait.\n", wait_time * 2);
++
++      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++              ext4_warning(sb, "MMP startup interrupted, failing "
++                           "mount\n");
++              goto failed;
++      }
++
++      /* The sequence number must not have moved while we were waiting. */
++      retval = read_mmp_block(sb, &bh, mmp_block);
++      if (retval)
++              goto failed;
++      mmp = (struct mmp_struct *)(bh->b_data);
++      if (seq != le32_to_cpu(mmp->mmp_seq)) {
++              dump_mmp_msg(sb, mmp, __func__,
++                           "Device is already active on another node.");
++              goto failed;
++      }
++
++skip:
++      /*
++       * write a new random sequence number.  Keep the CPU-order value in
++       * seq so that the comparison after the re-read below is done in
++       * one byte order on every architecture.
++       */
++      seq = mmp_new_seq();
++      mmp->mmp_seq = cpu_to_le32(seq);
++
++      retval = write_mmp_block(bh);
++      if (retval)
++              goto failed;
++
++      /*
++       * wait for MMP interval and check mmp_seq.
++       */
++      if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
++              ext4_warning(sb, "MMP startup interrupted, failing "
++                           "mount\n");
++              goto failed;
++      }
++
++      retval = read_mmp_block(sb, &bh, mmp_block);
++      if (retval)
++              goto failed;
++      mmp = (struct mmp_struct *)(bh->b_data);
++      if (seq != le32_to_cpu(mmp->mmp_seq)) {
++              dump_mmp_msg(sb, mmp, __func__,
++                           "Device is already active on another node.");
++              goto failed;
++      }
++
++      /*
++       * Start a kernel thread to update the MMP block periodically.
++       */
++      EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x",
++                                           MAJOR(sb->s_dev),
++                                           MINOR(sb->s_dev));
++      if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
++              /* never leave an ERR_PTR behind for later kthread_stop() */
++              EXT4_SB(sb)->s_mmp_tsk = NULL;
++              ext4_warning(sb, "Unable to create kmmpd thread "
++                           "for %s.", sb->s_id);
++              goto failed;
++      }
++
++      brelse(bh);
++      return 0;
++
++failed:
++      brelse(bh);
++      return 1;
++}
++
+ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
+                                       u64 ino, u32 generation)
+ {
+@@ -2816,6 +3158,11 @@
+                         EXT4_HAS_INCOMPAT_FEATURE(sb,
+                                   EXT4_FEATURE_INCOMPAT_RECOVER));
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
++          !(sb->s_flags & MS_RDONLY))
++              if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
++                      goto failed_mount3;
++
+       /*
+        * The first inode we look at is the journal inode.  Don't try
+        * root first: it may be modified in the journal!
+@@ -3052,6 +3399,8 @@
+       percpu_counter_destroy(&sbi->s_freeinodes_counter);
+       percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
++      if (sbi->s_mmp_tsk)
++              kthread_stop(sbi->s_mmp_tsk);
+ failed_mount2:
+       for (i = 0; i < db_count; i++)
+               brelse(sbi->s_group_desc[i]);
+@@ -3560,7 +3909,7 @@
+       struct ext4_mount_options old_opts;
+       ext4_group_t g;
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+-      int err;
++      int err = 0;
+ #ifdef CONFIG_QUOTA
+       int i;
+ #endif
+@@ -3682,6 +4031,13 @@
+                               goto restore_opts;
+                       if (!ext4_setup_super(sb, es, 0))
+                               sb->s_flags &= ~MS_RDONLY;
++                      if (EXT4_HAS_INCOMPAT_FEATURE(sb,
++                                                  EXT4_FEATURE_INCOMPAT_MMP))
++                              if (ext4_multi_mount_protect(sb,
++                                              le64_to_cpu(es->s_mmp_block))) {
++                                      err = -EROFS;
++                                      goto restore_opts;
++                              }
+               }
+       }
+       ext4_setup_system_zone(sb);
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-03 15:25:02.507538421 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-05 12:25:16.343986732 +0800
+@@ -894,7 +894,7 @@
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+-      __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
++      __le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+@@ -1041,6 +1041,9 @@
+       /* workqueue for dio unwritten */
+       struct workqueue_struct *dio_unwritten_wq;
++
++      /* Kernel thread for multiple mount protection */
++      struct task_struct *s_mmp_tsk;
+ };
+ static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
+@@ -1177,7 +1180,8 @@
+                                        EXT4_FEATURE_INCOMPAT_META_BG| \
+                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+-                                       EXT4_FEATURE_INCOMPAT_FLEX_BG)
++                                       EXT4_FEATURE_INCOMPAT_FLEX_BG| \
++                                       EXT4_FEATURE_INCOMPAT_MMP)
+ #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+@@ -1384,6 +1388,34 @@
+ extern struct proc_dir_entry *ext4_proc_root;
+ /*
++ * This structure will be used for multiple mount protection. It will be
++ * written into the block number saved in the s_mmp_block field in the
++ * superblock. Programs that check MMP should assume that if
++ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
++ * to use the filesystem, regardless of how old the timestamp is.
++ */
++#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
++#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
++#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
++#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
++
++/* The fields below add up to 1024 bytes (116 + 227 * 4), with no padding. */
++struct mmp_struct {
++      __le32  mmp_magic;      /* presumably EXT4_MMP_MAGIC - TODO confirm */
++      __le32  mmp_seq;        /* sequence number, or one of EXT4_MMP_SEQ_* */
++      __le64  mmp_time;       /* presumably time of last update - TODO confirm */
++      char    mmp_nodename[64]; /* presumably the updating node's name - TODO confirm */
++      char    mmp_bdevname[32]; /* presumably the block device name - TODO confirm */
++      __le16  mmp_check_interval; /* used by ext4_multi_mount_protect() when
++                                   * larger than s_mmp_update_interval */
++      __le16  mmp_pad1;
++      __le32  mmp_pad2[227];
++};
++
++/*
++ * Minimum interval for MMP checking in seconds.
++ */
++#define EXT4_MMP_MIN_CHECK_INTERVAL   5
++
++/*
+  * Function prototypes
+  */
diff --git a/ldiskfs/kernel_patches/patches/ext4-osd-iam-exports-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-osd-iam-exports-rhel6.patch
new file mode 100644 (file)
index 0000000..3bae32f
--- /dev/null
@@ -0,0 +1,64 @@
+diff -rupN 2.6.27.21_2/fs/ext4/ext4.h 2.6.27.21_3/fs/ext4/ext4.h
+--- 2.6.27.21_2/fs/ext4/ext4.h 2009-07-17 12:19:59.000000000 +0530
++++ 2.6.27.21_3/fs/ext4/ext4.h 2009-07-17 12:38:59.000000000 +0530
+@@ -1181,6 +1181,9 @@ extern int ext4_orphan_add(handle_t *, s
+ #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
+ extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
+                              struct inode *inode);
++extern struct buffer_head *ext4_append(handle_t *handle,
++                                     struct inode *inode,
++                                     ext4_lblk_t *block, int *err);
+ /* resize.c */
+ extern int ext4_group_add(struct super_block *sb,
+diff -rupN 2.6.27.21_2/fs/ext4/hash.c 2.6.27.21_3/fs/ext4/hash.c
+--- 2.6.27.21_2/fs/ext4/hash.c 2009-07-17 12:12:56.000000000 +0530
++++ 2.6.27.21_3/fs/ext4/hash.c 2009-07-17 12:40:22.000000000 +0530
+@@ -9,6 +9,7 @@
+  * License.
+  */
++#include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/jbd2.h>
+ #include <linux/cryptohash.h>
+@@ -206,3 +207,4 @@ int ext4fs_dirhash(const char *name, int
+       hinfo->minor_hash = minor_hash;
+       return 0;
+ }
++EXPORT_SYMBOL(ext4fs_dirhash);
+diff -rupN 2.6.27.21_2/fs/ext4/namei.c 2.6.27.21_3/fs/ext4/namei.c
+--- 2.6.27.21_2/fs/ext4/namei.c        2009-07-17 12:23:51.000000000 +0530
++++ 2.6.27.21_3/fs/ext4/namei.c        2009-07-17 12:37:59.000000000 +0530
+@@ -51,9 +51,9 @@
+ #define NAMEI_RA_SIZE      (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+-static struct buffer_head *ext4_append(handle_t *handle,
+-                                      struct inode *inode,
+-                                      ext4_lblk_t *block, int *err)
++struct buffer_head *ext4_append(handle_t *handle,
++                              struct inode *inode,
++                              ext4_lblk_t *block, int *err)
+ {
+       struct buffer_head *bh;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+@@ -72,6 +72,7 @@ static struct buffer_head *ext4_append(h
+       up(&ei->i_append_sem);
+       return bh;
+ }
++EXPORT_SYMBOL(ext4_append);
+ #ifndef assert
+ #define assert(test) J_ASSERT(test)
+diff -rupN 2.6.27.21_2/fs/ext4/super.c 2.6.27.21_3/fs/ext4/super.c
+--- 2.6.27.21_2/fs/ext4/super.c        2009-07-17 12:12:57.000000000 +0530
++++ 2.6.27.21_3/fs/ext4/super.c        2009-07-17 12:40:52.000000000 +0530
+@@ -377,6 +377,7 @@ void __ext4_std_error(struct super_block
+       ext4_handle_error(sb);
+ }
++EXPORT_SYMBOL(__ext4_std_error);
+ /*
+  * ext4_abort is a much stronger failure handler than ext4_error.  The
diff --git a/ldiskfs/kernel_patches/patches/ext4-osd-iop-common-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-osd-iop-common-rhel6.patch
new file mode 100644 (file)
index 0000000..228c1c4
--- /dev/null
@@ -0,0 +1,226 @@
+Index: linux-2.6.32.i386/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/ext4.h      2010-04-16 04:57:39.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/ext4.h   2010-04-16 05:27:02.000000000 +0530
+@@ -1512,6 +1512,19 @@
+ extern int ext4_orphan_del(handle_t *, struct inode *);
+ extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+                               __u32 start_minor_hash, __u32 *next_hash);
++extern struct inode *ext4_create_inode(handle_t *handle,
++                                     struct inode * dir, int mode);
++extern int ext4_add_entry(handle_t *handle, struct dentry *dentry,
++                        struct inode *inode);
++extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
++                           struct ext4_dir_entry_2 * de_del,
++                           struct buffer_head * bh);
++extern struct buffer_head * ext4_find_entry(struct inode *dir,
++                                          const struct qstr *d_name,
++                                          struct ext4_dir_entry_2 ** res_dir);
++#define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
++extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
++                             struct inode *inode);
+ /* resize.c */
+ extern int ext4_group_add(struct super_block *sb,
+Index: linux-2.6.32.i386/fs/ext4/namei.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/namei.c     2010-04-16 04:57:39.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/namei.c  2010-04-16 05:28:25.000000000 +0530
+@@ -24,6 +24,7 @@
+  *    Theodore Ts'o, 2002
+  */
++#include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+ #include <linux/jbd2.h>
+@@ -902,9 +903,9 @@
+  * The returned buffer_head has ->b_count elevated.  The caller is expected
+  * to brelse() it when appropriate.
+  */
+-static struct buffer_head * ext4_find_entry (struct inode *dir,
+-                                      const struct qstr *d_name,
+-                                      struct ext4_dir_entry_2 ** res_dir)
++struct buffer_head * ext4_find_entry(struct inode *dir,
++                                    const struct qstr *d_name,
++                                    struct ext4_dir_entry_2 ** res_dir)
+ {
+       struct super_block *sb;
+       struct buffer_head *bh_use[NAMEI_RA_SIZE];
+@@ -1011,6 +1012,7 @@
+               brelse(bh_use[ra_ptr]);
+       return ret;
+ }
++EXPORT_SYMBOL(ext4_find_entry);
+ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
+                      struct ext4_dir_entry_2 **res_dir, int *err)
+@@ -1538,8 +1540,8 @@
+  * may not sleep between calling this and putting something into
+  * the entry, as someone else might have used it while you slept.
+  */
+-static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+-                        struct inode *inode)
++int ext4_add_entry(handle_t *handle, struct dentry *dentry,
++                 struct inode *inode)
+ {
+       struct inode *dir = dentry->d_parent->d_inode;
+       struct buffer_head *bh;
+@@ -1588,6 +1590,7 @@
+       brelse(bh);
+       return retval;
+ }
++EXPORT_SYMBOL(ext4_add_entry);
+ /*
+  * Returns 0 for success, or a negative error value
+@@ -1728,10 +1731,10 @@
+  * ext4_delete_entry deletes a directory entry by merging it with the
+  * previous entry
+  */
+-static int ext4_delete_entry(handle_t *handle,
+-                           struct inode *dir,
+-                           struct ext4_dir_entry_2 *de_del,
+-                           struct buffer_head *bh)
++int ext4_delete_entry(handle_t *handle,
++                    struct inode *dir,
++                    struct ext4_dir_entry_2 *de_del,
++                    struct buffer_head *bh)
+ {
+       struct ext4_dir_entry_2 *de, *pde;
+       unsigned int blocksize = dir->i_sb->s_blocksize;
+@@ -1766,7 +1769,7 @@
+       }
+       return -ENOENT;
+ }
+-
++EXPORT_SYMBOL(ext4_delete_entry);
+ /*
+  * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
+  * since this indicates that nlinks count was previously 1.
+@@ -1831,6 +1834,26 @@
+       return inum;
+ }
++/*
++ * Allocate a new inode under @dir through ext4_new_inode() and attach
++ * inode operations according to @mode.  Character, block and FIFO inodes
++ * get ext4_special_inode_operations, and only when CONFIG_LDISKFS_FS_XATTR
++ * is defined; every other mode gets the regular file operations and
++ * address-space operations.
++ *
++ * Returns whatever ext4_new_inode() returned; an error pointer is passed
++ * through untouched.
++ */
++struct inode * ext4_create_inode(handle_t *handle, struct inode * dir, int mode)
++{
++      struct inode *new_inode = ext4_new_inode(handle, dir, mode, 0, 0);
++
++      if (IS_ERR(new_inode))
++              return new_inode;
++
++      if (!(S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode))) {
++              new_inode->i_op = &ext4_file_inode_operations;
++              new_inode->i_fop = &ext4_file_operations;
++              ext4_set_aops(new_inode);
++              return new_inode;
++      }
++
++#ifdef CONFIG_LDISKFS_FS_XATTR
++      new_inode->i_op = &ext4_special_inode_operations;
++#endif
++      return new_inode;
++}
++EXPORT_SYMBOL(ext4_create_inode);
++
+ /*
+  * By the time this is called, we already have created
+  * the directory cache entry for the new file, but it
+@@ -1905,40 +1928,33 @@
+       return err;
+ }
+-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
++/* Initialize @inode as a subdirectory of @dir, and add the
++ * "." and ".." entries into the first directory block. */
++int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
++                      struct inode *inode)
+ {
+-      handle_t *handle;
+-      struct inode *inode;
+-      struct buffer_head *dir_block;
+-      struct ext4_dir_entry_2 *de;
++      struct buffer_head * dir_block;
++      struct ext4_dir_entry_2 * de;
+       unsigned int blocksize = dir->i_sb->s_blocksize;
+-      int err, retries = 0;
+-
+-      if (EXT4_DIR_LINK_MAX(dir))
+-              return -EMLINK;
++      int err = 0;
+-retry:
+-      handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+-                                      EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+-                                      EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+       if (IS_DIRSYNC(dir))
+               ext4_handle_sync(handle);
+-      inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
+-                             &dentry->d_name, 0);
+-      err = PTR_ERR(inode);
+-      if (IS_ERR(inode))
+-              goto out_stop;
+       inode->i_op = &ext4_dir_inode_operations;
+       inode->i_fop = &ext4_dir_operations;
+       inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+       dir_block = ext4_bread(handle, inode, 0, 1, &err);
+-      if (!dir_block)
+-              goto out_clear_inode;
++      if (!dir_block) {
++              clear_nlink(inode);
++              ext4_mark_inode_dirty(handle, inode);
++              iput (inode);
++              goto get_out;
++      }
+       BUFFER_TRACE(dir_block, "get_write_access");
+       ext4_journal_get_write_access(handle, dir_block);
+       de = (struct ext4_dir_entry_2 *) dir_block->b_data;
+@@ -1960,9 +1976,43 @@
+       ext4_handle_dirty_metadata(handle, dir, dir_block);
+       brelse(dir_block);
+       ext4_mark_inode_dirty(handle, inode);
++get_out:
++      return err;
++}
++EXPORT_SYMBOL(ext4_add_dot_dotdot);
++
++
++static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
++{
++      handle_t *handle;
++      struct inode *inode;
++      int err, retries = 0;
++
++      if (EXT4_DIR_LINK_MAX(dir))
++              return -EMLINK;
++
++retry:
++      handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
++                                      EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
++                                      2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++
++      if (IS_DIRSYNC(dir))
++              handle->h_sync = 1;
++
++      inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
++                             &dentry->d_name, ext4_dentry_goal(dir->i_sb, dentry));
++      err = PTR_ERR(inode);
++      if (IS_ERR(inode))
++              goto out_stop;
++
++      err = ext4_add_dot_dotdot(handle, dir, inode);
++      if (err)
++              goto out_stop;
++
+       err = ext4_add_entry(handle, dentry, inode);
+       if (err) {
+-out_clear_inode:
+               clear_nlink(inode);
+               unlock_new_inode(inode);
+               ext4_mark_inode_dirty(handle, inode);
diff --git a/ldiskfs/kernel_patches/patches/ext4-pdir-fix-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-pdir-fix-rhel6.patch
new file mode 100644 (file)
index 0000000..fc7c791
--- /dev/null
@@ -0,0 +1,62 @@
+Index: linux-2.6.32.i386/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/ext4.h      2010-04-16 03:39:11.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/ext4.h   2010-04-16 04:27:41.000000000 +0530
+@@ -29,6 +29,7 @@
+ #ifndef _EXT4_H
+ #define _EXT4_H
++#include <linux/dynlocks.h>
+ #include <linux/types.h>
+ #include <linux/blkdev.h>
+ #include <linux/magic.h>
+@@ -621,6 +622,10 @@
+       ext4_fsblk_t    i_file_acl;
+       __u32   i_dtime;
++      /* following fields for parallel directory operations -bzzz */
++      struct dynlock   i_htree_lock;
++      struct semaphore i_append_sem;
++
+       /*
+        * i_block_group is the number of the block group which contains
+        * this file's inode.  Constant across the lifetime of the inode,
+Index: linux-2.6.32.i386/fs/ext4/namei.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/namei.c     2010-04-15 07:42:15.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/namei.c  2010-04-16 04:26:03.000000000 +0530
+@@ -54,6 +54,11 @@
+                                       ext4_lblk_t *block, int *err)
+ {
+       struct buffer_head *bh;
++      struct ext4_inode_info *ei = EXT4_I(inode);
++
++      /* with parallel dir operations all appends
++      * have to be serialized -bzzz */
++      down(&ei->i_append_sem);
+       *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+@@ -66,7 +71,9 @@
+                       brelse(bh);
+                       bh = NULL;
+               }
++              ei->i_disksize = inode->i_size;
+       }
++      up(&ei->i_append_sem);
+       return bh;
+ }
+Index: linux-2.6.32.i386/fs/ext4/super.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/super.c     2010-04-16 03:39:11.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/super.c  2010-04-16 04:26:03.000000000 +0530
+@@ -700,6 +700,8 @@
+       ei->vfs_inode.i_version = 1;
+       ei->vfs_inode.i_data.writeback_index = 0;
++      dynlock_init(&ei->i_htree_lock);
++      sema_init(&ei->i_append_sem, 1);
+       memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
+       INIT_LIST_HEAD(&ei->i_prealloc_list);
+       spin_lock_init(&ei->i_prealloc_lock);
diff --git a/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel6.patch
new file mode 100644 (file)
index 0000000..dd3252d
--- /dev/null
@@ -0,0 +1,366 @@
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h    2011-03-11 14:17:02.000000000 +0800
++++ linux-stage/fs/ext4/ext4.h 2011-03-11 14:20:08.269063193 +0800
+@@ -999,11 +999,14 @@
+       /* tunables */
+       unsigned long s_stripe;
+-      unsigned int s_mb_stream_request;
++      unsigned long s_mb_small_req;
++      unsigned long s_mb_large_req;
+       unsigned int s_mb_max_to_scan;
+       unsigned int s_mb_min_to_scan;
+       unsigned int s_mb_stats;
+       unsigned int s_mb_order2_reqs;
++      unsigned long *s_mb_prealloc_table;
++      unsigned long s_mb_prealloc_table_size;
+       unsigned int s_mb_group_prealloc;
+       unsigned int s_max_writeback_mb_bump;
+       /* where last allocation was done - for stream allocation */
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c 2011-03-11 14:03:32.000000000 +0800
++++ linux-stage/fs/ext4/mballoc.c      2011-03-11 14:44:49.106543493 +0800
+@@ -1823,6 +1823,26 @@
+       }
+ }
++/*
++ * Store @value in the first unused (zero) slot of the preallocation
++ * table.  Nothing is stored when @value exceeds the per-group limit
++ * checked below, when @value is not larger than an entry already in the
++ * table (callers are expected to add values in ascending order), or when
++ * the table has no free slot left.
++ */
++static void ext4_mb_prealloc_table_add(struct ext4_sb_info *sbi, int value)
++{
++      int slot = 0;
++
++      if (value > (sbi->s_blocks_per_group - 1 - 1 - sbi->s_itb_per_group))
++              return;
++
++      while (slot < sbi->s_mb_prealloc_table_size) {
++              if (sbi->s_mb_prealloc_table[slot] == 0) {
++                      sbi->s_mb_prealloc_table[slot] = value;
++                      break;
++              }
++
++              /* out of order or duplicate: ignore it */
++              if (value <= sbi->s_mb_prealloc_table[slot])
++                      break;
++
++              slot++;
++      }
++}
++
++
+ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
+                               ext4_group_t group, int cr)
+ {
+@@ -2173,6 +2193,80 @@
+       .show   = ext4_mb_seq_groups_show,
+ };
++#define EXT4_MB_PREALLOC_TABLE          "prealloc_table"
++
++/*
++ * read_proc handler: print every entry of the preallocation table into
++ * @page, each followed by a space, and finish with a newline.  The whole
++ * output is produced by the read at offset 0; any other offset returns 0.
++ * @data is the ext4_sb_info the entry was registered with.
++ */
++static int ext4_mb_prealloc_table_proc_read(char *page, char **start, off_t off,
++                                          int count, int *eof, void *data)
++{
++      struct ext4_sb_info *sbi = data;
++      char *out = page;
++      int idx = 0;
++
++      *eof = 1;
++      if (off != 0)
++              return 0;
++
++      while (idx < sbi->s_mb_prealloc_table_size) {
++              out += sprintf(out, "%ld ",
++                             sbi->s_mb_prealloc_table[idx]);
++              idx++;
++      }
++      out += sprintf(out, "\n");
++
++      *start = page;
++      return out - page;
++}
++
++/*
++ * write_proc handler: replace the preallocation table with the list of
++ * space-separated numbers written by the user.
++ *
++ * The list must be strictly ascending and contain at least one value, and
++ * every value must fit the same per-group limit that
++ * ext4_mb_prealloc_table_add() enforces.  Parsing stops at the first token
++ * that evaluates to 0 (for example a trailing newline).
++ *
++ * Returns @cnt on success, -EINVAL for a too long, empty, unordered or
++ * out-of-range list, -EFAULT if the user buffer cannot be read and
++ * -ENOMEM if the new table cannot be allocated.  On any error the current
++ * table is left untouched.
++ *
++ * NOTE(review): no locking against concurrent users of the table is
++ * visible in this function - confirm whether one is needed.
++ */
++static int ext4_mb_prealloc_table_proc_write(struct file *file,
++                                           const char __user *buf,
++                                           unsigned long cnt, void *data)
++{
++      struct ext4_sb_info *sbi = data;
++      unsigned long value;
++      unsigned long prev = 0;
++      char str[128];
++      char *cur;
++      char *end;
++      unsigned long *new_table;
++      unsigned long *old_table;
++      int num = 0;
++      int i = 0;
++
++      /* ">=" keeps one byte free for the terminating NUL added below */
++      if (cnt >= sizeof(str))
++              return -EINVAL;
++      if (copy_from_user(str, buf, cnt))
++              return -EFAULT;
++      /* simple_strtol() must never scan past the bytes we copied */
++      str[cnt] = '\0';
++
++      /* pass 1: validate the list and count its entries */
++      cur = str;
++      end = str + cnt;
++      while (cur < end) {
++              while ((cur < end) && (*cur == ' '))
++                      cur++;
++              value = simple_strtol(cur, &cur, 0);
++              if (value == 0)
++                      break;
++              if (value <= prev)
++                      return -EINVAL;
++              /*
++               * Reject what ext4_mb_prealloc_table_add() would silently
++               * drop; a dropped value would leave a zero entry behind,
++               * which ext4_mb_normalize_request() may use as a divisor.
++               * Negative input ends up here too, as a huge unsigned value.
++               */
++              if (value > (sbi->s_blocks_per_group - 1 - 1 -
++                           sbi->s_itb_per_group))
++                      return -EINVAL;
++              prev = value;
++              num++;
++      }
++
++      /* an empty table would make its users index entry [-1] */
++      if (num == 0)
++              return -EINVAL;
++
++      new_table = kcalloc(num, sizeof(*new_table), GFP_KERNEL);
++      if (new_table == NULL)
++              return -ENOMEM;
++
++      /* pass 2: the list is known to be valid, fill the table in order */
++      cur = str;
++      while (cur < end && i < num) {
++              while ((cur < end) && (*cur == ' '))
++                      cur++;
++              new_table[i] = simple_strtol(cur, &cur, 0);
++              i++;
++      }
++
++      /* install the complete table first, free the old one afterwards */
++      old_table = sbi->s_mb_prealloc_table;
++      sbi->s_mb_prealloc_table = new_table;
++      sbi->s_mb_prealloc_table_size = num;
++      kfree(old_table);
++
++      return cnt;
++}
++
+ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
+ {
+       struct super_block *sb = PDE(inode)->data;
+@@ -2411,12 +2505,56 @@
+       sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
+       sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
+       sbi->s_mb_stats = MB_DEFAULT_STATS;
+-      sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
+       sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+-      sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
++
++      if (sbi->s_stripe == 0) {
++              sbi->s_mb_prealloc_table_size = 10;
++              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
++              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
++              if (sbi->s_mb_prealloc_table == NULL) {
++                      kfree(sbi->s_mb_offsets);
++                      kfree(sbi->s_mb_maxs);
++                      return -ENOMEM;
++              }
++              memset(sbi->s_mb_prealloc_table, 0, i);
++
++              ext4_mb_prealloc_table_add(sbi, 4);
++              ext4_mb_prealloc_table_add(sbi, 8);
++              ext4_mb_prealloc_table_add(sbi, 16);
++              ext4_mb_prealloc_table_add(sbi, 32);
++              ext4_mb_prealloc_table_add(sbi, 64);
++              ext4_mb_prealloc_table_add(sbi, 128);
++              ext4_mb_prealloc_table_add(sbi, 256);
++              ext4_mb_prealloc_table_add(sbi, 512);
++              ext4_mb_prealloc_table_add(sbi, 1024);
++              ext4_mb_prealloc_table_add(sbi, 2048);
++
++              sbi->s_mb_small_req = 256;
++              sbi->s_mb_large_req = 1024;
++              sbi->s_mb_group_prealloc = 512;
++      } else {
++              sbi->s_mb_prealloc_table_size = 3;
++              i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long);
++              sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS);
++              if (sbi->s_mb_prealloc_table == NULL) {
++                      kfree(sbi->s_mb_offsets);
++                      kfree(sbi->s_mb_maxs);
++                      return -ENOMEM;
++              }
++              memset(sbi->s_mb_prealloc_table, 0, i);
++
++              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe);
++              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 2);
++              ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 4);
++
++              sbi->s_mb_small_req = sbi->s_stripe;
++              sbi->s_mb_large_req = sbi->s_stripe * 8;
++              sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
++      }
+       sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
+       if (sbi->s_locality_groups == NULL) {
++              kfree(sbi->s_mb_prealloc_table);
+               kfree(sbi->s_mb_offsets);
+               kfree(sbi->s_mb_maxs);
+               return -ENOMEM;
+@@ -2430,9 +2568,18 @@
+               spin_lock_init(&lg->lg_prealloc_lock);
+       }
+-      if (sbi->s_proc)
++      if (sbi->s_proc) {
++              struct proc_dir_entry *p;
+               proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
+                                &ext4_mb_seq_groups_fops, sb);
++              p = create_proc_entry(EXT4_MB_PREALLOC_TABLE, S_IFREG |
++                                    S_IRUGO | S_IWUSR, sbi->s_proc);
++              if (p) {
++                      p->data = sbi;
++                      p->read_proc = ext4_mb_prealloc_table_proc_read;
++                      p->write_proc = ext4_mb_prealloc_table_proc_write;
++              }
++      }
+       if (sbi->s_journal)
+               sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+@@ -2512,8 +2659,10 @@
+       }
+       free_percpu(sbi->s_locality_groups);
+-      if (sbi->s_proc)
++      if (sbi->s_proc) {
+               remove_proc_entry("mb_groups", sbi->s_proc);
++              remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc);
++      }
+       return 0;
+ }
+@@ -2807,11 +2956,12 @@
+ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+                               struct ext4_allocation_request *ar)
+ {
+-      int bsbits, max;
++      int bsbits, i, wind;
+       ext4_lblk_t end;
+-      loff_t size, orig_size, start_off;
++      loff_t size, orig_size;
+       ext4_lblk_t start, orig_start;
+       struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
++      struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+       struct ext4_prealloc_space *pa;
+       /* do normalize only data requests, metadata requests
+@@ -2841,49 +2991,35 @@
+       size = size << bsbits;
+       if (size < i_size_read(ac->ac_inode))
+               size = i_size_read(ac->ac_inode);
++      size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits;
+-      /* max size of free chunks */
+-      max = 2 << bsbits;
++      start = wind = 0;
+-#define NRL_CHECK_SIZE(req, size, max, chunk_size)    \
+-              (req <= (size) || max <= (chunk_size))
++      /* let's choose preallocation window depending on file size */
++      for (i = 0; i < sbi->s_mb_prealloc_table_size; i++) {
++              if (size <= sbi->s_mb_prealloc_table[i]) {
++                      wind = sbi->s_mb_prealloc_table[i];
++                      break;
++              }
++      }
++      size = wind;
+-      /* first, try to predict filesize */
+-      /* XXX: should this table be tunable? */
+-      start_off = 0;
+-      if (size <= 16 * 1024) {
+-              size = 16 * 1024;
+-      } else if (size <= 32 * 1024) {
+-              size = 32 * 1024;
+-      } else if (size <= 64 * 1024) {
+-              size = 64 * 1024;
+-      } else if (size <= 128 * 1024) {
+-              size = 128 * 1024;
+-      } else if (size <= 256 * 1024) {
+-              size = 256 * 1024;
+-      } else if (size <= 512 * 1024) {
+-              size = 512 * 1024;
+-      } else if (size <= 1024 * 1024) {
+-              size = 1024 * 1024;
+-      } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
+-              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+-                                              (21 - bsbits)) << 21;
+-              size = 2 * 1024 * 1024;
+-      } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
+-              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+-                                                      (22 - bsbits)) << 22;
+-              size = 4 * 1024 * 1024;
+-      } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
+-                                      (8<<20)>>bsbits, max, 8 * 1024)) {
+-              start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+-                                                      (23 - bsbits)) << 23;
+-              size = 8 * 1024 * 1024;
+-      } else {
+-              start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
+-              size      = ac->ac_o_ex.fe_len << bsbits;
++      if (wind == 0) {
++              __u64 tstart, tend;
++              /* file is quite large, we now preallocate with
++               * the biggest configured window with regard to
++               * logical offset */
++              wind = sbi->s_mb_prealloc_table[i - 1];
++              tstart = ac->ac_o_ex.fe_logical;
++              do_div(tstart, wind);
++              start = tstart * wind;
++              tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1;
++              do_div(tend, wind);
++              tend = tend * wind + wind;
++              size = tend - start;
+       }
+-      orig_size = size = size >> bsbits;
+-      orig_start = start = start_off >> bsbits;
++      orig_size = size;
++      orig_start = start;
+       /* don't cover already allocated blocks in selected range */
+       if (ar->pleft && start <= ar->lleft) {
+@@ -2955,7 +3091,6 @@
+       }
+       BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
+                       start > ac->ac_o_ex.fe_logical);
+-      BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+       /* now prepare goal request */
+@@ -3939,11 +4074,19 @@
+       /* don't use group allocation for large files */
+       size = max(size, isize);
+-      if (size > sbi->s_mb_stream_request) {
++      if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) ||
++          (size >= sbi->s_mb_large_req)) {
+               ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+               return;
+       }
++      /*
++       * request is so large that we don't care about
++       * streaming - it outweighs any possible seek
++       */
++      if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req)
++              return;
++
+       BUG_ON(ac->ac_lg != NULL);
+       /*
+        * locality group prealloc space are per cpu. The reason for having
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c   2011-03-11 14:16:56.000000000 +0800
++++ linux-stage/fs/ext4/super.c        2011-03-11 14:19:24.664467626 +0800
+@@ -2632,7 +2632,8 @@
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
+-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
++EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
++EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
+ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+ EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
+@@ -2647,7 +2648,8 @@
+       ATTR_LIST(mb_max_to_scan),
+       ATTR_LIST(mb_min_to_scan),
+       ATTR_LIST(mb_order2_req),
+-      ATTR_LIST(mb_stream_req),
++      ATTR_LIST(mb_small_req),
++      ATTR_LIST(mb_large_req),
+       ATTR_LIST(mb_group_prealloc),
+       ATTR_LIST(max_writeback_mb_bump),
+       NULL,
diff --git a/ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-print-inum-in-htree-warning-rhel6.patch
new file mode 100644 (file)
index 0000000..fecb1a7
--- /dev/null
@@ -0,0 +1,15 @@
+Index: linux-stage/fs/ext4/namei.c
+===================================================================
+--- linux-stage.orig/fs/ext4/namei.c
++++ linux-stage/fs/ext4/namei.c
+@@ -371,8 +371,8 @@ dx_probe(const struct qstr *d_name, stru
+       if (root->info.hash_version != DX_HASH_TEA &&
+           root->info.hash_version != DX_HASH_HALF_MD4 &&
+           root->info.hash_version != DX_HASH_LEGACY) {
+-              ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
+-                           root->info.hash_version);
++              ext4_warning(dir->i_sb, "Unrecognised inode hash code %d for directory "
++                             "#%lu", root->info.hash_version, dir->i_ino);
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
diff --git a/ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4-wantedi-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..dec376f
--- /dev/null
@@ -0,0 +1,42 @@
+Index: linux-2.6.32.i386/fs/ext4/namei.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/namei.c     2010-04-07 00:16:32.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/namei.c  2010-04-07 00:17:09.000000000 +0530
+@@ -144,6 +144,17 @@
+       u16 size;
+ };
++/*
++ * dentry_param used by ext4_new_inode_wantedi()
++ */
++#define LVFS_DENTRY_PARAM_MAGIC               20070216UL
++struct lvfs_dentry_params
++{
++      unsigned long   ldp_inum;
++      unsigned long   ldp_flags;
++      u32             ldp_magic;
++};
++
+ static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
+ static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
+ static inline unsigned dx_get_hash(struct dx_entry *entry);
+@@ -1751,6 +1762,19 @@
+       return err;
+ }
++/*
++ * Return ldp_inum from a magic-tagged lvfs_dentry_params in d_fsdata, or
++ * the superblock's s_inode_goal when there is none.
++ */
++static unsigned ext4_dentry_goal(struct super_block *sb, struct dentry *dentry)
++{
++      struct lvfs_dentry_params *param = dentry->d_fsdata;
++
++      if (param != NULL && param->ldp_magic == LVFS_DENTRY_PARAM_MAGIC)
++              return param->ldp_inum;
++      return EXT4_SB(sb)->s_inode_goal;
++}
++
+ /*
+  * By the time this is called, we already have created
+  * the directory cache entry for the new file, but it
diff --git a/ldiskfs/kernel_patches/patches/ext4_data_in_dirent-rhel6.patch b/ldiskfs/kernel_patches/patches/ext4_data_in_dirent-rhel6.patch
new file mode 100644 (file)
index 0000000..9e68778
--- /dev/null
@@ -0,0 +1,503 @@
+This patch implements a feature that allows ext4 users (e.g. Lustre)
+to store data in an ext4 dirent.
+The data is stored in the dirent after the file name, and this space is
+accounted for in de->rec_len. The flag EXT4_DIRENT_LUFID is added to
+d_type if extra data is present.
+
+dentry->d_fsdata is used to pass the fid to ext4, so no changes to the
+ext4_add_entry() interface are required.
+
+Index: linux-2.6.32.i386/fs/ext4/dir.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/dir.c       2009-12-03 09:21:21.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/dir.c    2010-04-16 06:25:43.000000000 +0530
+@@ -53,11 +53,18 @@
+ static unsigned char get_dtype(struct super_block *sb, int filetype)
+ {
++      int fl_index = filetype & EXT4_FT_MASK;
++
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
+-          (filetype >= EXT4_FT_MAX))
++          (fl_index >= EXT4_FT_MAX))
+               return DT_UNKNOWN;
+-      return (ext4_filetype_table[filetype]);
++      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
++              return (ext4_filetype_table[fl_index]);
++
++      return (ext4_filetype_table[fl_index]) |
++              (filetype & EXT4_DIRENT_LUFID);
++
+ }
+@@ -70,11 +77,11 @@
+       const int rlen = ext4_rec_len_from_disk(de->rec_len,
+                                               dir->i_sb->s_blocksize);
+-      if (rlen < EXT4_DIR_REC_LEN(1))
++      if (rlen < __EXT4_DIR_REC_LEN(1))
+               error_msg = "rec_len is smaller than minimal";
+       else if (rlen % 4 != 0)
+               error_msg = "rec_len % 4 != 0";
+-      else if (rlen < EXT4_DIR_REC_LEN(de->name_len))
++      else if (rlen < EXT4_DIR_REC_LEN(de))
+               error_msg = "rec_len is too small for name_len";
+       else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+               error_msg = "directory entry across blocks";
+@@ -179,7 +186,7 @@
+                                * failure will be detected in the
+                                * dirent test below. */
+                               if (ext4_rec_len_from_disk(de->rec_len,
+-                                      sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
++                                      sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
+                                       break;
+                               i += ext4_rec_len_from_disk(de->rec_len,
+                                                           sb->s_blocksize);
+@@ -342,12 +349,17 @@
+       struct fname *fname, *new_fn;
+       struct dir_private_info *info;
+       int len;
++      int extra_data = 1;
+       info = (struct dir_private_info *) dir_file->private_data;
+       p = &info->root.rb_node;
+       /* Create and allocate the fname structure */
+-      len = sizeof(struct fname) + dirent->name_len + 1;
++      if (dirent->file_type & EXT4_DIRENT_LUFID)
++              extra_data = ext4_get_dirent_data_len(dirent);
++
++      len = sizeof(struct fname) + dirent->name_len + extra_data;
++
+       new_fn = kzalloc(len, GFP_KERNEL);
+       if (!new_fn)
+               return -ENOMEM;
+@@ -356,7 +368,7 @@
+       new_fn->inode = le32_to_cpu(dirent->inode);
+       new_fn->name_len = dirent->name_len;
+       new_fn->file_type = dirent->file_type;
+-      memcpy(new_fn->name, dirent->name, dirent->name_len);
++      memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
+       new_fn->name[dirent->name_len] = 0;
+       while (*p) {
+Index: linux-2.6.32.i386/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/ext4.h      2010-04-16 06:10:06.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/ext4.h   2010-04-16 06:27:40.000000000 +0530
+@@ -1135,6 +1135,7 @@
+ #define EXT4_FEATURE_INCOMPAT_64BIT           0x0080
+ #define EXT4_FEATURE_INCOMPAT_MMP               0x0100
+ #define EXT4_FEATURE_INCOMPAT_FLEX_BG         0x0200
++#define EXT4_FEATURE_INCOMPAT_DIRDATA         0x1000
+ #define EXT4_FEATURE_COMPAT_SUPP      EXT2_FEATURE_COMPAT_EXT_ATTR
+ #define EXT4_FEATURE_INCOMPAT_SUPP    (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+@@ -1143,7 +1144,9 @@
+                                        EXT4_FEATURE_INCOMPAT_EXTENTS| \
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+                                        EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+-                                       EXT4_FEATURE_INCOMPAT_MMP)
++                                       EXT4_FEATURE_INCOMPAT_MMP| \
++                                       EXT4_FEATURE_INCOMPAT_DIRDATA)
++
+ #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+@@ -1225,6 +1228,43 @@
+ #define EXT4_FT_SYMLINK               7
+ #define EXT4_FT_MAX           8
++#define EXT4_FT_MASK          0xf
++
++#if EXT4_FT_MAX > EXT4_FT_MASK
++#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
++#endif
++
++/*
++ * d_type has 4 unused bits, so it can flag four kinds of extra data. These
++ * kinds of data (e.g. Lustre data, high 32 bits of a 64-bit inode number) are
++ * stored, in flag order, after the file name in the ext4 dirent.
++ */
++/*
++ * This flag is added to d_type if the ext4 dirent has extra data after the
++ * file name. The data length is variable and is stored in the first byte of
++ * the data. The data starts after the file name's NUL byte.
++ * This is used by Lustre.
++ */
++#define EXT4_DIRENT_LUFID             0x10
++
++#define EXT4_LUFID_MAGIC    0xAD200907UL
++struct ext4_dentry_param {
++      __u32  edp_magic;       /* EXT4_LUFID_MAGIC */
++      char   edp_len;         /* bytes in edp_len + edp_data */
++      char   edp_data[0];     /* packed array of data */
++} __attribute__((packed));
++
++/* Return @p's packed data, starting at its length byte, or NULL when @sb
++ * lacks the DIRDATA feature or @p is NULL or carries the wrong magic. */
++static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
++              struct ext4_dentry_param *p)
++{
++      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
++              return NULL;
++      if (p == NULL || p->edp_magic != EXT4_LUFID_MAGIC)
++              return NULL;
++      return (unsigned char *)&p->edp_len;
++}
+ /*
+  * EXT4_DIR_PAD defines the directory entries boundaries
+@@ -1233,8 +1273,11 @@
+  */
+ #define EXT4_DIR_PAD                  4
+ #define EXT4_DIR_ROUND                        (EXT4_DIR_PAD - 1)
+-#define EXT4_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT4_DIR_ROUND) & \
++#define __EXT4_DIR_REC_LEN(name_len)  (((name_len) + 8 + EXT4_DIR_ROUND) & \
+                                        ~EXT4_DIR_ROUND)
++/* space needed by @de: 8 + name + extra data, rounded up to EXT4_DIR_PAD */
++#define EXT4_DIR_REC_LEN(de)          (__EXT4_DIR_REC_LEN((de)->name_len +\
++                                      ext4_get_dirent_data_len(de)))
+ #define EXT4_MAX_REC_LEN              ((1<<16)-1)
+ /*
+@@ -1524,7 +1567,7 @@
+                                           struct ext4_dir_entry_2 ** res_dir);
+ #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
+ extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
+-                             struct inode *inode);
++                             struct inode *inode, const void *, const void *);
+ extern struct buffer_head *ext4_append(handle_t *handle,
+                                      struct inode *inode,
+                                      ext4_lblk_t *block, int *err);
+@@ -1851,6 +1894,28 @@
+       set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
+ }
++/*
++ * Return the size of the extra data stored after the name in @de: the NUL
++ * that ends the name plus every extension present, or 0 if there is none.
++ * Each extension has a bit set in the high 4 bits of de->file_type and starts
++ * with an unsigned length byte (as read here, the length counts that byte).
++ */
++static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
++{
++      __u8 *len = (__u8 *)de->name + de->name_len + 1 /* NUL terminator */;
++      int dlen = 0;
++      __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
++
++      while (extra_data_flags) {
++              if (extra_data_flags & 1) {
++                      dlen += *len + (dlen == 0);
++                      len += *len;
++              }
++              extra_data_flags >>= 1;
++      }
++      return dlen;
++}
++
+ #endif        /* __KERNEL__ */
+ #endif        /* _EXT4_H */
+Index: linux-2.6.32.i386/fs/ext4/namei.c
+===================================================================
+--- linux-2.6.32.i386.orig/fs/ext4/namei.c     2010-04-16 05:47:41.000000000 +0530
++++ linux-2.6.32.i386/fs/ext4/namei.c  2010-04-16 06:40:38.000000000 +0530
+@@ -170,7 +170,8 @@
+ static unsigned dx_get_limit(struct dx_entry *entries);
+ static void dx_set_count(struct dx_entry *entries, unsigned value);
+ static void dx_set_limit(struct dx_entry *entries, unsigned value);
+-static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
++static inline unsigned dx_root_limit(__u32 blocksize,
++              struct ext4_dir_entry_2 *dot_de, unsigned infosize);
+ static unsigned dx_node_limit(struct inode *dir);
+ static struct dx_frame *dx_probe(const struct qstr *d_name,
+                                struct inode *dir,
+@@ -237,11 +238,12 @@
+  */
+ struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de)
+ {
+-       /* get dotdot first */
+-       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
++      BUG_ON(de->name_len != 1);
++      /* get dotdot first */
++      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
+-       /* dx root info is after dotdot entry */
+-       de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
++      /* dx root info is after dotdot entry */
++      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
+        return (struct dx_root_info *) de;
+ }
+@@ -286,16 +288,23 @@
+       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+-static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
++static inline unsigned dx_root_limit(__u32 blocksize,
++              struct ext4_dir_entry_2 *dot_de, unsigned infosize)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
+-              EXT4_DIR_REC_LEN(2) - infosize;
++      struct ext4_dir_entry_2 *dotdot_de;
++      unsigned entry_space;
++
++      BUG_ON(dot_de->name_len != 1);
++      dotdot_de = ext4_next_entry(dot_de, blocksize);
++      entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) -
++                       EXT4_DIR_REC_LEN(dotdot_de) - infosize;
++
+       return entry_space / sizeof(struct dx_entry);
+ }
+ static inline unsigned dx_node_limit(struct inode *dir)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
++      unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
+       return entry_space / sizeof(struct dx_entry);
+ }
+@@ -342,7 +351,7 @@
+                               printk(":%x.%u ", h.hash,
+                                      ((char *) de - base));
+                       }
+-                      space += EXT4_DIR_REC_LEN(de->name_len);
++                      space += EXT4_DIR_REC_LEN(de);
+                       names++;
+               }
+               de = ext4_next_entry(de, size);
+@@ -447,7 +456,8 @@
+       entries = (struct dx_entry *) (((char *)info) + info->info_length);
+-      if (dx_get_limit(entries) != dx_root_limit(dir,
++      if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize,
++                                                 (struct ext4_dir_entry_2*)bh->b_data,
+                                                  info->info_length)) {
+               ext4_warning(dir->i_sb, __func__,
+                            "dx entry: limit != root limit");
+@@ -637,7 +647,7 @@
+       de = (struct ext4_dir_entry_2 *) bh->b_data;
+       top = (struct ext4_dir_entry_2 *) ((char *) de +
+                                          dir->i_sb->s_blocksize -
+-                                         EXT4_DIR_REC_LEN(0));
++                                         __EXT4_DIR_REC_LEN(0));
+       for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
+               if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
+                                       (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+@@ -1050,7 +1060,7 @@
+                       goto errout;
+               de = (struct ext4_dir_entry_2 *) bh->b_data;
+               top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
+-                                     EXT4_DIR_REC_LEN(0));
++                                     __EXT4_DIR_REC_LEN(0));
+               for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
+                       int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+                                 + ((char *) de - bh->b_data);
+@@ -1216,7 +1226,7 @@
+       while (count--) {
+               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) 
+                                               (from + (map->offs<<2));
+-              rec_len = EXT4_DIR_REC_LEN(de->name_len);
++              rec_len = EXT4_DIR_REC_LEN(de);
+               memcpy (to, de, rec_len);
+               ((struct ext4_dir_entry_2 *) to)->rec_len =
+                               ext4_rec_len_to_disk(rec_len, blocksize);
+@@ -1240,7 +1250,7 @@
+       while ((char*)de < base + blocksize) {
+               next = ext4_next_entry(de, blocksize);
+               if (de->inode && de->name_len) {
+-                      rec_len = EXT4_DIR_REC_LEN(de->name_len);
++                      rec_len = EXT4_DIR_REC_LEN(de);
+                       if (de > to)
+                               memmove(to, de, rec_len);
+                       to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
+@@ -1370,10 +1380,16 @@
+       unsigned int    offset = 0;
+       unsigned int    blocksize = dir->i_sb->s_blocksize;
+       unsigned short  reclen;
+-      int             nlen, rlen, err;
++      int             nlen, rlen, err, dlen = 0;
++      unsigned char   *data;
+       char            *top;
+-      reclen = EXT4_DIR_REC_LEN(namelen);
++      data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
++                                              dentry->d_fsdata);
++      if (data)
++              dlen = (*data) + 1;
++
++      reclen = __EXT4_DIR_REC_LEN(namelen + dlen);
+       if (!de) {
+               de = (struct ext4_dir_entry_2 *)bh->b_data;
+               top = bh->b_data + blocksize - reclen;
+@@ -1383,7 +1399,7 @@
+                               return -EIO;
+                       if (ext4_match(namelen, name, de))
+                               return -EEXIST;
+-                      nlen = EXT4_DIR_REC_LEN(de->name_len);
++                      nlen = EXT4_DIR_REC_LEN(de);
+                       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+                       if ((de->inode? rlen - nlen: rlen) >= reclen)
+                               break;
+@@ -1401,7 +1417,7 @@
+       }
+       /* By now the buffer is marked for journaling */
+-      nlen = EXT4_DIR_REC_LEN(de->name_len);
++      nlen = EXT4_DIR_REC_LEN(de);
+       rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
+       if (de->inode) {
+               struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
+@@ -1417,6 +1433,12 @@
+               de->inode = 0;
+       de->name_len = namelen;
+       memcpy(de->name, name, namelen);
++      if (data) {
++              de->name[namelen] = 0;
++              memcpy(&de->name[namelen + 1], data, *(char *) data);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
++
+       /*
+        * XXX shouldn't update any times until successful
+        * completion of syscall, but too many callers depend
+@@ -1515,7 +1537,8 @@
+       dx_set_block(entries, 1);
+       dx_set_count(entries, 1);
+-      dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
++      dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize,
++                                       dot_de, sizeof(*dx_info)));
+       /* Initialize as for dx_probe */
+       hinfo.hash_version = dx_info->hash_version;
+@@ -1546,6 +1569,8 @@
+       struct buffer_head * dir_block;
+       struct ext4_dir_entry_2 * de;
+       int len, journal = 0, err = 0;
++      int dlen = 0;
++      char *data;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1561,19 +1586,24 @@
+       /* the first item must be "." */
+       assert(de->name_len == 1 && de->name[0] == '.');
+       len = le16_to_cpu(de->rec_len);
+-      assert(len >= EXT4_DIR_REC_LEN(1));
+-      if (len > EXT4_DIR_REC_LEN(1)) {
++      assert(len >= __EXT4_DIR_REC_LEN(1));
++      if (len > __EXT4_DIR_REC_LEN(1)) {
+               BUFFER_TRACE(dir_block, "get_write_access");
+               err = ext4_journal_get_write_access(handle, dir_block);
+               if (err)
+                       goto out_journal;
+               journal = 1;
+-              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
++              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
+       }
+-      len -= EXT4_DIR_REC_LEN(1);
+-      assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
++      len -= EXT4_DIR_REC_LEN(de);
++      data = ext4_dentry_get_data(dir->i_sb,
++                      (struct ext4_dentry_param *) dentry->d_fsdata);
++      if (data)
++              dlen = *data + 1;
++      assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
++
+       de = (struct ext4_dir_entry_2 *)
+                       ((char *) de + le16_to_cpu(de->rec_len));
+       if (!journal) {
+@@ -1587,10 +1617,15 @@
+       if (len > 0)
+               de->rec_len = cpu_to_le16(len);
+       else
+-              assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
++              assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
+       de->name_len = 2;
+       strcpy (de->name, "..");
+       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
++      if (data) {
++              de->name[2] = 0;
++              memcpy(&de->name[2 + 1], data, dlen);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
+ out_journal:
+       if (journal) {
+@@ -2011,12 +2046,13 @@
+ /* Initialize @inode as a subdirectory of @dir, and add the
+  * "." and ".." entries into the first directory block. */
+ int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir,
+-                      struct inode *inode)
++                      struct inode *inode,
++                        const void *data1, const void *data2)
+ {
+       struct buffer_head * dir_block;
+       struct ext4_dir_entry_2 * de;
+       unsigned int blocksize = dir->i_sb->s_blocksize;
+-      int err = 0;
++      int err = 0, dot_reclen;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -2040,17 +2076,32 @@
+       de = (struct ext4_dir_entry_2 *) dir_block->b_data;
+       de->inode = cpu_to_le32(inode->i_ino);
+       de->name_len = 1;
+-      de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+-                                         blocksize);
+       strcpy(de->name, ".");
+       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
++      /* get packed fid data*/
++      data1 = ext4_dentry_get_data(dir->i_sb,
++                              (struct ext4_dentry_param *) data1);
++      if (data1) {
++              de->name[1] = 0;
++              memcpy(&de->name[2], data1, *(char *) data1);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
++      de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
++      dot_reclen = cpu_to_le16(de->rec_len);
+       de = ext4_next_entry(de, blocksize);
+       de->inode = cpu_to_le32(dir->i_ino);
+-      de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
++      de->rec_len = ext4_rec_len_to_disk(blocksize - dot_reclen,
+                                          blocksize);
+       de->name_len = 2;
+       strcpy(de->name, "..");
+       ext4_set_de_type(dir->i_sb, de, S_IFDIR);
++      data2 = ext4_dentry_get_data(dir->i_sb,
++                      (struct ext4_dentry_param *) data2);
++      if (data2) {
++              de->name[2] = 0;
++              memcpy(&de->name[3], data2, *(char *) data2);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
+       inode->i_nlink = 2;
+       BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+       ext4_handle_dirty_metadata(handle, dir, dir_block);
+@@ -2087,7 +2138,7 @@
+       if (IS_ERR(inode))
+               goto out_stop;
+-      err = ext4_add_dot_dotdot(handle, dir, inode);
++      err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL);
+       if (err)
+               goto out_stop;
+@@ -2123,7 +2174,7 @@
+       int err = 0;
+       sb = inode->i_sb;
+-      if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
++      if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) ||
+           !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
+               if (err)
+                       ext4_error(inode->i_sb, __func__,
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
new file mode 100644 (file)
index 0000000..bb9fdbd
--- /dev/null
@@ -0,0 +1,29 @@
+ext4-wantedi-2.6-rhel6.patch
+ext4-map_inode_page-2.6-rhel6.patch
+export-ext4-2.6-rhel6.patch
+ext4-remove-cond_resched-calls-rhel5.patch
+ext4-ext_generation-sles11.patch
+ext4-inode-version-rhel6.patch
+ext4-mmp-rhel6.patch
+ext4-lookup-dotdot-rhel5.patch
+ext4-max-dir-size-rhel6.patch
+ext4-print-inum-in-htree-warning-rhel6.patch
+ext4-xattr-no-update-ctime-rhel5.patch
+ext4-prealloc-rhel6.patch
+ext4-mballoc-extra-checks-rhel6.patch
+ext4-misc-rhel6.patch
+ext4-big-endian-check-2.6-rhel6.patch
+ext4-alloc-policy-2.6-rhel5.patch
+ext4-force_over_16tb-rhel6.patch
+ext4-pdir-fix-rhel6.patch
+ext4-osd-iop-common-rhel6.patch
+ext4-osd-iam-exports-rhel6.patch
+ext4-dynlocks-common-rhel6.patch
+ext4-hash-indexed-dir-dotdot-update-rhel5.patch
+ext4-kill-dx_root-rhel6.patch
+ext4-extents-mount-option-rhel6.patch
+ext4-fiemap-2.6-rhel6.patch
+ext4-mballoc-pa_free-mismatch-rhel6.patch
+ext4_data_in_dirent-rhel6.patch
+ext4-disable-mb-cache-rhel6.patch
+ext4-back-dquot-to-rhel6.patch
index 71d312d..0e1e6c2 100644 (file)
@@ -7,6 +7,7 @@ backfs_extra := $(wildcard @LINUX@/fs/@BACKFS@/Makefile)
 
 backfs_headers := $(wildcard @LINUX@/fs/@BACKFS@/*.h)
 linux_headers := $(wildcard @LINUX@/include/linux/@BACKFS@*.h)
 
 backfs_headers := $(wildcard @LINUX@/fs/@BACKFS@/*.h)
 linux_headers := $(wildcard @LINUX@/include/linux/@BACKFS@*.h)
+trace_headers := $(wildcard @LINUX@/include/trace/events/@BACKFS@*.h)
 
 backfs_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/@BACKFS@/*.c))
 
 
 backfs_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/@BACKFS@/*.c))
 
index 65d04a7..d6460a2 100644 (file)
@@ -24,13 +24,17 @@ linux/ldiskfs%.h: linux-stage/include/linux/@BACKFS@%.h
 series := @top_srcdir@/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
 patches := @top_srcdir@/kernel_patches/patches
 
 series := @top_srcdir@/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
 patches := @top_srcdir@/kernel_patches/patches
 
-sources: $(backfs_sources) $(backfs_headers) $(linux_headers) $(series)
-       rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
-       mkdir -p linux-stage/fs/@BACKFS@ linux-stage/include/linux
+sources: $(backfs_sources) $(backfs_headers) $(linux_headers) $(series) $(trace_headers)
+       rm -rf linux-stage linux sources trace $(ldiskfs_SOURCES)
+       mkdir -p linux-stage/fs/@BACKFS@ linux-stage/include/linux \
+                linux-stage/include/trace/events
        cp $(backfs_sources) $(backfs_headers) $(backfs_extra) linux-stage/fs/@BACKFS@
        if test -n "$(linux_headers)" ; then \
                cp $(linux_headers) linux-stage/include/linux; \
        fi
        cp $(backfs_sources) $(backfs_headers) $(backfs_extra) linux-stage/fs/@BACKFS@
        if test -n "$(linux_headers)" ; then \
                cp $(linux_headers) linux-stage/include/linux; \
        fi
+       if test -n "$(trace_headers)" ; then \
+               cp $(trace_headers) linux-stage/include/trace/events; \
+       fi
 if USE_QUILT
        ln -s ../$(patches) linux-stage/patches
        ln -s ../$(series) linux-stage/series
 if USE_QUILT
        ln -s ../$(patches) linux-stage/patches
        ln -s ../$(series) linux-stage/series
@@ -43,7 +47,7 @@ else
        done
        @echo
 endif
        done
        @echo
 endif
-       mkdir linux
+       mkdir -p linux trace/events
        @echo -n "Replacing '@BACKFS@' with 'ldiskfs':"
        for i in $(notdir $(backfs_headers) $(backfs_sources)) $(new_sources) ; do \
                echo -n " $$i" ; \
        @echo -n "Replacing '@BACKFS@' with 'ldiskfs':"
        for i in $(notdir $(backfs_headers) $(backfs_sources)) $(new_sources) ; do \
                echo -n " $$i" ; \
@@ -62,6 +66,12 @@ endif
                        linux-stage/include/linux/@BACKFS@$$i \
                        > linux/ldiskfs$$i ; \
        done
                        linux-stage/include/linux/@BACKFS@$$i \
                        > linux/ldiskfs$$i ; \
        done
+       for i in $(subst @BACKFS@,,$(notdir $(trace_headers))) ; do \
+               echo -n " @BACKFS@$$i"; \
+               sed $(strip $(ldiskfs_sed_flags)) \
+                       linux-stage/include/trace/events/@BACKFS@$$i \
+                       > trace/events/ldiskfs$$i ; \
+       done
        sed $(strip $(ldiskfs_sed_flags)) \
         linux-stage/include/linux/dynlocks.h \
         > linux/dynlocks.h
        sed $(strip $(ldiskfs_sed_flags)) \
         linux-stage/include/linux/dynlocks.h \
         > linux/dynlocks.h
@@ -79,7 +89,7 @@ foo-check:
        @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)"
 
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
        @echo "ldiskfs_LDADD: $(ldiskfs_LDADD)"
 
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
-CLEANFILES = sources $(notdir $(linux_headers) $(backfs_headers) $(backfs_sources) $(new_sources) $(new_headers))
+CLEANFILES = sources $(notdir $(linux_headers) $(backfs_headers) $(backfs_sources) $(new_sources) $(new_headers) $(trace_headers))
 
 clean: clean-am
 
 clean: clean-am
-       rm -rf linux linux-stage ldiskfs*.h
+       rm -rf linux linux-stage ldiskfs*.h trace
index 1c38ef4..fe3afd5 100644 (file)
@@ -49,20 +49,76 @@ esac
 ])
 
 #
 ])
 
 #
-# Ensure stack size big than 8k in Lustre server (all kernels)
+# LC_CONFIG_OBD_BUFFER_SIZE
 #
 #
-AC_DEFUN([LC_STACK_SIZE],
-[AC_MSG_CHECKING([stack size big than 8k])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/thread_info.h>
+# the maximum buffer size of lctl ioctls
+#
+AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
+[AC_MSG_CHECKING([maximum OBD ioctl size])
+AC_ARG_WITH([obd-buffer-size],
+       AC_HELP_STRING([--with-obd-buffer-size=[size]],
+                       [set lctl ioctl maximum bytes (default=8192)]),
+       [
+               OBD_BUFFER_SIZE=$with_obd_buffer_size
+       ],[
+               OBD_BUFFER_SIZE=8192
+       ])
+AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
+])
+
+#
+# LC_READLINK_SSIZE_T
+#
+AC_DEFUN([LC_READLINK_SSIZE_T],
+[AC_MSG_CHECKING([if readlink returns ssize_t])
+AC_TRY_COMPILE([
+       #include <unistd.h>
 ],[
 ],[
-        #if THREAD_SIZE < 8192
-        #error "stack size < 8192"
-        #endif
+       ssize_t readlink(const char *, char *, size_t);
 ],[
 ],[
-        AC_MSG_RESULT(yes)
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
 ],[
 ],[
-        AC_MSG_ERROR([Lustre requires that Linux is configured with at least a 8KB stack.])
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_FUNC_RELEASEPAGE_WITH_GFP
+#
+# 2.6.9 ->releasepage() takes a gfp_t arg
+# This kernel defines gfp_t (HAS_GFP_T) but doesn't use it for this function,
+# while others either don't have gfp_t or pass gfp_t as the parameter.
+#
+AC_DEFUN([LC_FUNC_RELEASEPAGE_WITH_GFP],
+[AC_MSG_CHECKING([if releasepage has a gfp_t parameter])
+RELEASEPAGE_WITH_GFP="$(grep -c 'releasepage.*gfp_t' $LINUX/include/linux/fs.h)"
+if test "$RELEASEPAGE_WITH_GFP" != 0 ; then
+       AC_DEFINE(HAVE_RELEASEPAGE_WITH_GFP, 1,
+                  [releasepage with gfp_t parameter])
+       AC_MSG_RESULT([yes])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+
+
+#
+# only for Lustre-patched kernels
+#
+AC_DEFUN([LC_LUSTRE_VERSION_H],
+[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
+       rm -f "$LUSTRE/include/linux/lustre_version.h"
+],[
+       touch "$LUSTRE/include/linux/lustre_version.h"
+       if test x$enable_server = xyes ; then
+               AC_MSG_WARN([Unpatched kernel detected.])
+               AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
+               AC_MSG_WARN([disabling server build])
+               enable_server='no'
+       fi
 ])
 ])
 
 ])
 ])
 
@@ -91,6 +147,24 @@ kernel patches from Lustre version 1.4.3 or above.])
 ])
 
 #
 ])
 
 #
+# Ensure the kernel stack size is at least 8k for Lustre servers (all kernels)
+#
+# The probe fails to compile when THREAD_SIZE is below 8192; in that case
+# the result "no" is reported and configure aborts.
+#
+AC_DEFUN([LC_STACK_SIZE],
+[AC_MSG_CHECKING([if kernel stack size is at least 8k])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/thread_info.h>
+],[
+        #if THREAD_SIZE < 8192
+        #error "stack size < 8192"
+        #endif
+],[
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+        AC_MSG_ERROR([Lustre requires that Linux is configured with at least a 8KB stack.])
+])
+])
+
+#
 # LC_CONFIG_BACKINGFS
 #
 # setup, check the backing filesystem
 # LC_CONFIG_BACKINGFS
 #
 # setup, check the backing filesystem
@@ -148,27 +222,18 @@ fi
 ])
 
 #
 ])
 
 #
-# LC_HEADER_LDISKFS_XATTR
-#
-# CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h
+# LC_CONFIG_LIBLUSTRE_RECOVERY
 #
 #
-AC_DEFUN([LC_HEADER_LDISKFS_XATTR],
-[AC_MSG_CHECKING([if ldiskfs has xattr.h header])
-tmp_flags="$EXTRA_KCFLAGS"
-EXTRA_KCFLAGS="-I$LINUX/fs -I$LDISKFS_DIR -I$LDISKFS_DIR/ldiskfs"
-LB_LINUX_TRY_COMPILE([
-       #include <ldiskfs/xattr.h>
-],[
-        ldiskfs_xattr_get(NULL, 0, "", NULL, 0);
-        ldiskfs_xattr_set_handle(NULL, NULL, 0, "", NULL, 0, 0);
-
-],[
-       AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_LDISKFS_XATTR_H, 1, [ldiskfs/xattr.h found])
-],[
-       AC_MSG_RESULT([no])
-])
-EXTRA_KCFLAGS="$tmp_flags"
+AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
+[AC_MSG_CHECKING([whether to enable liblustre recovery support])
+AC_ARG_ENABLE([liblustre-recovery],
+       AC_HELP_STRING([--disable-liblustre-recovery],
+                       [disable liblustre recovery support]),
+       [],[enable_liblustre_recovery='yes'])
+AC_MSG_RESULT([$enable_liblustre_recovery])
+if test x$enable_liblustre_recovery != xno ; then
+  AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
+fi
 ])
 
 #
 ])
 
 #
@@ -188,137 +253,170 @@ if test x$enable_health_write != xno ; then
 fi
 ])
 
 fi
 ])
 
-#
-# LC_CONFIG_LIBLUSTRE_RECOVERY
-#
-AC_DEFUN([LC_CONFIG_LIBLUSTRE_RECOVERY],
-[AC_MSG_CHECKING([whether to enable liblustre recovery support])
-AC_ARG_ENABLE([liblustre-recovery],
-       AC_HELP_STRING([--disable-liblustre-recovery],
-                       [disable liblustre recovery support]),
-       [],[enable_liblustre_recovery='yes'])
-AC_MSG_RESULT([$enable_liblustre_recovery])
-if test x$enable_liblustre_recovery != xno ; then
-  AC_DEFINE(ENABLE_LIBLUSTRE_RECOVERY, 1, Liblustre Can Recover)
+AC_DEFUN([LC_CONFIG_LRU_RESIZE],
+[AC_MSG_CHECKING([whether to enable lru self-adjusting])
+AC_ARG_ENABLE([lru_resize],
+       AC_HELP_STRING([--enable-lru-resize],
+                       [enable lru resize support]),
+       [],[enable_lru_resize='yes'])
+AC_MSG_RESULT([$enable_lru_resize])
+if test x$enable_lru_resize != xno; then
+   AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
 fi
 ])
 
 fi
 ])
 
-#
-# LC_CONFIG_OBD_BUFFER_SIZE
-#
-# the maximum buffer size of lctl ioctls
-#
-AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
-[AC_MSG_CHECKING([maximum OBD ioctl size])
-AC_ARG_WITH([obd-buffer-size],
-       AC_HELP_STRING([--with-obd-buffer-size=[size]],
-                       [set lctl ioctl maximum bytes (default=8192)]),
-       [
-               OBD_BUFFER_SIZE=$with_obd_buffer_size
-       ],[
-               OBD_BUFFER_SIZE=8192
-       ])
-AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
-AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
+# whether to enable quota support(kernel modules)
+AC_DEFUN([LC_QUOTA_MODULE],
+[if test x$enable_quota != xno; then
+    LB_LINUX_CONFIG([QUOTA],[
+       enable_quota_module='yes'
+       AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
+    ],[
+       enable_quota_module='no'
+       AC_MSG_WARN([quota is not enabled because the kernel - lacks quota support])
+    ])
+fi
 ])
 
 ])
 
-#
-# LC_STRUCT_STATFS
-#
-# AIX does not have statfs.f_namelen
-#
-AC_DEFUN([LC_STRUCT_STATFS],
-[AC_MSG_CHECKING([if struct statfs has a f_namelen field])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/vfs.h>
-],[
-       struct statfs sfs;
-       sfs.f_namelen = 1;
-],[
-       AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
+AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
+[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
+[mm/truncate.c],[
+AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
+            [kernel export truncate_complete_page])
 ],[
 ],[
-       AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-#
-# LC_READLINK_SSIZE_T
-#
-AC_DEFUN([LC_READLINK_SSIZE_T],
-[AC_MSG_CHECKING([if readlink returns ssize_t])
-AC_TRY_COMPILE([
-       #include <unistd.h>
-],[
-       ssize_t readlink(const char *, char *, size_t);
-],[
-       AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t])
+AC_DEFUN([LC_EXPORT_TRUNCATE_RANGE],
+[LB_CHECK_SYMBOL_EXPORT([truncate_inode_pages_range],
+[mm/truncate.c],[
+AC_DEFINE(HAVE_TRUNCATE_RANGE, 1,
+            [kernel export truncate_inode_pages_range])
 ],[
 ],[
-       AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-#
-# LC_FUNC_MS_FLOCK_LOCK
-#
-# 2.6.5 kernel has MS_FLOCK_LOCK sb flag
-#
-AC_DEFUN([LC_FUNC_MS_FLOCK_LOCK],
-[AC_MSG_CHECKING([if kernel has MS_FLOCK_LOCK sb flag])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        int flags = MS_FLOCK_LOCK;
-],[
-        AC_DEFINE(HAVE_MS_FLOCK_LOCK, 1,
-                [kernel has MS_FLOCK_LOCK flag])
-        AC_MSG_RESULT([yes])
+AC_DEFUN([LC_EXPORT_D_REHASH_COND],
+[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
+[fs/dcache.c],[
+AC_DEFINE(HAVE_D_REHASH_COND, 1,
+            [d_rehash_cond is exported by the kernel])
 ],[
 ],[
-        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-#
-# LC_FUNC_HAVE_CAN_SLEEP_ARG
-#
-# 2.6.5 kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()
-#
-AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
-[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        int cansleep;
-        struct file *file;
-        struct file_lock *file_lock;
-        flock_lock_file_wait(file, file_lock, cansleep);
-],[
-        AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
-                [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
-        AC_MSG_RESULT([yes])
+AC_DEFUN([LC_EXPORT___D_REHASH],
+[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
+[fs/dcache.c],[
+AC_DEFINE(HAVE___D_REHASH, 1,
+            [__d_rehash is exported by the kernel])
 ],[
 ],[
-        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
+#
+# LC_EXPORT_D_MOVE_LOCKED
+#
+# Ask LB_CHECK_SYMBOL_EXPORT about d_move_locked in fs/dcache.c; the third
+# argument defines HAVE_D_MOVE_LOCKED and the fourth argument is empty.
+# NOTE(review): LB_CHECK_SYMBOL_EXPORT is defined elsewhere; presumably the
+# third argument runs when the symbol is exported - confirm.
+#
+AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
+[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
+[fs/dcache.c],[
+AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
+            [d_move_locked is exported by the kernel])
+],[
+])
+])
+
+#
+# LC_EXPORT___D_MOVE
+#
+# Ask LB_CHECK_SYMBOL_EXPORT about __d_move in fs/dcache.c; the third
+# argument defines HAVE___D_MOVE and the fourth argument is empty.
+# NOTE(review): LB_CHECK_SYMBOL_EXPORT is defined elsewhere; presumably the
+# third argument runs when the symbol is exported - confirm.
+#
+AC_DEFUN([LC_EXPORT___D_MOVE],
+[LB_CHECK_SYMBOL_EXPORT([__d_move],
+[fs/dcache.c],[
+AC_DEFINE(HAVE___D_MOVE, 1,
+            [__d_move is exported by the kernel])
+],[
+])
+])
+
+# The actual symbol exported varies among architectures, so we need
+# to check many symbols (but only in the current architecture.)  No
+# matter what symbol is exported, the kernel #defines node_to_cpumask
+# to the appropriate function and that's what we use.
+#
+# Three symbol names are checked (node_to_cpumask, node_to_cpu_mask and
+# node_2_cpu_mask) under arch/$LINUX_ARCH; each check carries the same
+# AC_DEFINE of HAVE_NODE_TO_CPUMASK as its third argument.
+AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
+         [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
+                                 [arch/$LINUX_ARCH/mm/numa.c],
+                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
+                                            [node_to_cpumask is exported by
+                                             the kernel])]) # x86_64
+          LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
+                                 [arch/$LINUX_ARCH/kernel/smpboot.c],
+                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
+                                            [node_to_cpumask is exported by
+                                             the kernel])]) # ia64
+          LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
+                                 [arch/$LINUX_ARCH/kernel/smpboot.c],
+                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
+                                            [node_to_cpumask is exported by
+                                             the kernel])]) # i386
+          ])
+
 #
 #
-# LC_FUNC_RELEASEPAGE_WITH_GFP
-#
-# 2.6.9 ->releasepage() takes a gfp_t arg
-# This kernel defines gfp_t (HAS_GFP_T) but doesn't use it for this function,
-# while others either don't have gfp_t or pass gfp_t as the parameter.
-#
-AC_DEFUN([LC_FUNC_RELEASEPAGE_WITH_GFP],
-[AC_MSG_CHECKING([if releasepage has a gfp_t parameter])
-RELEASEPAGE_WITH_GFP="$(grep -c 'releasepage.*gfp_t' $LINUX/include/linux/fs.h)"
-if test "$RELEASEPAGE_WITH_GFP" != 0 ; then
-       AC_DEFINE(HAVE_RELEASEPAGE_WITH_GFP, 1,
-                  [releasepage with gfp_t parameter])
+# LC_HEADER_LDISKFS_XATTR
+#
+# CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h
+#
+AC_DEFUN([LC_HEADER_LDISKFS_XATTR],
+[AC_MSG_CHECKING([if ldiskfs has xattr.h header])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-I$LINUX/fs -I$LDISKFS_DIR -I$LDISKFS_DIR/ldiskfs"
+LB_LINUX_TRY_COMPILE([
+       #include <ldiskfs/xattr.h>
+],[
+        ldiskfs_xattr_get(NULL, 0, "", NULL, 0);
+        ldiskfs_xattr_set_handle(NULL, NULL, 0, "", NULL, 0, 0);
+
+],[
        AC_MSG_RESULT([yes])
        AC_MSG_RESULT([yes])
-else
+       AC_DEFINE(HAVE_LDISKFS_XATTR_H, 1, [ldiskfs/xattr.h found])
+],[
+       AC_MSG_RESULT([no])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+])
+
+#
+# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
+#
+# Check for our patched grab_cache_page_nowait_gfp() function
+# after 2.6.29 we can emulate this using add_to_page_cache_lru()
+#
+# The add_to_page_cache_lru check is nested in the fourth argument of the
+# grab_cache_page_nowait_gfp check, so it is only a fallback.
+# NOTE(review): assumes the fourth argument of LB_CHECK_SYMBOL_EXPORT is the
+# "symbol not exported" action - confirm against its definition.
+#
+AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
+[LB_CHECK_SYMBOL_EXPORT([grab_cache_page_nowait_gfp],
+[mm/filemap.c],[
+        AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
+                  [kernel exports grab_cache_page_nowait_gfp])
+        ],
+        [LB_CHECK_SYMBOL_EXPORT([add_to_page_cache_lru],
+        [mm/filemap.c],[
+                AC_DEFINE(HAVE_ADD_TO_PAGE_CACHE_LRU, 1,
+                        [kernel exports add_to_page_cache_lru])
+        ],[
+        ])
+        ])
+])
+
+#
+# LC_STRUCT_STATFS
+#
+# AIX does not have statfs.f_namelen
+#
+AC_DEFUN([LC_STRUCT_STATFS],
+[AC_MSG_CHECKING([if struct statfs has a f_namelen field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/vfs.h>
+],[
+       struct statfs sfs;
+       sfs.f_namelen = 1;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_STATFS_NAMELEN, 1, [struct statfs has a namelen field])
+],[
        AC_MSG_RESULT([no])
        AC_MSG_RESULT([no])
-fi
+])
 ])
 
 #
 ])
 
 #
@@ -394,26 +492,6 @@ AC_DEFUN([LC_XATTR_ACL],
 [])
 ])
 
 [])
 ])
 
-
-# added in 2.6.16
-#
-AC_DEFUN([LC_STRUCT_INTENT_FILE],
-[AC_MSG_CHECKING([if struct open_intent has a file field])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-        #include <linux/namei.h>
-],[
-        struct open_intent intent;
-        &intent.file;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-
 #
 # After 2.6.16 the xattr_acl API is removed, and posix_acl is used instead
 #
 #
 # After 2.6.16 the xattr_acl API is removed, and posix_acl is used instead
 #
@@ -436,20 +514,65 @@ $1
 ])
 ])
 
 ])
 ])
 
+#
+# LC_CONST_ACL_SIZE
+#
+# Build and run a small program that writes the value of
+# mds_xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES) to the file acl.size,
+# then define XATTR_ACL_SIZE from the content of that file.  configure
+# aborts when the program cannot be built, fails to run, or cannot write
+# the file.  CFLAGS is extended with kernel include paths for the probe
+# and restored afterwards.
+#
+AC_DEFUN([LC_CONST_ACL_SIZE],
+[AC_MSG_CHECKING([calc acl size])
+tmp_flags="$CFLAGS"
+CFLAGS="$CFLAGS -I$LINUX/include -I$LINUX_OBJ/include -I$LINUX_OBJ/include2 -I$LINUX/arch/`uname -m|sed -e 's/ppc.*/powerpc/' -e 's/x86_64/x86/' -e 's/i.86/x86/'`/include $EXTRA_KCFLAGS"
+AC_TRY_RUN([
+        #define __KERNEL__
+        #include <linux/autoconf.h>
+        #include <linux/types.h>
+        #undef __KERNEL__
+        // block include
+        #define __LINUX_POSIX_ACL_H
+
+        # ifdef CONFIG_FS_POSIX_ACL
+        #  ifdef HAVE_XATTR_ACL
+        #   include <linux/xattr_acl.h>
+        #  endif
+        #  ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
+        #   include <linux/posix_acl_xattr.h>
+        #  endif
+        # endif
+
+        #include <lustre_acl.h>
+
+        #include <stdio.h>
+
+        int main(void)
+        {
+            int size = mds_xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES);
+            FILE *f = fopen("acl.size","w+");
+
+            /* a non-zero exit status selects the error branch below */
+            if (f == NULL)
+                return 1;
+            if (fprintf(f,"%d", size) < 0) {
+                fclose(f);
+                return 1;
+            }
+            if (fclose(f) != 0)
+                return 1;
+
+            return 0;
+        }
+],[
+       acl_size=`cat acl.size`
+       AC_MSG_RESULT([ACL size $acl_size])
+        AC_DEFINE_UNQUOTED(XATTR_ACL_SIZE, AS_TR_SH([$acl_size]), [size of xattr acl])
+],[
+        AC_MSG_ERROR([ACL size can't be computed])
+])
+CFLAGS="$tmp_flags"
+])
+
+# added in 2.6.16
 #
 #
-# only for Lustre-patched kernels
-#
-AC_DEFUN([LC_LUSTRE_VERSION_H],
-[LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
-       rm -f "$LUSTRE/include/linux/lustre_version.h"
+AC_DEFUN([LC_STRUCT_INTENT_FILE],
+[AC_MSG_CHECKING([if struct open_intent has a file field])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+        #include <linux/namei.h>
 ],[
 ],[
-       touch "$LUSTRE/include/linux/lustre_version.h"
-       if test x$enable_server = xyes ; then
-               AC_MSG_WARN([Unpatched kernel detected.])
-               AC_MSG_WARN([Lustre servers cannot be built with an unpatched kernel;])
-               AC_MSG_WARN([disabling server build])
-               enable_server='no'
-       fi
+        struct open_intent intent;
+        &intent.file;
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_FILE_IN_STRUCT_INTENT, 1, [struct open_intent has a file field])
+],[
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
@@ -480,27 +603,6 @@ AC_DEFUN([LC_CONFIG_RMTCLIENT],
 ])
 ])
 
 ])
 ])
 
-AC_DEFUN([LC_SUNRPC_CACHE],
-[AC_MSG_CHECKING([if sunrpc struct cache_head uses kref])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/sunrpc/cache.h>
-],[
-        struct cache_head ch;
-        &ch.ref.refcount;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_SUNRPC_CACHE_V2, 1, [sunrpc cache facility v2])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-AC_DEFUN([LC_CONFIG_SUNRPC],
-[LB_LINUX_CONFIG_IM([SUNRPC],[],
-                    [AC_MSG_ERROR([kernel SUNRPC support is required by using GSS.])])
- LC_SUNRPC_CACHE
-])
-
 #
 # LC_CONFIG_GSS_KEYRING (default enabled, if gss is enabled)
 #
 #
 # LC_CONFIG_GSS_KEYRING (default enabled, if gss is enabled)
 #
@@ -524,6 +626,27 @@ AC_DEFUN([LC_CONFIG_GSS_KEYRING],
  fi
 ])
 
  fi
 ])
 
+#
+# LC_SUNRPC_CACHE
+#
+# Probe whether struct cache_head from <linux/sunrpc/cache.h> has a ref
+# member with a refcount field (the test body takes &ch.ref.refcount).
+# When the probe compiles, "yes" is reported and HAVE_SUNRPC_CACHE_V2 is
+# defined; otherwise "no" is reported.
+#
+AC_DEFUN([LC_SUNRPC_CACHE],
+[AC_MSG_CHECKING([if sunrpc struct cache_head uses kref])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/sunrpc/cache.h>
+],[
+        struct cache_head ch;
+        &ch.ref.refcount;
+],[
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_SUNRPC_CACHE_V2, 1, [sunrpc cache facility v2])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_CONFIG_SUNRPC
+#
+# Check the SUNRPC kernel config option through LB_LINUX_CONFIG_IM, with an
+# empty second argument and a configure-aborting AC_MSG_ERROR as the third,
+# then run LC_SUNRPC_CACHE.
+# NOTE(review): LB_LINUX_CONFIG_IM is defined elsewhere; presumably the third
+# argument runs when SUNRPC is neither built in nor a module - confirm.
+#
+AC_DEFUN([LC_CONFIG_SUNRPC],
+[LB_LINUX_CONFIG_IM([SUNRPC],[],
+                    [AC_MSG_ERROR([kernel SUNRPC support is required by using GSS.])])
+ LC_SUNRPC_CACHE
+])
+
 #
 # LC_CONFIG_GSS (default disabled)
 #
 #
 # LC_CONFIG_GSS (default disabled)
 #
@@ -564,54 +687,282 @@ AC_DEFUN([LC_CONFIG_GSS],
  fi
 ])
 
  fi
 ])
 
-# LC_EXPORT_SYNCHRONIZE_RCU
-# after 2.6.12 synchronize_rcu is preferred over synchronize_kernel
-AC_DEFUN([LC_EXPORT_SYNCHRONIZE_RCU],
-[LB_CHECK_SYMBOL_EXPORT([synchronize_rcu],
-[kernel/rcupdate.c],[
-        AC_DEFINE(HAVE_SYNCHRONIZE_RCU, 1,
-                [in 2.6.12 synchronize_rcu preferred over synchronize_kernel])
-],[
-])
-])
-
-# LC_INODE_I_MUTEX
-# after 2.6.15 inode have i_mutex intead of i_sem
-AC_DEFUN([LC_INODE_I_MUTEX],
-[AC_MSG_CHECKING([if inode has i_mutex ])
+#
+# LC_FUNC_HAVE_CAN_SLEEP_ARG
+#
+# 2.6.5 kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()
+#
+AC_DEFUN([LC_FUNC_HAVE_CAN_SLEEP_ARG],
+[AC_MSG_CHECKING([if kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-       #include <linux/mutex.h>
-       #include <linux/fs.h>
-       #undef i_mutex
+        #include <linux/fs.h>
 ],[
 ],[
-       struct inode i;
-
-       mutex_unlock(&i.i_mutex);
+        int cansleep;
+        struct file *file;
+        struct file_lock *file_lock;
+        flock_lock_file_wait(file, file_lock, cansleep);
 ],[
 ],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
-                [after 2.6.15 inode have i_mutex intead of i_sem])
+        AC_DEFINE(HAVE_CAN_SLEEP_ARG, 1,
+                [kernel has third arg can_sleep in fs/locks.c: flock_lock_file_wait()])
+        AC_MSG_RESULT([yes])
 ],[
 ],[
-        AC_MSG_RESULT(no)
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-# LC_SEQ_LOCK
-# after 2.6.18 seq_file has lock intead of sem
-AC_DEFUN([LC_SEQ_LOCK],
-[AC_MSG_CHECKING([if struct seq_file has lock field])
+#
+# LC_FUNC_F_OP_FLOCK
+#
+# rhel4.2 kernel has f_op->flock field
+#
+AC_DEFUN([LC_FUNC_F_OP_FLOCK],
+[AC_MSG_CHECKING([if struct file_operations has flock field])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/seq_file.h>
+        #include <linux/fs.h>
 ],[
 ],[
-       struct seq_file seq;
+        struct file_operations ll_file_operations_flock;
+        ll_file_operations_flock.flock = NULL;
+],[
+        AC_DEFINE(HAVE_F_OP_FLOCK, 1,
+                [struct file_operations has flock field])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_QUOTA_READ
+#
+# Probe whether struct super_operations has a quota_read member (the test
+# body reads sp.quota_read).  When the probe compiles, "yes" is reported
+# and KERNEL_SUPPORTS_QUOTA_READ is defined; otherwise "no" is reported.
+#
+AC_DEFUN([LC_QUOTA_READ],
+[AC_MSG_CHECKING([if kernel supports quota_read])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/fs.h>
+],[
+       struct super_operations sp;
+        void *i = (void *)sp.quota_read;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_COOKIE_FOLLOW_LINK
+#
+# kernel 2.6.13+ ->follow_link returns a cookie
+#
+# The probe does not call ->follow_link itself: it compiles a call to
+# i_op->put_link with three arguments (dentry, nameidata, NULL) and defines
+# HAVE_COOKIE_FOLLOW_LINK when that call compiles.
+# NOTE(review): presumably the third put_link argument is the cookie - confirm.
+#
+
+AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
+[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+        #include <linux/namei.h>
+],[
+        struct dentry dentry;
+        struct nameidata nd;
+
+        dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
+],[
+        AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_FUNC_RCU
+#
+# kernels prior to 2.6.0(?) have no RCU support; in kernel 2.6.5 (SUSE),
+# call_rcu takes three parameters.
+#
+AC_DEFUN([LC_FUNC_RCU],
+[AC_MSG_CHECKING([if kernel have RCU supported])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/rcupdate.h>
+],[],[
+        AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
+        AC_MSG_RESULT([yes])
+
+        AC_MSG_CHECKING([if call_rcu takes three parameters])
+        LB_LINUX_TRY_COMPILE([
+                #include <linux/rcupdate.h>
+        ],[
+                struct rcu_head rh;
+                call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
+        ],[
+                AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
+                AC_MSG_RESULT([yes])
+        ],[
+                AC_MSG_RESULT([no])
+        ])
 
 
-       mutex_unlock(&seq.lock);
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_PERCPU_COUNTER
+#
+# First probe: <linux/percpu_counter.h> can be included; on success
+# HAVE_PERCPU_COUNTER is defined.  Second probe, only run when the first
+# succeeds: percpu_counter_init accepts two arguments; on success
+# HAVE_PERCPU_2ND_ARG is defined.
+#
+AC_DEFUN([LC_PERCPU_COUNTER],
+[AC_MSG_CHECKING([if have struct percpu_counter defined])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/percpu_counter.h>
+],[],[
+        AC_DEFINE(HAVE_PERCPU_COUNTER, 1, [percpu_counter found])
+        AC_MSG_RESULT([yes])
+
+        AC_MSG_CHECKING([if percpu_counter_init takes the 2nd argument])
+        LB_LINUX_TRY_COMPILE([
+                #include <linux/percpu_counter.h>
+        ],[
+                struct percpu_counter c;
+                percpu_counter_init(&c, 0);
+        ],[
+                AC_DEFINE(HAVE_PERCPU_2ND_ARG, 1, [percpu_counter_init has two
+                                                   arguments])
+                AC_MSG_RESULT([yes])
+        ],[
+                AC_MSG_RESULT([no])
+        ])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_TASK_CLENV_STORE
+#
+# Announce the check and run LC_TASK_CLENV_TUX_INFO unless the shell
+# variable have_task_clenv_store is already "yes".
+# NOTE(review): no AC_MSG_RESULT is emitted here; presumably
+# LC_TASK_CLENV_TUX_INFO reports the result - confirm, and note that nothing
+# is reported when have_task_clenv_store is already "yes".
+#
+AC_DEFUN([LC_TASK_CLENV_STORE],
+[
+        AC_MSG_CHECKING([if we can store cl_env in task_struct])
+        if test x$have_task_clenv_store != xyes ; then
+                LC_TASK_CLENV_TUX_INFO
+        fi
+])
+
+# ~2.6.11
+
+#
+# LC_S_TIME_GRAN
+#
+# Probe whether struct super_block has an s_time_gran member (the test body
+# returns sb.s_time_gran).  When the probe compiles, "yes" is reported and
+# HAVE_S_TIME_GRAN is defined; otherwise "no" is reported.
+#
+AC_DEFUN([LC_S_TIME_GRAN],
+[AC_MSG_CHECKING([if super block has s_time_gran member])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+       struct super_block sb;
+
+        return sb.s_time_gran;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_S_TIME_GRAN, 1, [super block has s_time_gran member])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_SB_TIME_GRAN
+#
+# Probe whether a call to get_sb_time_gran(NULL) compiles with <linux/fs.h>
+# included.  When it does, "yes" is reported and HAVE_SB_TIME_GRAN is
+# defined; otherwise "no" is reported.
+#
+AC_DEFUN([LC_SB_TIME_GRAN],
+[AC_MSG_CHECKING([if kernel has old get_sb_time_gran])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+       return get_sb_time_gran(NULL);
+],[
+        AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_SB_TIME_GRAN, 1, [kernel has old get_sb_time_gran])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+# 2.6.12
+
+# ~2.6.12 merge patch from oracle to convert tree_lock from spinlock to rwlock
+#
+# The probe calls write_lock on address_space.tree_lock and defines
+# HAVE_RW_TREE_LOCK when it compiles.  EXTRA_KCFLAGS is replaced by -Werror
+# for the probe and restored afterwards.
+# NOTE(review): presumably -Werror is needed so that a lock-type mismatch,
+# which would otherwise only warn, makes the probe fail - confirm.
+AC_DEFUN([LC_RW_TREE_LOCK],
+[AC_MSG_CHECKING([if kernel has tree_lock as rwlock])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+       struct address_space a;
+
+       write_lock(&a.tree_lock);
+],[
+        AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_RW_TREE_LOCK, 1, [kernel has tree_lock as rw_lock])
+],[
+        AC_MSG_RESULT([no])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+])
+
+# LC_EXPORT_SYNCHRONIZE_RCU
+# after 2.6.12 synchronize_rcu is preferred over synchronize_kernel
+#
+# Asks LB_CHECK_SYMBOL_EXPORT about synchronize_rcu in kernel/rcupdate.c;
+# the third argument defines HAVE_SYNCHRONIZE_RCU, the fourth is empty.
+AC_DEFUN([LC_EXPORT_SYNCHRONIZE_RCU],
+[LB_CHECK_SYMBOL_EXPORT([synchronize_rcu],
+[kernel/rcupdate.c],[
+        AC_DEFINE(HAVE_SYNCHRONIZE_RCU, 1,
+                [in 2.6.12 synchronize_rcu preferred over synchronize_kernel])
+],[
+])
+])
+
+# 2.6.15
+
+# LC_INODE_I_MUTEX
+# after 2.6.15 inode has i_mutex instead of i_sem
+AC_DEFUN([LC_INODE_I_MUTEX],
+[AC_MSG_CHECKING([if inode has i_mutex ])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/mutex.h>
+       #include <linux/fs.h>
+       #undef i_mutex
+],[
+       struct inode i;
+
+       mutex_unlock(&i.i_mutex);
 ],[
         AC_MSG_RESULT(yes)
 ],[
         AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_SEQ_LOCK, 1,
-                [after 2.6.18 seq_file has lock intead of sem])
+        AC_DEFINE(HAVE_INODE_I_MUTEX, 1,
+                [after 2.6.15 inode have i_mutex intead of i_sem])
 ],[
 ],[
-        AC_MSG_RESULT(NO)
+        AC_MSG_RESULT(no)
+])
+])
+
+# 2.6.16
+
+# LC_SECURITY_PLUG  # for SLES10 SP2
+# check security plug in sles10 sp2 kernel
+#
+# The probe compiles a call to notify_change with three arguments
+# (dentry, vfsmount, iattr).  When it compiles, "yes" is reported and
+# HAVE_SECURITY_PLUG is defined; otherwise "no" is reported.
+AC_DEFUN([LC_SECURITY_PLUG],
+[AC_MSG_CHECKING([If kernel has security plug support])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+        struct dentry   *dentry;
+        struct vfsmount *mnt;
+        struct iattr    *iattr;
+
+        notify_change(dentry, mnt, iattr);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_SECURITY_PLUG, 1,
+                [SLES10 SP2 use extra parameter in vfs])
+],[
+        AC_MSG_RESULT(no)
+])
+])
+
+# 2.6.17
+
+# inode has an i_private field since 2.6.17
+AC_DEFUN([LC_INODE_IPRIVATE],
+[AC_MSG_CHECKING([if inode has a i_private field])
+LB_LINUX_TRY_COMPILE([
+#include <linux/fs.h>
+],[
+       struct inode i;
+       i.i_private = NULL; 
+],[
+       AC_MSG_RESULT(yes)
+       AC_DEFINE(HAVE_INODE_IPRIVATE, 1,
+               [struct inode has i_private field])
+],[
+       AC_MSG_RESULT(no)
 ])
 ])
 
 ])
 ])
 
@@ -636,26 +987,22 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# LC_FLUSH_OWNER_ID
-# starting from 2.6.18 the file_operations .flush
-# method has a new "fl_owner_t id" parameter
-#
-AC_DEFUN([LC_FLUSH_OWNER_ID],
-[AC_MSG_CHECKING([if file_operations .flush has an fl_owner_t id])
+# 2.6.18
+
+# LC_NR_PAGECACHE
+# 2.6.18 doesn't export nr_pagecache
+AC_DEFUN([LC_NR_PAGECACHE],
+[AC_MSG_CHECKING([kernel export nr_pagecache])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
+        #include <linux/pagemap.h>
 ],[
 ],[
-        struct file_operations *fops = NULL;
-        fl_owner_t id;
-        int i;
-
-        i = fops->flush(NULL, id);
+        return atomic_read(&nr_pagecache);
 ],[
 ],[
-        AC_DEFINE(HAVE_FLUSH_OWNER_ID, 1,
-                [file_operations .flush method has an fl_owner_t id])
-        AC_MSG_RESULT([yes])
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_NR_PAGECACHE, 1,
+                [is kernel export nr_pagecache])
 ],[
 ],[
-        AC_MSG_RESULT([no])
+        AC_MSG_RESULT(no)
 ])
 ])
 
 ])
 ])
 
@@ -748,23 +1095,195 @@ LB_LINUX_TRY_COMPILE([
 EXTRA_KCFLAGS="$tmp_flags"
 ])
 
 EXTRA_KCFLAGS="$tmp_flags"
 ])
 
-# inode have i_private field since 2.6.17
-AC_DEFUN([LC_INODE_IPRIVATE],
-[AC_MSG_CHECKING([if inode has a i_private field])
+# LC_SEQ_LOCK
+# after 2.6.18 seq_file has lock instead of sem
+AC_DEFUN([LC_SEQ_LOCK],
+[AC_MSG_CHECKING([if struct seq_file has lock field])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-#include <linux/fs.h>
+        #include <linux/seq_file.h>
 ],[
 ],[
-       struct inode i;
-       i.i_private = NULL; 
+       struct seq_file seq;
+
+       mutex_unlock(&seq.lock);
 ],[
 ],[
-       AC_MSG_RESULT(yes)
-       AC_DEFINE(HAVE_INODE_IPRIVATE, 1,
-               [struct inode has i_private field])
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_SEQ_LOCK, 1,
+                [after 2.6.18 seq_file has lock intead of sem])
 ],[
 ],[
-       AC_MSG_RESULT(no)
+        AC_MSG_RESULT(NO)
+])
+])
+
+#
+# LC_EXPORT_FILEMAP_FDATAWRITE_RANGE
+#
+# No standard kernels export this
+#
+# Asks LB_CHECK_SYMBOL_EXPORT about filemap_fdatawrite_range in
+# mm/filemap.c; the third argument defines HAVE_FILEMAP_FDATAWRITE_RANGE,
+# the fourth is empty.
+#
+AC_DEFUN([LC_EXPORT_FILEMAP_FDATAWRITE_RANGE],
+[LB_CHECK_SYMBOL_EXPORT([filemap_fdatawrite_range],
+[mm/filemap.c],[
+AC_DEFINE(HAVE_FILEMAP_FDATAWRITE_RANGE, 1,
+            [filemap_fdatawrite_range is exported by the kernel])
+],[
+])
+])
+
+# LC_FLUSH_OWNER_ID
+# starting from 2.6.18 the file_operations .flush
+# method has a new "fl_owner_t id" parameter
+#
+# The probe compiles a two-argument call fops->flush(NULL, id).  When it
+# compiles, HAVE_FLUSH_OWNER_ID is defined and "yes" is reported;
+# otherwise "no" is reported.
+#
+AC_DEFUN([LC_FLUSH_OWNER_ID],
+[AC_MSG_CHECKING([if file_operations .flush has an fl_owner_t id])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/fs.h>
+],[
+        struct file_operations *fops = NULL;
+        fl_owner_t id;
+        int i;
+
+        i = fops->flush(NULL, id);
+],[
+        AC_DEFINE(HAVE_FLUSH_OWNER_ID, 1,
+                [file_operations .flush method has an fl_owner_t id])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_EXPORT_INVALIDATE_MAPPING_PAGES
+#
+# SLES9, RHEL4, RHEL5, vanilla 2.6.24 export invalidate_mapping_pages() but
+# SLES10 2.6.16 does not, for some reason.  For filter cache invalidation.
+#
+# The invalidate_inode_pages check is nested in the fourth argument of the
+# invalidate_mapping_pages check, and the AC_MSG_ERROR is nested in the
+# fourth argument of that inner check.
+# NOTE(review): assumes the fourth argument of LB_CHECK_SYMBOL_EXPORT is the
+# "symbol not exported" action, so configure aborts only when neither symbol
+# is exported - confirm.  The outer call also passes an empty fifth argument.
+#
+AC_DEFUN([LC_EXPORT_INVALIDATE_MAPPING_PAGES],
+    [LB_CHECK_SYMBOL_EXPORT([invalidate_mapping_pages], [mm/truncate.c], [
+         AC_DEFINE(HAVE_INVALIDATE_MAPPING_PAGES, 1,
+                        [exported invalidate_mapping_pages])],
+    [LB_CHECK_SYMBOL_EXPORT([invalidate_inode_pages], [mm/truncate.c], [
+         AC_DEFINE(HAVE_INVALIDATE_INODE_PAGES, 1,
+                        [exported invalidate_inode_pages])], [
+       AC_MSG_ERROR([no way to invalidate pages])
+  ])
+    ],[])
+])
+
+#
+# LC_EXT4_DISCARD_PREALLOCATIONS
+#
+# Probe whether a call to ext4_discard_preallocations with an inode pointer
+# compiles against <ext4/ext4.h>.  EXTRA_KCFLAGS is replaced by
+# -I$LINUX/fs for the probe and restored afterwards.  When the probe
+# compiles, LDISKFS_DISCARD_PREALLOCATIONS is defined.
+#
+AC_DEFUN([LC_EXT4_DISCARD_PREALLOCATIONS],
+[AC_MSG_CHECKING([if ext4_discard_preallocations defined])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-I$LINUX/fs"
+LB_LINUX_TRY_COMPILE([
+        #include <ext4/ext4.h>
+],[
+        struct inode i;
+        ext4_discard_preallocations(&i);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(LDISKFS_DISCARD_PREALLOCATIONS, 1,
+                  [ext4_discard_preallocations defined])
+],[
+        AC_MSG_RESULT(no)
+])
+EXTRA_KCFLAGS="$tmp_flags"
+])
+
+#
+# LC_EXT_INSERT_EXTENT_WITH_5ARGS
+#
+# Probe whether a five-argument call to ext4_ext_insert_extent compiles
+# against <ext4/ext4_extents.h>.  EXTRA_KCFLAGS is replaced by -I$LINUX/fs
+# for the probe and restored afterwards.  When the probe compiles,
+# EXT_INSERT_EXTENT_WITH_5ARGS is defined.
+#
+AC_DEFUN([LC_EXT_INSERT_EXTENT_WITH_5ARGS],
+[AC_MSG_CHECKING([ext4_ext_insert_extent needs 5 arguments])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-I$LINUX/fs"
+LB_LINUX_TRY_COMPILE([
+        #include <ext4/ext4_extents.h>
+],[
+        ext4_ext_insert_extent(NULL, NULL, NULL, NULL, 0);
+],[
+        AC_DEFINE([EXT_INSERT_EXTENT_WITH_5ARGS], 1,
+                  [ext4_ext_insert_extent needs 5 arguments])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+])
+
+#2.6.18 + RHEL5 (fc6)
+
+# The RHEL5 FS-cache patch renames the PG_checked flag to PG_fs_misc
+#
+# The probe fails with #error unless PG_fs_misc is a preprocessor macro
+# after including <linux/mm.h> and <linux/page-flags.h>.  When it compiles,
+# "yes" is reported and HAVE_PG_FS_MISC is defined; otherwise "no".
+AC_DEFUN([LC_PG_FS_MISC],
+[AC_MSG_CHECKING([kernel has PG_fs_misc])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/mm.h>
+        #include <linux/page-flags.h>
+],[
+        #ifndef PG_fs_misc
+        #error PG_fs_misc not defined in kernel
+        #endif
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_PG_FS_MISC, 1,
+                  [is kernel have PG_fs_misc])
+],[
+        AC_MSG_RESULT(no)
+])
+])
+
+# RHEL5 PageChecked and SetPageChecked defined
+#
+# When PageChecked is a preprocessor macro the probe body is empty and
+# compiles trivially; otherwise the probe must compile calls to
+# SetPageChecked and PageChecked.  On success "yes" is reported and
+# HAVE_PAGE_CHECKED is defined; otherwise "no" is reported.
+AC_DEFUN([LC_PAGE_CHECKED],
+[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/autoconf.h>
+#ifdef HAVE_LINUX_MMTYPES_H
+        #include <linux/mm_types.h>
+#endif
+       #include <linux/page-flags.h>
+],[
+       struct page *p;
+
+        /* before 2.6.26 this define*/
+        #ifndef PageChecked    
+       /* 2.6.26 use function instead of define for it */
+       SetPageChecked(p);
+       PageChecked(p);
+       #endif
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_PAGE_CHECKED, 1,
+                  [does kernel have PageChecked and SetPageChecked])
+],[
+        AC_MSG_RESULT(no)
+])
+])
 ])
+
+#
+# LC_LINUX_FIEMAP_H
+#
+# If we have fiemap.h
+# after 2.6.27 use fiemap.h in include/linux
+#
+AC_DEFUN([LC_LINUX_FIEMAP_H],
+[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
+        AC_MSG_CHECKING([if fiemap.h can be compiled])
+        LB_LINUX_TRY_COMPILE([
+                #include <linux/types.h>
+                #include <linux/fiemap.h>
+        ],[],[
+                AC_MSG_RESULT([yes])
+                AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
+        ],[
+                AC_MSG_RESULT([no])
+        ])
+],
+[])
 ])
 
 ])
 
+# 2.6.19
+
 # 2.6.19 API changes
 # inode don't have i_blksize field
 AC_DEFUN([LC_INODE_BLKSIZE],
 # 2.6.19 API changes
 # inode don't have i_blksize field
 AC_DEFUN([LC_INODE_BLKSIZE],
@@ -829,7 +1348,7 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# LC_GENERIC_FILE_READ
+# LC_FILE_READV
 # 2.6.19 replaced readv with aio_read
 AC_DEFUN([LC_FILE_READV],
 [AC_MSG_CHECKING([readv in fops])
 # 2.6.19 replaced readv with aio_read
 AC_DEFUN([LC_FILE_READV],
 [AC_MSG_CHECKING([readv in fops])
@@ -847,22 +1366,7 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# LC_NR_PAGECACHE
-# 2.6.18 don't export nr_pagecahe
-AC_DEFUN([LC_NR_PAGECACHE],
-[AC_MSG_CHECKING([kernel export nr_pagecache])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/pagemap.h>
-],[
-        return atomic_read(&nr_pagecache);
-],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_NR_PAGECACHE, 1,
-                [is kernel export nr_pagecache])
-],[
-        AC_MSG_RESULT(no)
-])
-])
+# 2.6.20
 
 # LC_CANCEL_DIRTY_PAGE
 # 2.6.20 introduced cancel_dirty_page instead of clear_page_dirty.
 
 # LC_CANCEL_DIRTY_PAGE
 # 2.6.20 introduced cancel_dirty_page instead of clear_page_dirty.
@@ -889,6 +1393,8 @@ AC_DEFUN([LC_CANCEL_DIRTY_PAGE],
         fi
 ])
 
         fi
 ])
 
+# raid5-zerocopy patch
+
 #
 # LC_PAGE_CONSTANT
 #
 #
 # LC_PAGE_CONSTANT
 #
@@ -913,178 +1419,97 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# RHEL5 in FS-cache patch rename PG_checked flag into PG_fs_misc
-AC_DEFUN([LC_PG_FS_MISC],
-[AC_MSG_CHECKING([kernel has PG_fs_misc])
+# 2.6.22
+
+# 2.6.22 lost second parameter for invalidate_bdev
+AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
+[AC_MSG_CHECKING([if invalidate_bdev has second argument])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/mm.h>
-        #include <linux/page-flags.h>
+        #include <linux/buffer_head.h>
 ],[
 ],[
-        #ifndef PG_fs_misc
-        #error PG_fs_misc not defined in kernel
-        #endif
+        invalidate_bdev(NULL,0);
 ],[
 ],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_PG_FS_MISC, 1,
-                  [is kernel have PG_fs_misc])
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
+                [invalidate_bdev has second argument])
 ],[
 ],[
-        AC_MSG_RESULT(no)
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-# RHEL5 PageChecked and SetPageChecked defined
-AC_DEFUN([LC_PAGE_CHECKED],
-[AC_MSG_CHECKING([kernel has PageChecked and SetPageChecked])
+#
+# check for crypto API
+#
+AC_DEFUN([LC_ASYNC_BLOCK_CIPHER],
+[AC_MSG_CHECKING([if kernel has block cipher support])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/autoconf.h>
-#ifdef HAVE_LINUX_MMTYPES_H
-        #include <linux/mm_types.h>
-#endif
-       #include <linux/page-flags.h>
+        #include <linux/err.h>
+        #include <linux/crypto.h>
 ],[
 ],[
-       struct page *p;
-
-        /* before 2.6.26 this define*/
-        #ifndef PageChecked    
-       /* 2.6.26 use function instead of define for it */
-       SetPageChecked(p);
-       PageChecked(p);
-       #endif
+        struct crypto_blkcipher *tfm;
+        tfm = crypto_alloc_blkcipher("aes", 0, 0 );
 ],[
 ],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_PAGE_CHECKED, 1,
-                  [does kernel have PageChecked and SetPageChecked])
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_ASYNC_BLOCK_CIPHER, 1, [kernel has block cipher support])
 ],[
 ],[
-        AC_MSG_RESULT(no)
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-AC_DEFUN([LC_EXPORT_TRUNCATE_COMPLETE],
-[LB_CHECK_SYMBOL_EXPORT([truncate_complete_page],
-[mm/truncate.c],[
-AC_DEFINE(HAVE_TRUNCATE_COMPLETE_PAGE, 1,
-            [kernel export truncate_complete_page])
+#
+# check for struct hash_desc
+#
+AC_DEFUN([LC_STRUCT_HASH_DESC],
+[AC_MSG_CHECKING([if kernel has struct hash_desc])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/err.h>
+        #include <linux/crypto.h>
 ],[
 ],[
-])
-])
-
-AC_DEFUN([LC_EXPORT_TRUNCATE_RANGE],
-[LB_CHECK_SYMBOL_EXPORT([truncate_inode_pages_range],
-[mm/truncate.c],[
-AC_DEFINE(HAVE_TRUNCATE_RANGE, 1,
-            [kernel export truncate_inode_pages_range])
+        struct hash_desc foo;
 ],[
 ],[
-])
-])
-
-AC_DEFUN([LC_EXPORT_D_REHASH_COND],
-[LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-[fs/dcache.c],[
-AC_DEFINE(HAVE_D_REHASH_COND, 1,
-            [d_rehash_cond is exported by the kernel])
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_STRUCT_HASH_DESC, 1, [kernel has struct hash_desc])
 ],[
 ],[
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-AC_DEFUN([LC_EXPORT___D_REHASH],
-[LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-[fs/dcache.c],[
-AC_DEFINE(HAVE___D_REHASH, 1,
-            [__d_rehash is exported by the kernel])
+#
+# check for struct blkcipher_desc
+#
+AC_DEFUN([LC_STRUCT_BLKCIPHER_DESC],
+[AC_MSG_CHECKING([if kernel has struct blkcipher_desc])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/err.h>
+        #include <linux/crypto.h>
 ],[
 ],[
-])
-])
-
-AC_DEFUN([LC_EXPORT_D_MOVE_LOCKED],
-[LB_CHECK_SYMBOL_EXPORT([d_move_locked],
-[fs/dcache.c],[
-AC_DEFINE(HAVE_D_MOVE_LOCKED, 1,
-            [d_move_locked is exported by the kernel])
+        struct blkcipher_desc foo;
 ],[
 ],[
-])
-])
-
-AC_DEFUN([LC_EXPORT___D_MOVE],
-[LB_CHECK_SYMBOL_EXPORT([__d_move],
-[fs/dcache.c],[
-AC_DEFINE(HAVE___D_MOVE, 1,
-            [__d_move is exported by the kernel])
+        AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_STRUCT_BLKCIPHER_DESC, 1, [kernel has struct blkcipher_desc])
 ],[
 ],[
+        AC_MSG_RESULT([no])
 ])
 ])
 
 #
 ])
 ])
 
 #
-# LC_EXPORT_INVALIDATE_MAPPING_PAGES
-#
-# SLES9, RHEL4, RHEL5, vanilla 2.6.24 export invalidate_mapping_pages() but
-# SLES10 2.6.16 does not, for some reason.  For filter cache invalidation.
-#
-AC_DEFUN([LC_EXPORT_INVALIDATE_MAPPING_PAGES],
-    [LB_CHECK_SYMBOL_EXPORT([invalidate_mapping_pages], [mm/truncate.c], [
-         AC_DEFINE(HAVE_INVALIDATE_MAPPING_PAGES, 1,
-                        [exported invalidate_mapping_pages])],
-    [LB_CHECK_SYMBOL_EXPORT([invalidate_inode_pages], [mm/truncate.c], [
-         AC_DEFINE(HAVE_INVALIDATE_INODE_PAGES, 1,
-                        [exported invalidate_inode_pages])], [
-       AC_MSG_ERROR([no way to invalidate pages])
-  ])
-    ],[])
-])
-
-#
-# LC_EXPORT_FILEMAP_FDATASYNC_RANGE
-#
-# No standard kernels export this
+# 2.6.19 check for FS_RENAME_DOES_D_MOVE flag
 #
 #
-AC_DEFUN([LC_EXPORT_FILEMAP_FDATAWRITE_RANGE],
-[LB_CHECK_SYMBOL_EXPORT([filemap_fdatawrite_range],
-[mm/filemap.c],[
-AC_DEFINE(HAVE_FILEMAP_FDATAWRITE_RANGE, 1,
-            [filemap_fdatawrite_range is exported by the kernel])
-],[
-])
-])
-
-# The actual symbol exported varies among architectures, so we need
-# to check many symbols (but only in the current architecture.)  No
-# matter what symbol is exported, the kernel #defines node_to_cpumask
-# to the appropriate function and that's what we use.
-AC_DEFUN([LC_EXPORT_NODE_TO_CPUMASK],
-         [LB_CHECK_SYMBOL_EXPORT([node_to_cpumask],
-                                 [arch/$LINUX_ARCH/mm/numa.c],
-                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-                                            [node_to_cpumask is exported by
-                                             the kernel])]) # x86_64
-          LB_CHECK_SYMBOL_EXPORT([node_to_cpu_mask],
-                                 [arch/$LINUX_ARCH/kernel/smpboot.c],
-                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-                                            [node_to_cpumask is exported by
-                                             the kernel])]) # ia64
-          LB_CHECK_SYMBOL_EXPORT([node_2_cpu_mask],
-                                 [arch/$LINUX_ARCH/kernel/smpboot.c],
-                                 [AC_DEFINE(HAVE_NODE_TO_CPUMASK, 1,
-                                            [node_to_cpumask is exported by
-                                             the kernel])]) # i386
-          ])
-
-# 2.6.22 lost second parameter for invalidate_bdev
-AC_DEFUN([LC_INVALIDATE_BDEV_2ARG],
-[AC_MSG_CHECKING([if invalidate_bdev has second argument])
+AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
+[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/buffer_head.h>
+        #include <linux/fs.h>
 ],[
 ],[
-        invalidate_bdev(NULL,0);
+        int v = FS_RENAME_DOES_D_MOVE;
 ],[
         AC_MSG_RESULT([yes])
 ],[
         AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_INVALIDATE_BDEV_2ARG, 1,
-                [invalidate_bdev has second argument])
+        AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
-# 2.6.18
-
+# 2.6.23
 
 # 2.6.23 have return type 'void' for unregister_blkdev
 AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
 
 # 2.6.23 have return type 'void' for unregister_blkdev
 AC_DEFUN([LC_UNREGISTER_BLKDEV_RETURN_INT],
@@ -1103,37 +1528,37 @@ LB_LINUX_TRY_COMPILE([
 ])
 
 # 2.6.23 change .sendfile to .splice_read
 ])
 
 # 2.6.23 change .sendfile to .splice_read
-# RHEL4 (-92 kernel) have both sendfile and .splice_read API
-AC_DEFUN([LC_KERNEL_SENDFILE],
-[AC_MSG_CHECKING([if kernel has .sendfile])
+AC_DEFUN([LC_KERNEL_SPLICE_READ],
+[AC_MSG_CHECKING([if kernel has .splice_read])
 LB_LINUX_TRY_COMPILE([
         #include <linux/fs.h>
 ],[
         struct file_operations file;
 
 LB_LINUX_TRY_COMPILE([
         #include <linux/fs.h>
 ],[
         struct file_operations file;
 
-        file.sendfile = NULL;
+        file.splice_read = NULL;
 ], [
         AC_MSG_RESULT([yes])
 ], [
         AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
-                [kernel has .sendfile])
+        AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
+                [kernel has .splice_read])
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
 # 2.6.23 change .sendfile to .splice_read
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
 # 2.6.23 change .sendfile to .splice_read
-AC_DEFUN([LC_KERNEL_SPLICE_READ],
-[AC_MSG_CHECKING([if kernel has .splice_read])
+# RHEL4 (-92 kernel) have both sendfile and .splice_read API
+AC_DEFUN([LC_KERNEL_SENDFILE],
+[AC_MSG_CHECKING([if kernel has .sendfile])
 LB_LINUX_TRY_COMPILE([
         #include <linux/fs.h>
 ],[
         struct file_operations file;
 
 LB_LINUX_TRY_COMPILE([
         #include <linux/fs.h>
 ],[
         struct file_operations file;
 
-        file.splice_read = NULL;
+        file.sendfile = NULL;
 ], [
         AC_MSG_RESULT([yes])
 ], [
         AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_KERNEL_SPLICE_READ, 1,
-                [kernel has .slice_read])
+        AC_DEFINE(HAVE_KERNEL_SENDFILE, 1,
+                [kernel has .sendfile])
 ],[
         AC_MSG_RESULT([no])
 ])
 ],[
         AC_MSG_RESULT([no])
 ])
@@ -1167,19 +1592,13 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-#2.6.23 has new shrinker API
+# 2.6.23 has new shrinker API
 AC_DEFUN([LC_REGISTER_SHRINKER],
 AC_DEFUN([LC_REGISTER_SHRINKER],
-[AC_MSG_CHECKING([if kernel has register_shrinker])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/mm.h>
-],[
-        register_shrinker(NULL);
-], [
-        AC_MSG_RESULT([yes])
+[LB_CHECK_SYMBOL_EXPORT([register_shrinker],
+[mm/vmscan.c],[
         AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
         AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-                [kernel has register_shrinker])
+                  [kernel exports register_shrinker])
 ],[
 ],[
-        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
@@ -1201,6 +1620,28 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
+# 2.6.23 exports exportfs_decode_fh.
+# Define HAVE_EXPORTFS_DECODE_FH when the symbol export check against
+# fs/exportfs/expfs.c succeeds; nothing is defined otherwise.
+AC_DEFUN([LC_EXPORTFS_DECODE_FH],
+[LB_CHECK_SYMBOL_EXPORT([exportfs_decode_fh],
+[fs/exportfs/expfs.c],[
+        AC_DEFINE(HAVE_EXPORTFS_DECODE_FH, 1,
+                [exportfs_decode_fh is exported by the kernel])
+],[
+])
+])
+
+# 2.6.24
+
+# 2.6.24 needs linux/mm_types.h included.
+# Define HAVE_LINUX_MMTYPES_H when $LINUX/include/linux/mm_types.h exists.
+# NOTE(review): the not-found branch prints "no" without an AC_MSG_CHECKING
+# in this macro -- confirm LB_CHECK_FILE does not already report the result.
+AC_DEFUN([LC_HAVE_MMTYPES_H],
+[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
+        AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
+                [kernel has include/linux/mm_types.h])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
 # 2.6.24 has bio_endio with 2 args
 AC_DEFUN([LC_BIO_ENDIO_2ARG],
 [AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
 # 2.6.24 has bio_endio with 2 args
 AC_DEFUN([LC_BIO_ENDIO_2ARG],
 [AC_MSG_CHECKING([if kernel has bio_endio with 2 args])
@@ -1239,16 +1680,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# 2.6.24 need linux/mm_types.h included
-AC_DEFUN([LC_HAVE_MMTYPES_H],
-[LB_CHECK_FILE([$LINUX/include/linux/mm_types.h], [
-        AC_DEFINE(HAVE_LINUX_MMTYPES_H, 1,
-                [kernel has include/mm_types.h])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
 # 2.6.24 removes long aged procfs entry -> deleted member
 AC_DEFUN([LC_PROCFS_DELETED],
 [AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
 # 2.6.24 removes long aged procfs entry -> deleted member
 AC_DEFUN([LC_PROCFS_DELETED],
 [AC_MSG_CHECKING([if kernel has deleted member in procfs entry struct])
@@ -1267,6 +1698,18 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
+# 2.6.24 has bdi_init()/bdi_destroy(); only the bdi_init export is probed.
+AC_DEFUN([LC_EXPORT_BDI_INIT],
+[LB_CHECK_SYMBOL_EXPORT([bdi_init],
+[mm/backing-dev.c],[
+        AC_DEFINE(HAVE_BDI_INIT, 1,
+                [bdi_init/bdi_destroy functions are present])
+],[
+])
+])
+
+# 2.6.25
+
 # 2.6.25 change define to inline
 AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
 [AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty])
 # 2.6.25 change define to inline
 AC_DEFUN([LC_MAPPING_CAP_WRITEBACK_DIRTY],
 [AC_MSG_CHECKING([if kernel have mapping_cap_writeback_dirty])
@@ -1285,7 +1728,7 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-
+# 2.6.26
 
 # 2.6.26 isn't export set_fs_pwd and change paramter in fs struct
 AC_DEFUN([LC_FS_STRUCT_USE_PATH],
 
 # 2.6.26 isn't export set_fs_pwd and change paramter in fs struct
 AC_DEFUN([LC_FS_STRUCT_USE_PATH],
@@ -1302,280 +1745,14 @@ LB_LINUX_TRY_COMPILE([
 ], [
         AC_MSG_RESULT([yes])
         AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
 ], [
         AC_MSG_RESULT([yes])
         AC_DEFINE(HAVE_FS_STRUCT_USE_PATH, 1,
-                [fs_struct use path structure])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# LC_VFS_INTENT_PATCHES
-#
-# check if the kernel has the VFS intent patches
-AC_DEFUN([LC_VFS_INTENT_PATCHES],
-[AC_MSG_CHECKING([if the kernel has the VFS intent patches])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/fs.h>
-        #include <linux/namei.h>
-],[
-        struct nameidata nd;
-        struct lookup_intent *it;
-
-        it = &nd.intent;
-        intent_init(it, IT_OPEN);
-        it->d.lustre.it_disposition = 0;
-        it->d.lustre.it_data = NULL;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_VFS_INTENT_PATCHES, 1, [VFS intent patches are applied])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-AC_DEFUN([LC_S_TIME_GRAN],
-[AC_MSG_CHECKING([if super block has s_time_gran member])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-       struct super_block sb;
-
-        return sb.s_time_gran;
-],[
-       AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_S_TIME_GRAN, 1, [super block has s_time_gran member])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-AC_DEFUN([LC_SB_TIME_GRAN],
-[AC_MSG_CHECKING([if kernel has old get_sb_time_gran])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-       return get_sb_time_gran(NULL);
-],[
-        AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_SB_TIME_GRAN, 1, [kernel has old get_sb_time_gran])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP
-#
-# Check for our patched grab_cache_page_nowait_gfp() function
-# after 2.6.29 we can emulate this using add_to_page_cache_lru()
-#
-AC_DEFUN([LC_FUNC_GRAB_CACHE_PAGE_NOWAIT_GFP],
-[LB_CHECK_SYMBOL_EXPORT([grab_cache_page_nowait_gfp],
-[mm/filemap.c],[
-        AC_DEFINE(HAVE_GRAB_CACHE_PAGE_NOWAIT_GFP, 1,
-                  [kernel exports grab_cache_page_nowait_gfp])
-        ],
-        [LB_CHECK_SYMBOL_EXPORT([add_to_page_cache_lru],
-        [mm/filemap.c],[
-                AC_DEFINE(HAVE_ADD_TO_PAGE_CACHE_LRU, 1,
-                        [kernel exports add_to_page_cache_lru])
-        ],[
-        ])
-        ])
-])
-
-# ~2.6.12 merge patch from oracle to convert tree_lock from spinlock to rwlock
-AC_DEFUN([LC_RW_TREE_LOCK],
-[AC_MSG_CHECKING([if kernel has tree_lock as rwlock])
-tmp_flags="$EXTRA_KCFLAGS"
-EXTRA_KCFLAGS="-Werror"
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-       struct address_space a;
-
-       write_lock(&a.tree_lock);
-],[
-        AC_MSG_RESULT([yes])
-       AC_DEFINE(HAVE_RW_TREE_LOCK, 1, [kernel has tree_lock as rw_lock])
-],[
-        AC_MSG_RESULT([no])
-])
-EXTRA_KCFLAGS="$tmp_flags"
-])
-
-AC_DEFUN([LC_CONST_ACL_SIZE],
-[AC_MSG_CHECKING([calc acl size])
-tmp_flags="$CFLAGS"
-CFLAGS="$CFLAGS -I$LINUX/include -I$LINUX_OBJ/include -I$LINUX_OBJ/include2 -I$LINUX/arch/`uname -m|sed -e 's/ppc.*/powerpc/' -e 's/x86_64/x86/' -e 's/i.86/x86/'`/include $EXTRA_KCFLAGS"
-AC_TRY_RUN([
-#define __KERNEL__
-#include <linux/autoconf.h>
-#include <linux/types.h>
-#undef __KERNEL__
-// block include
-#define __LINUX_POSIX_ACL_H
-
-# ifdef CONFIG_FS_POSIX_ACL
-#  ifdef HAVE_XATTR_ACL
-#   include <linux/xattr_acl.h>
-#  endif
-#  ifdef HAVE_LINUX_POSIX_ACL_XATTR_H
-#   include <linux/posix_acl_xattr.h>
-#  endif
-# endif
-
-#include <lustre_acl.h>
-
-#include <stdio.h>
-
-int main(void)
-{
-    int size = mds_xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES);
-    FILE *f = fopen("acl.size","w+");
-    fprintf(f,"%d", size);
-    fclose(f);
-
-    return 0;
-}
-
-],[
-       acl_size=`cat acl.size`
-       AC_MSG_RESULT([ACL size $acl_size])
-        AC_DEFINE_UNQUOTED(XATTR_ACL_SIZE, AS_TR_SH([$acl_size]), [size of xattr acl])
-],[
-        AC_ERROR([ACL size can't computed])
-])
-CFLAGS="$tmp_flags"
-])
-
-#
-# check for crypto API
-#
-AC_DEFUN([LC_ASYNC_BLOCK_CIPHER],
-[AC_MSG_CHECKING([if kernel has block cipher support])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/err.h>
-        #include <linux/crypto.h>
-],[
-        struct crypto_blkcipher *tfm;
-        tfm = crypto_alloc_blkcipher("aes", 0, 0 );
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_ASYNC_BLOCK_CIPHER, 1, [kernel has block cipher support])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# check for struct hash_desc
-#
-AC_DEFUN([LC_STRUCT_HASH_DESC],
-[AC_MSG_CHECKING([if kernel has struct hash_desc])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/err.h>
-        #include <linux/crypto.h>
-],[
-        struct hash_desc foo;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_STRUCT_HASH_DESC, 1, [kernel has struct hash_desc])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# check for struct blkcipher_desc
-#
-AC_DEFUN([LC_STRUCT_BLKCIPHER_DESC],
-[AC_MSG_CHECKING([if kernel has struct blkcipher_desc])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/err.h>
-        #include <linux/crypto.h>
-],[
-        struct blkcipher_desc foo;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_STRUCT_BLKCIPHER_DESC, 1, [kernel has struct blkcipher_desc])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# 2.6.19 check for FS_RENAME_DOES_D_MOVE flag
-#
-AC_DEFUN([LC_FS_RENAME_DOES_D_MOVE],
-[AC_MSG_CHECKING([if kernel has FS_RENAME_DOES_D_MOVE flag])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        int v = FS_RENAME_DOES_D_MOVE;
-],[
-        AC_MSG_RESULT([yes])
-        AC_DEFINE(HAVE_FS_RENAME_DOES_D_MOVE, 1, [kernel has FS_RENAME_DOES_D_MOVE flag])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# LC_FUNC_F_OP_FLOCK
-#
-# rhel4.2 kernel has f_op->flock field
-#
-AC_DEFUN([LC_FUNC_F_OP_FLOCK],
-[AC_MSG_CHECKING([if struct file_operations has flock field])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        struct file_operations ll_file_operations_flock;
-        ll_file_operations_flock.flock = NULL;
-],[
-        AC_DEFINE(HAVE_F_OP_FLOCK, 1,
-                [struct file_operations has flock field])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-# vfs_symlink seems to have started out with 3 args until 2.6.7 where a
-# "mode" argument was added, but then again, in some later version it was
-# removed
-AC_DEFUN([LC_4ARGS_VFS_SYMLINK],
-[AC_MSG_CHECKING([if vfs_symlink wants 4 args])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/fs.h>
-],[
-       struct inode *dir;
-       struct dentry *dentry;
-       const char *oldname = NULL;
-       int mode = 0;
-
-       vfs_symlink(dir, dentry, oldname, mode);
-],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_4ARGS_VFS_SYMLINK, 1,
-                  [vfs_symlink wants 4 args])
+                [fs_struct use path structure])
 ],[
 ],[
-        AC_MSG_RESULT(no)
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
-# 2.6.23 has new shrinker API
-AC_DEFUN([LC_REGISTER_SHRINKER],
-[LB_CHECK_SYMBOL_EXPORT([register_shrinker],
-[mm/vmscan.c],[
-        AC_DEFINE(HAVE_REGISTER_SHRINKER, 1,
-                  [kernel exports register_shrinker])
-],[
-])
-])
+# 2.6.27
 
 
-#2.6.27
 AC_DEFUN([LC_INODE_PERMISION_2ARGS],
 [AC_MSG_CHECKING([inode_operations->permission has two args])
 LB_LINUX_TRY_COMPILE([
 AC_DEFUN([LC_INODE_PERMISION_2ARGS],
 [AC_MSG_CHECKING([inode_operations->permission has two args])
 LB_LINUX_TRY_COMPILE([
@@ -1625,82 +1802,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# vfs_symlink seems to have started out with 3 args until 2.6.7 where a
-# "mode" argument was added, but then again, in some later version it was
-# removed
-AC_DEFUN([LC_4ARGS_VFS_SYMLINK],
-[AC_MSG_CHECKING([if vfs_symlink wants 4 args])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/fs.h>
-],[
-       struct inode *dir;
-       struct dentry *dentry;
-       const char *oldname = NULL;
-       int mode = 0;
-
-       vfs_symlink(dir, dentry, oldname, mode);
-],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_4ARGS_VFS_SYMLINK, 1,
-                  [vfs_symlink wants 4 args])
-],[
-        AC_MSG_RESULT(no)
-])
-])
-
-# 2.6.27 sles11 remove the bi_hw_segments
-AC_DEFUN([LC_BI_HW_SEGMENTS],
-[AC_MSG_CHECKING([struct bio has a bi_hw_segments field])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/bio.h>
-],[
-        struct bio io;
-        io.bi_hw_segments = 0;
-],[
-        AC_DEFINE(HAVE_BI_HW_SEGMENTS, 1,
-                [struct bio has a bi_hw_segments field])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# 2.6.27 sles11 move the quotaio_v1{2}.h from include/linux to fs
-# 2.6.32 move the quotaio_v1{2}.h from fs to fs/quota
-AC_DEFUN([LC_HAVE_QUOTAIO_V1_H],
-[LB_CHECK_FILE([$LINUX/include/linux/quotaio_v1.h],[
-        AC_DEFINE(HAVE_QUOTAIO_V1_H, 1,
-                [kernel has include/linux/quotaio_v1.h])
-],[LB_CHECK_FILE([$LINUX/fs/quota/quotaio_v1.h],[
-               AC_DEFINE(HAVE_FS_QUOTA_QUOTAIO_V1_H, 1,
-                [kernel has fs/quota/quotaio_v1.h])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-])
-
-# sles10 sp2 need 5 parameter for vfs_symlink
-AC_DEFUN([LC_VFS_SYMLINK_5ARGS],
-[AC_MSG_CHECKING([vfs_symlink need 5 parameter])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        struct inode *dir = NULL;
-        struct dentry *dentry = NULL;
-        struct vfsmount *mnt = NULL;
-        const char * path = NULL;
-        vfs_symlink(dir, dentry, mnt, path, 0);
-],[
-        AC_DEFINE(HAVE_VFS_SYMLINK_5ARGS, 1,
-                [vfs_symlink need 5 parameteres])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
 # 2.6.27 removed the read_inode from super_operations.
 AC_DEFUN([LC_READ_INODE_IN_SBOPS],
 [AC_MSG_CHECKING([super_operations has a read_inode field])
 # 2.6.27 removed the read_inode from super_operations.
 AC_DEFUN([LC_READ_INODE_IN_SBOPS],
 [AC_MSG_CHECKING([super_operations has a read_inode field])
@@ -1718,38 +1819,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# 2.6.27 sles11 has sb_any_quota_active
-AC_DEFUN([LC_SB_ANY_QUOTA_ACTIVE],
-[AC_MSG_CHECKING([Kernel has sb_any_quota_active])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/quotaops.h>
-],[
-        sb_any_quota_active(NULL);
-],[
-        AC_DEFINE(HAVE_SB_ANY_QUOTA_ACTIVE, 1,
-                [Kernel has a sb_any_quota_active])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-# 2.6.27 sles11 has sb_has_quota_active
-AC_DEFUN([LC_SB_HAS_QUOTA_ACTIVE],
-[AC_MSG_CHECKING([Kernel has sb_has_quota_active])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/quotaops.h>
-],[
-        sb_has_quota_active(NULL, 0);
-],[
-        AC_DEFINE(HAVE_SB_HAS_QUOTA_ACTIVE, 1,
-                [Kernel has a sb_has_quota_active])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
 # 2.6.27 has inode_permission instead of permisson
 AC_DEFUN([LC_EXPORT_INODE_PERMISSION],
 [LB_CHECK_SYMBOL_EXPORT([inode_permission],
 # 2.6.27 has inode_permission instead of permisson
 AC_DEFUN([LC_EXPORT_INODE_PERMISSION],
 [LB_CHECK_SYMBOL_EXPORT([inode_permission],
@@ -1809,28 +1878,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-#
-# LC_LINUX_FIEMAP_H
-#
-# If we have fiemap.h
-# after 2.6.27 use fiemap.h in include/linux
-#
-AC_DEFUN([LC_LINUX_FIEMAP_H],
-[LB_CHECK_FILE([$LINUX/include/linux/fiemap.h],[
-        AC_MSG_CHECKING([if fiemap.h can be compiled])
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/types.h>
-                #include <linux/fiemap.h>
-        ],[],[
-                AC_MSG_RESULT([yes])
-                AC_DEFINE(HAVE_LINUX_FIEMAP_H, 1, [Kernel has fiemap.h])
-        ],[
-                AC_MSG_RESULT([no])
-        ])
-],
-[])
-])
-
 # LC_LOCK_MAP_ACQUIRE
 # after 2.6.27 lock_map_acquire replaces lock_acquire
 AC_DEFUN([LC_LOCK_MAP_ACQUIRE],
 # LC_LOCK_MAP_ACQUIRE
 # after 2.6.27 lock_map_acquire replaces lock_acquire
 AC_DEFUN([LC_LOCK_MAP_ACQUIRE],
@@ -1848,26 +1895,95 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
+# 2.6.27.15-2 sles11
+
+# 2.6.27 sles11 removes bi_hw_segments from struct bio; detect if it remains
+AC_DEFUN([LC_BI_HW_SEGMENTS],
+[AC_MSG_CHECKING([struct bio has a bi_hw_segments field])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/bio.h>
+],[
+        struct bio io;
+        io.bi_hw_segments = 0;
+],[
+        AC_DEFINE(HAVE_BI_HW_SEGMENTS, 1,
+                [struct bio has a bi_hw_segments field])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
 #
 #
-# LC_D_OBTAIN_ALIAS
-# starting from 2.6.28 kernel replaces d_alloc_anon() with
-# d_obtain_alias() for getting anonymous dentries
-#
-AC_DEFUN([LC_D_OBTAIN_ALIAS],
-[AC_MSG_CHECKING([d_obtain_alias exist in kernel])
+# 2.6.27 sles11 moves quotaio_v1{2}.h from include/linux to fs
+# 2.6.32 moves quotaio_v1{2}.h from fs to fs/quota
+AC_DEFUN([LC_HAVE_QUOTAIO_V1_H],
+[LB_CHECK_FILE([$LINUX/include/linux/quotaio_v1.h],[
+        AC_DEFINE(HAVE_QUOTAIO_V1_H, 1,
+                [kernel has include/linux/quotaio_v1.h])
+],[LB_CHECK_FILE([$LINUX/fs/quota/quotaio_v1.h],[
+               AC_DEFINE(HAVE_FS_QUOTA_QUOTAIO_V1_H, 1,
+                [kernel has fs/quota/quotaio_v1.h])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+])
+
+# sles10 sp2 needs 5 parameters for vfs_symlink
+AC_DEFUN([LC_VFS_SYMLINK_5ARGS],
+[AC_MSG_CHECKING([vfs_symlink need 5 parameter])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/dcache.h>
+        #include <linux/fs.h>
 ],[
 ],[
-        d_obtain_alias(NULL);
+        struct inode *dir = NULL;
+        struct dentry *dentry = NULL;
+        struct vfsmount *mnt = NULL;
+        const char * path = NULL;
+        vfs_symlink(dir, dentry, mnt, path, 0);
 ],[
 ],[
-        AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1,
-                [d_obtain_alias exist in kernel])
+        AC_DEFINE(HAVE_VFS_SYMLINK_5ARGS, 1,
+                [vfs_symlink need 5 parameteres])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+# 2.6.27 sles11 has sb_any_quota_active(); probe it through linux/quotaops.h
+AC_DEFUN([LC_SB_ANY_QUOTA_ACTIVE],
+[AC_MSG_CHECKING([Kernel has sb_any_quota_active])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/quotaops.h>
+],[
+        sb_any_quota_active(NULL);
+],[
+        AC_DEFINE(HAVE_SB_ANY_QUOTA_ACTIVE, 1,
+                [Kernel has a sb_any_quota_active])
+        AC_MSG_RESULT([yes])
+],[
+        AC_MSG_RESULT([no])
+])
+])
+
+# 2.6.27 sles11 has sb_has_quota_active(); probe a two-argument call to it
+AC_DEFUN([LC_SB_HAS_QUOTA_ACTIVE],
+[AC_MSG_CHECKING([Kernel has sb_has_quota_active])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/quotaops.h>
+],[
+        sb_has_quota_active(NULL, 0);
+],[
+        AC_DEFINE(HAVE_SB_HAS_QUOTA_ACTIVE, 1,
+                [Kernel has a sb_has_quota_active])
         AC_MSG_RESULT([yes])
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
         AC_MSG_RESULT([yes])
 ],[
         AC_MSG_RESULT([no])
 ])
 ])
 
+# 2.6.31
+
 # 2.6.31 replaces blk_queue_hardsect_size by blk_queue_logical_block_size function
 AC_DEFUN([LC_BLK_QUEUE_LOG_BLK_SIZE],
 [AC_MSG_CHECKING([if blk_queue_logical_block_size is defined])
 # 2.6.31 replaces blk_queue_hardsect_size by blk_queue_logical_block_size function
 AC_DEFUN([LC_BLK_QUEUE_LOG_BLK_SIZE],
 [AC_MSG_CHECKING([if blk_queue_logical_block_size is defined])
@@ -1884,6 +2000,8 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
+# 2.6.32
+
 # 2.6.32 add a limits member in struct request_queue.
 AC_DEFUN([LC_REQUEST_QUEUE_LIMITS],
 [AC_MSG_CHECKING([if request_queue has a limits field])
 # 2.6.32 add a limits member in struct request_queue.
 AC_DEFUN([LC_REQUEST_QUEUE_LIMITS],
 [AC_MSG_CHECKING([if request_queue has a limits field])
@@ -1901,24 +2019,34 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# RHEL6(backport from 2.6.34) removes 2 functions blk_queue_max_phys_segments and
-# blk_queue_max_hw_segments add blk_queue_max_segments
-AC_DEFUN([LC_BLK_QUEUE_MAX_SEGMENTS],
-[AC_MSG_CHECKING([if blk_queue_max_segments is defined])
+# 2.6.32 has the bdi_register() function; probe its export in mm/backing-dev.c.
+AC_DEFUN([LC_EXPORT_BDI_REGISTER],
+[LB_CHECK_SYMBOL_EXPORT([bdi_register],
+[mm/backing-dev.c],[
+        AC_DEFINE(HAVE_BDI_REGISTER, 1,
+                [bdi_register function is present])
+],[
+])
+])
+
+# 2.6.32 adds s_bdi to struct super_block
+AC_DEFUN([LC_SB_BDI],
+[AC_MSG_CHECKING([if super_block has s_bdi field])
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/blkdev.h>
+        #include <linux/fs.h>
 ],[
 ],[
-        blk_queue_max_segments(NULL, 0);
+        struct super_block sb;
+        sb.s_bdi = NULL;
 ],[
         AC_MSG_RESULT(yes)
 ],[
         AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1,
-                  [blk_queue_max_segments is defined])
+        AC_DEFINE(HAVE_SB_BDI, 1,
+                  [super_block has s_bdi field])
 ],[
         AC_MSG_RESULT(no)
 ])
 ])
 
 ],[
         AC_MSG_RESULT(no)
 ])
 ])
 
-# RHEL6(backport from 2.6.34) removes blk_queue_max_sectors and add blk_queue_max_hw_sectors
+# 2.6.32 removes blk_queue_max_sectors and adds blk_queue_max_hw_sectors
 # check blk_queue_max_sectors and use it until disappear.
 AC_DEFUN([LC_BLK_QUEUE_MAX_SECTORS],
 [AC_MSG_CHECKING([if blk_queue_max_sectors is defined])
 # check blk_queue_max_sectors and use it until disappear.
 AC_DEFUN([LC_BLK_QUEUE_MAX_SECTORS],
 [AC_MSG_CHECKING([if blk_queue_max_sectors is defined])
@@ -1935,43 +2063,118 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-# 2.6.32 has new BDI interface.
-AC_DEFUN([LC_NEW_BACKING_DEV_INFO],
-[AC_MSG_CHECKING([if backing_dev_info has a wb_cnt field])
+# 2.6.32 replaces 2 functions blk_queue_max_phys_segments and blk_queue_max_hw_segments by blk_queue_max_segments
+AC_DEFUN([LC_BLK_QUEUE_MAX_SEGMENTS],
+[AC_MSG_CHECKING([if blk_queue_max_segments is defined])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/blkdev.h>
+],[
+        blk_queue_max_segments(NULL, 0);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1,
+                  [blk_queue_max_segments is defined])
+],[
+        AC_MSG_RESULT(no)
+])
+])
+
+# 2.6.32-71 adds an argument to shrink callback
+AC_DEFUN([LC_SHRINK_3ARGS],
+[AC_MSG_CHECKING([if shrink has 3 arguments])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/mm.h>
+],[
+        struct shrinker s;
+        return s.shrink(NULL, 0, 0);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_SHRINK_3ARGS, 1,
+                  [shrink has 3 arguments])
+],[
+        AC_MSG_RESULT(no)
+])
+])
+
+#
+# LC_EXT4_SINGLEDATA_TRANS_BLOCKS_SB
+#
+AC_DEFUN([LC_EXT4_SINGLEDATA_TRANS_BLOCKS_SB],
+[AC_MSG_CHECKING([if EXT4_SINGLEDATA_TRANS_BLOCKS takes the sb as argument])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-I$LINUX/fs"
 LB_LINUX_TRY_COMPILE([
 LB_LINUX_TRY_COMPILE([
-        #include <linux/backing-dev.h>
+        #include <ext4/ext4.h>
+        #include <ext4/ext4_jbd2.h>
 ],[
 ],[
-        struct backing_dev_info bdi;
-        bdi.wb_cnt = 0;
+        struct super_block sb;
+        EXT4_SINGLEDATA_TRANS_BLOCKS(&sb);
 ],[
         AC_MSG_RESULT(yes)
 ],[
         AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_NEW_BACKING_DEV_INFO, 1,
-                  [backing_dev_info has a wb_cnt field])
+        AC_DEFINE(LDISKFS_SINGLEDATA_TRANS_BLOCKS_HAS_SB, 1,
+                  [EXT4_SINGLEDATA_TRANS_BLOCKS takes sb as argument])
 ],[
         AC_MSG_RESULT(no)
 ])
 ],[
         AC_MSG_RESULT(no)
 ])
+EXTRA_KCFLAGS="$tmp_flags"
 ])
 
 ])
 
-# 2.6.24 has bdi_init()/bdi_destroy() functions.
-AC_DEFUN([LC_EXPORT_BDI_INIT],
-[LB_CHECK_SYMBOL_EXPORT([bdi_init],
-[mm/backing-dev.c],[
-        AC_DEFINE(HAVE_BDI_INIT, 1,
-                [bdi_init/bdi_destroy functions are present])
-],[
-])
+#
+# LC_QUOTA64
+# check whether the Linux kernel has 64-bit quota limits support
+#
+AC_DEFUN([LC_QUOTA64],[
+        AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-I$LINUX/fs"
+        LB_LINUX_TRY_COMPILE([
+                #include <linux/kernel.h>
+                #include <linux/fs.h>
+                #ifdef HAVE_QUOTAIO_V1_H
+                # include <linux/quotaio_v2.h>
+                int versions[] = V2_INITQVERSIONS_R1;
+                struct v2_disk_dqblk_r1 dqblk_r1;
+                #else
+                # ifdef HAVE_FS_QUOTA_QUOTAIO_V1_H
+                #  include <quota/quotaio_v2.h>
+                # else
+                #  include <quotaio_v2.h>
+                # endif
+                struct v2r1_disk_dqblk dqblk_r1;
+                #endif
+        ],[],[
+                AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
+                AC_MSG_RESULT([yes])
+        ],[
+                LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
+                        AC_MSG_ERROR([You have got no 64-bit kernel quota support.])
+                ],[])
+                AC_MSG_RESULT([no])
+        ])
+EXTRA_KCFLAGS=$tmp_flags
 ])
 
 ])
 
-# 2.6.23 exports exportfs_decode_fh
-AC_DEFUN([LC_EXPORTFS_DECODE_FH],
-[LB_CHECK_SYMBOL_EXPORT([exportfs_decode_fh],
-[fs/exportfs/expfs.c],[
-        AC_DEFINE(HAVE_EXPORTFS_DECODE_FH, 1,
-                [exportfs_decode_fh has been export])
+#
+# LC_D_OBTAIN_ALIAS
+# starting from 2.6.28 kernel replaces d_alloc_anon() with
+# d_obtain_alias() for getting anonymous dentries
+#
+AC_DEFUN([LC_D_OBTAIN_ALIAS],
+[AC_MSG_CHECKING([d_obtain_alias exist in kernel])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/dcache.h>
+],[
+        d_obtain_alias(NULL);
+],[
+        AC_DEFINE(HAVE_D_OBTAIN_ALIAS, 1,
+                [d_obtain_alias exist in kernel])
+        AC_MSG_RESULT([yes])
 ],[
 ],[
+        AC_MSG_RESULT([no])
 ])
 ])
 
 ])
 ])
 
+
 #
 # LC_PROG_LINUX
 #
 #
 # LC_PROG_LINUX
 #
@@ -2018,7 +2221,6 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_CAPA_CRYPTO
          LC_CONFIG_RMTCLIENT
          LC_CONFIG_GSS
          LC_CAPA_CRYPTO
          LC_CONFIG_RMTCLIENT
          LC_CONFIG_GSS
-         LC_FUNC_MS_FLOCK_LOCK
          LC_FUNC_HAVE_CAN_SLEEP_ARG
          LC_FUNC_F_OP_FLOCK
          LC_QUOTA_READ
          LC_FUNC_HAVE_CAN_SLEEP_ARG
          LC_FUNC_F_OP_FLOCK
          LC_QUOTA_READ
@@ -2026,10 +2228,6 @@ AC_DEFUN([LC_PROG_LINUX],
          LC_FUNC_RCU
          LC_PERCPU_COUNTER
          LC_TASK_CLENV_STORE
          LC_FUNC_RCU
          LC_PERCPU_COUNTER
          LC_TASK_CLENV_STORE
-         LC_4ARGS_VFS_SYMLINK
-
-         # does the kernel have VFS intent patches?
-         LC_VFS_INTENT_PATCHES
 
          # ~2.6.11
          LC_S_TIME_GRAN
 
          # ~2.6.11
          LC_S_TIME_GRAN
@@ -2061,6 +2259,8 @@ AC_DEFUN([LC_PROG_LINUX],
          if test x$enable_server = xyes ; then
                 LC_EXPORT_INVALIDATE_MAPPING_PAGES
          fi
          if test x$enable_server = xyes ; then
                 LC_EXPORT_INVALIDATE_MAPPING_PAGES
          fi
+         LC_EXT4_DISCARD_PREALLOCATIONS
+         LC_EXT_INSERT_EXTENT_WITH_5ARGS
 
          #2.6.18 + RHEL5 (fc6)
          LC_PG_FS_MISC
 
          #2.6.18 + RHEL5 (fc6)
          LC_PG_FS_MISC
@@ -2132,9 +2332,12 @@ AC_DEFUN([LC_PROG_LINUX],
 
          # 2.6.32
          LC_REQUEST_QUEUE_LIMITS
 
          # 2.6.32
          LC_REQUEST_QUEUE_LIMITS
-         LC_NEW_BACKING_DEV_INFO
+         LC_EXPORT_BDI_REGISTER
+         LC_SB_BDI
          LC_BLK_QUEUE_MAX_SECTORS
          LC_BLK_QUEUE_MAX_SEGMENTS
          LC_BLK_QUEUE_MAX_SECTORS
          LC_BLK_QUEUE_MAX_SEGMENTS
+         LC_SHRINK_3ARGS
+         LC_EXT4_SINGLEDATA_TRANS_BLOCKS_SB
 
          #
          if test x$enable_server = xyes ; then
 
          #
          if test x$enable_server = xyes ; then
@@ -2310,18 +2513,6 @@ LC_CONFIG_PINGER
 LC_CONFIG_LIBLUSTRE_RECOVERY
 ])
 
 LC_CONFIG_LIBLUSTRE_RECOVERY
 ])
 
-AC_DEFUN([LC_CONFIG_LRU_RESIZE],
-[AC_MSG_CHECKING([whether to enable lru self-adjusting])
-AC_ARG_ENABLE([lru_resize],
-       AC_HELP_STRING([--enable-lru-resize],
-                       [enable lru resize support]),
-       [],[enable_lru_resize='yes'])
-AC_MSG_RESULT([$enable_lru_resize])
-if test x$enable_lru_resize != xno; then
-   AC_DEFINE(HAVE_LRU_RESIZE_SUPPORT, 1, [Enable lru resize support])
-fi
-])
-
 #
 # LC_CONFIG_QUOTA
 #
 #
 # LC_CONFIG_QUOTA
 #
@@ -2334,19 +2525,6 @@ AC_DEFUN([LC_CONFIG_QUOTA],
        [],[enable_quota='yes'])
 ])
 
        [],[enable_quota='yes'])
 ])
 
-# whether to enable quota support(kernel modules)
-AC_DEFUN([LC_QUOTA_MODULE],
-[if test x$enable_quota != xno; then
-    LB_LINUX_CONFIG([QUOTA],[
-       enable_quota_module='yes'
-       AC_DEFINE(HAVE_QUOTA_SUPPORT, 1, [Enable quota support])
-    ],[
-       enable_quota_module='no'
-       AC_MSG_WARN([quota is not enabled because the kernel - lacks quota support])
-    ])
-fi
-])
-
 AC_DEFUN([LC_QUOTA],
 [#check global
 LC_CONFIG_QUOTA
 AC_DEFUN([LC_QUOTA],
 [#check global
 LC_CONFIG_QUOTA
@@ -2356,21 +2534,6 @@ AC_CHECK_HEADER(sys/quota.h,
                 [AC_MSG_ERROR([don't find <sys/quota.h> in your system])])
 ])
 
                 [AC_MSG_ERROR([don't find <sys/quota.h> in your system])])
 ])
 
-AC_DEFUN([LC_QUOTA_READ],
-[AC_MSG_CHECKING([if kernel supports quota_read])
-LB_LINUX_TRY_COMPILE([
-       #include <linux/fs.h>
-],[
-       struct super_operations sp;
-        void *i = (void *)sp.quota_read;
-],[
-       AC_MSG_RESULT([yes])
-       AC_DEFINE(KERNEL_SUPPORTS_QUOTA_READ, 1, [quota_read found])
-],[
-       AC_MSG_RESULT([no])
-])
-])
-
 #
 # LC_CONFIG_SPLIT
 #
 #
 # LC_CONFIG_SPLIT
 #
@@ -2388,144 +2551,6 @@ if test x$enable_split != xno; then
 fi
 ])
 
 fi
 ])
 
-#
-# LC_COOKIE_FOLLOW_LINK
-#
-# kernel 2.6.13+ ->follow_link returns a cookie
-#
-
-AC_DEFUN([LC_COOKIE_FOLLOW_LINK],
-[AC_MSG_CHECKING([if inode_operations->follow_link returns a cookie])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-        #include <linux/namei.h>
-],[
-        struct dentry dentry;
-        struct nameidata nd;
-
-        dentry.d_inode->i_op->put_link(&dentry, &nd, NULL);
-],[
-        AC_DEFINE(HAVE_COOKIE_FOLLOW_LINK, 1, [inode_operations->follow_link returns a cookie])
-        AC_MSG_RESULT([yes])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# LC_FUNC_RCU
-#
-# kernels prior than 2.6.0(?) have no RCU supported; in kernel 2.6.5(SUSE),
-# call_rcu takes three parameters.
-#
-AC_DEFUN([LC_FUNC_RCU],
-[AC_MSG_CHECKING([if kernel have RCU supported])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/rcupdate.h>
-],[],[
-        AC_DEFINE(HAVE_RCU, 1, [have RCU defined])
-        AC_MSG_RESULT([yes])
-
-        AC_MSG_CHECKING([if call_rcu takes three parameters])
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/rcupdate.h>
-        ],[
-                struct rcu_head rh;
-                call_rcu(&rh, (void (*)(struct rcu_head *))1, NULL);
-        ],[
-                AC_DEFINE(HAVE_CALL_RCU_PARAM, 1, [call_rcu takes three parameters])
-                AC_MSG_RESULT([yes])
-        ],[
-                AC_MSG_RESULT([no])
-        ])
-
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
-#
-# LC_QUOTA64
-# linux kernel have 64-bit limits support
-#
-AC_DEFUN([LC_QUOTA64],[
-        AC_MSG_CHECKING([if kernel has 64-bit quota limits support])
-tmp_flags="$EXTRA_KCFLAGS"
-EXTRA_KCFLAGS="-I$LINUX/fs"
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/kernel.h>
-                #include <linux/fs.h>
-                #ifdef HAVE_QUOTAIO_V1_H
-                # include <linux/quotaio_v2.h>
-                int versions[] = V2_INITQVERSIONS_R1;
-                struct v2_disk_dqblk_r1 dqblk_r1;
-                #else
-                # ifdef HAVE_FS_QUOTA_QUOTAIO_V1_H
-                #  include <quota/quotaio_v2.h>
-                # else
-                #  include <quotaio_v2.h>
-                # endif
-                struct v2r1_disk_dqblk dqblk_r1;
-                #endif
-        ],[],[
-                AC_DEFINE(HAVE_QUOTA64, 1, [have quota64])
-                AC_MSG_RESULT([yes])
-        ],[
-                LB_CHECK_FILE([$LINUX/include/linux/lustre_version.h],[
-                        AC_MSG_ERROR([You have got no 64-bit kernel quota support.])
-                ],[])
-                AC_MSG_RESULT([no])
-        ])
-EXTRA_KCFLAGS=$tmp_flags
-])
-
-# LC_SECURITY_PLUG  # for SLES10 SP2
-# check security plug in sles10 sp2 kernel
-AC_DEFUN([LC_SECURITY_PLUG],
-[AC_MSG_CHECKING([If kernel has security plug support])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/fs.h>
-],[
-        struct dentry   *dentry;
-        struct vfsmount *mnt;
-        struct iattr    *iattr;
-
-        notify_change(dentry, mnt, iattr);
-],[
-        AC_MSG_RESULT(yes)
-        AC_DEFINE(HAVE_SECURITY_PLUG, 1,
-                [SLES10 SP2 use extra parameter in vfs])
-],[
-        AC_MSG_RESULT(no)
-])
-])
-
-AC_DEFUN([LC_PERCPU_COUNTER],
-[AC_MSG_CHECKING([if have struct percpu_counter defined])
-LB_LINUX_TRY_COMPILE([
-        #include <linux/percpu_counter.h>
-],[],[
-        AC_DEFINE(HAVE_PERCPU_COUNTER, 1, [percpu_counter found])
-        AC_MSG_RESULT([yes])
-
-        AC_MSG_CHECKING([if percpu_counter_inc takes the 2nd argument])
-        LB_LINUX_TRY_COMPILE([
-                #include <linux/percpu_counter.h>
-        ],[
-                struct percpu_counter c;
-                percpu_counter_init(&c, 0);
-        ],[
-                AC_DEFINE(HAVE_PERCPU_2ND_ARG, 1, [percpu_counter_init has two
-                                                   arguments])
-                AC_MSG_RESULT([yes])
-        ],[
-                AC_MSG_RESULT([no])
-        ])
-],[
-        AC_MSG_RESULT([no])
-])
-])
-
 AC_DEFUN([LC_TASK_CLENV_TUX_INFO],
 [AC_MSG_CHECKING([tux_info])
 LB_LINUX_TRY_COMPILE([
 AC_DEFUN([LC_TASK_CLENV_TUX_INFO],
 [AC_MSG_CHECKING([tux_info])
 LB_LINUX_TRY_COMPILE([
@@ -2542,14 +2567,6 @@ LB_LINUX_TRY_COMPILE([
 ])
 ])
 
 ])
 ])
 
-AC_DEFUN([LC_TASK_CLENV_STORE],
-[
-        AC_MSG_CHECKING([if we can store cl_env in task_struct])
-        if test x$have_task_clenv_store != xyes ; then
-                LC_TASK_CLENV_TUX_INFO
-        fi
-])
-
 #
 # LC_LLITE_LLOOP_MODULE
 # lloop_llite.ko does not currently work with page sizes
 #
 # LC_LLITE_LLOOP_MODULE
 # lloop_llite.ko does not currently work with page sizes
index 00875ee..cc504e5 100644 (file)
@@ -690,8 +690,16 @@ static inline int ll_crypto_hmac(struct crypto_tfm *tfm,
 #define cpu_to_node(cpu)         0
 #endif
 
 #define cpu_to_node(cpu)         0
 #endif
 
-#ifdef HAVE_REGISTER_SHRINKER
+#ifndef HAVE_REGISTER_SHRINKER
+#define KERN_SHRINKER(name) name(int nr_to_scan, gfp_t gfp_mask)
+#else
+#ifdef HAVE_SHRINK_3ARGS
+typedef int (*cfs_shrinker_t)(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
+#define KERN_SHRINKER(name) name(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+#else
 typedef int (*cfs_shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
 typedef int (*cfs_shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
+#define KERN_SHRINKER(name) name(int nr_to_scan, gfp_t gfp_mask)
+#endif
 
 static inline
 struct shrinker *cfs_set_shrinker(int seek, cfs_shrinker_t func)
 
 static inline
 struct shrinker *cfs_set_shrinker(int seek, cfs_shrinker_t func)
index 8c3e8c8..6c46056 100644 (file)
@@ -436,12 +436,13 @@ struct lustre_sb_info {
         struct ll_sb_info        *lsi_llsbi;   /* add'l client sbi info */
         struct vfsmount          *lsi_srv_mnt; /* the one server mount */
         cfs_atomic_t              lsi_mounts;  /* references to the srv_mnt */
         struct ll_sb_info        *lsi_llsbi;   /* add'l client sbi info */
         struct vfsmount          *lsi_srv_mnt; /* the one server mount */
         cfs_atomic_t              lsi_mounts;  /* references to the srv_mnt */
-        struct backing_dev_info   bdi;         /* Each client mountpoint needs own backing_dev_info */
+        struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs own backing_dev_info */
 };
 
 #define LSI_SERVER                       0x00000001
 #define LSI_UMOUNT_FORCE                 0x00000010
 #define LSI_UMOUNT_FAILOVER              0x00000020
 };
 
 #define LSI_SERVER                       0x00000001
 #define LSI_UMOUNT_FORCE                 0x00000010
 #define LSI_UMOUNT_FAILOVER              0x00000020
+#define LSI_BDI_INITIALIZED              0x00000040
 
 #define     s2lsi(sb)        ((struct lustre_sb_info *)((sb)->s_fs_info))
 #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
 
 #define     s2lsi(sb)        ((struct lustre_sb_info *)((sb)->s_fs_info))
 #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
diff --git a/lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel6.patch b/lustre/kernel_patches/patches/blkdev_tunables-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..d62c5bc
--- /dev/null
@@ -0,0 +1,13 @@
+Index: b/include/linux/blkdev.h
+===================================================================
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1026,7 +1026,7 @@ extern int blk_verify_command(unsigned c
+ enum blk_default_limits {
+       BLK_MAX_SEGMENTS        = 128,
+       BLK_SAFE_MAX_SECTORS    = 255,
+-      BLK_DEF_MAX_SECTORS     = 1024,
++      BLK_DEF_MAX_SECTORS     = 2048,
+       BLK_MAX_SEGMENT_SIZE    = 65536,
+       BLK_SEG_BOUNDARY_MASK   = 0xFFFFFFFFUL,
+ };
diff --git a/lustre/kernel_patches/patches/dev_read_only-2.6.32-rhel6.patch b/lustre/kernel_patches/patches/dev_read_only-2.6.32-rhel6.patch
new file mode 100644 (file)
index 0000000..30c7575
--- /dev/null
@@ -0,0 +1,172 @@
+This functionality is mainly used during testing, in order to
+simulate a server crash for ldiskfs by discarding all of the
+writes to the filesystem.  For recovery testing we could simulate
+this by using a special loopback or DM device that also discards
+writes to the device.
+
+This functionality is also used by target "failback" in order
+to speed up service shutdown and takeover by the other node
+during controlled operation.  However, it would also be possible
+to do this by simply allowing all of the in-flight requests to
+complete and then waiting for the service to stop.  This will
+also be needed by the DMU-OSD, because discarding of writes on
+a DMU-based target is not safe as it could trigger a storage
+failure if the data is ever read from disk again and the
+checksum does not match that expected by the block pointer.
+
+Initial efforts to remove this patch are under way in bug 20776.
+Once this work comes to fruition this patch can be dropped.
+
+Index: linux-2.6.32-71.18.1.el6-master/block/blk-core.c
+===================================================================
+--- linux-2.6.32-71.18.1.el6-master.orig/block/blk-core.c      2011-03-05 11:35:40.404043293 +0800
++++ linux-2.6.32-71.18.1.el6-master/block/blk-core.c   2011-03-11 20:21:10.492302510 +0800
+@@ -1405,6 +1405,8 @@
+ #endif /* CONFIG_FAIL_MAKE_REQUEST */
++int dev_check_rdonly(struct block_device *bdev);
++
+ /*
+  * Check whether this bio extends beyond the end of the device.
+  */
+@@ -1506,6 +1508,12 @@
+               if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
+                       goto end_io;
++              /* This is Lustre's dev_rdonly check */
++              if (bio_rw(bio) == WRITE && dev_check_rdonly(bio->bi_bdev)) {
++                      bio_endio(bio, 0);
++                      break;
++              }
++
+               if (should_fail_request(bio))
+                       goto end_io;
+@@ -2578,6 +2586,99 @@
+ }
+ EXPORT_SYMBOL(kblockd_schedule_work);
++ /*
++ * Debug code for turning block devices "read-only" (will discard writes
++ * silently).  This is for filesystem crash/recovery testing.
++ */
++struct deventry {
++      dev_t dev;
++      struct deventry *next;
++};
++
++static struct deventry *devlist = NULL;
++static spinlock_t devlock = SPIN_LOCK_UNLOCKED;
++
++int dev_check_rdonly(struct block_device *bdev)
++{
++      struct deventry *cur;
++
++      if (!bdev)
++              return 0;
++
++      spin_lock(&devlock);
++      cur = devlist;
++      while(cur) {
++              if (bdev->bd_dev == cur->dev) {
++                      spin_unlock(&devlock);
++                      return 1;
++              }
++              cur = cur->next;
++      }
++      spin_unlock(&devlock);
++      return 0;
++}
++
++void dev_set_rdonly(struct block_device *bdev)
++{
++      struct deventry *newdev, *cur;
++
++      if (!bdev)
++              return;
++
++      newdev = kmalloc(sizeof(struct deventry), GFP_KERNEL);
++      if (!newdev)
++              return;
++
++      spin_lock(&devlock);
++      cur = devlist;
++      while(cur) {
++              if (bdev->bd_dev == cur->dev) {
++                      spin_unlock(&devlock);
++                      kfree(newdev);
++                      return;
++              }
++              cur = cur->next;
++      }
++      newdev->dev = bdev->bd_dev;
++      newdev->next = devlist;
++      devlist = newdev;
++      spin_unlock(&devlock);
++      printk(KERN_WARNING "Turning device %s (%#x) read-only\n",
++              bdev->bd_disk ? bdev->bd_disk->disk_name : "", bdev->bd_dev);
++}
++
++void dev_clear_rdonly(struct block_device *bdev)
++{
++      struct deventry *cur, *last = NULL;
++
++      if (!bdev)
++              return;
++
++      spin_lock(&devlock);
++      cur = devlist;
++      while(cur) {
++              if (bdev->bd_dev == cur->dev) {
++                      if (last)
++                              last->next = cur->next;
++                      else
++                              devlist = cur->next;
++                      spin_unlock(&devlock);
++                      kfree(cur);
++                      printk(KERN_WARNING "Removing read-only on %s (%#x)\n",
++                              bdev->bd_disk ? bdev->bd_disk->disk_name :
++                                              "unknown block",
++                              bdev->bd_dev);
++                      return;
++              }
++              last = cur;
++              cur = cur->next;
++      }
++      spin_unlock(&devlock);
++}
++
++EXPORT_SYMBOL(dev_set_rdonly);
++EXPORT_SYMBOL(dev_clear_rdonly);
++EXPORT_SYMBOL(dev_check_rdonly);
+ int __init blk_dev_init(void)
+ {
+       BUILD_BUG_ON(__REQ_NR_BITS > 8 *
+Index: linux-2.6.32-71.18.1.el6-master/fs/block_dev.c
+===================================================================
+--- linux-2.6.32-71.18.1.el6-master.orig/fs/block_dev.c        2011-03-05 11:35:40.486042782 +0800
++++ linux-2.6.32-71.18.1.el6-master/fs/block_dev.c     2011-03-05 11:37:35.624324775 +0800
+@@ -1389,6 +1389,7 @@
+               if (bdev != bdev->bd_contains)
+                       victim = bdev->bd_contains;
+               bdev->bd_contains = NULL;
++              dev_clear_rdonly(bdev);
+       }
+       unlock_kernel();
+       mutex_unlock(&bdev->bd_mutex);
+Index: linux-2.6.32-71.18.1.el6-master/include/linux/fs.h
+===================================================================
+--- linux-2.6.32-71.18.1.el6-master.orig/include/linux/fs.h    2011-03-05 11:35:40.445043037 +0800
++++ linux-2.6.32-71.18.1.el6-master/include/linux/fs.h 2011-03-05 11:37:35.726324137 +0800
+@@ -2204,6 +2204,10 @@
+ extern void submit_bio(int, struct bio *);
+ extern int bdev_read_only(struct block_device *);
+ #endif
++#define HAVE_CLEAR_RDONLY_ON_PUT
++extern void dev_set_rdonly(struct block_device *bdev);
++extern int dev_check_rdonly(struct block_device *bdev);
++extern void dev_clear_rdonly(struct block_device *bdev);
+ extern int set_blocksize(struct block_device *, int);
+ extern int sb_set_blocksize(struct super_block *, int);
+ extern int sb_min_blocksize(struct super_block *, int);
diff --git a/lustre/kernel_patches/patches/export-2.6.32-vanilla.patch b/lustre/kernel_patches/patches/export-2.6.32-vanilla.patch
new file mode 100644 (file)
index 0000000..0cb7884
--- /dev/null
@@ -0,0 +1,17 @@
+security_inode_unlink() is used in filter_vfs_unlink()
+to avoid lock ordering problems.  I'm not sure if this
+is still needed with ext4, and it definitely looks to
+be gone with DMU changes.
+
+Index: linux+rh+chaos/security/security.c
+===================================================================
+--- linux+rh+chaos.orig/security/security.c
++++ linux+rh+chaos/security/security.c
+@@ -60,6 +60,7 @@ int __init security_init(void)
+       return 0;
+ }
++EXPORT_SYMBOL(security_inode_unlink);
+ /* Save user chosen LSM */
+ static int __init choose_lsm(char *str)
diff --git a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch
new file mode 100644 (file)
index 0000000..f219771
--- /dev/null
@@ -0,0 +1,228 @@
+This allows the jbd transaction commit callbacks to be registered.
+The ext4 jbd2 code has a different commit callback (one per transaction)
+that could be used to provide equivalent functionality.  This would
+require modifying the existing ext4 commit callback (used by mballoc
+when freeing data blocks) to be multiplexed so it will store 2 different
+callback functions and 2 different lists of callback data.
+
+Index: linux+rh+chaos/include/linux/jbd2.h
+===================================================================
+--- linux+rh+chaos.orig/include/linux/jbd2.h
++++ linux+rh+chaos/include/linux/jbd2.h
+@@ -415,6 +415,27 @@ struct jbd2_inode {
+       unsigned int i_flags;
+ };
++#define HAVE_JOURNAL_CALLBACK_STATUS
++/**
++ *   struct journal_callback - Base structure for callback information.
++ *   @jcb_list: list information for other callbacks attached to the same handle.
++ *   @jcb_func: Function to call with this callback structure.
++ *
++ *   This struct is a 'seed' structure for use with your own callback
++ *   structs. If you are using callbacks you must allocate one of these
++ *   or another struct of your own definition which has this struct
++ *   as its first element and pass it to jbd2_journal_callback_set().
++ *
++ *   This is used internally by jbd2 to maintain callback information.
++ *
++ *   See jbd2_journal_callback_set() for more information.
++ **/
++struct journal_callback {
++      struct list_head jcb_list;              /* t_jcb_lock */
++      void (*jcb_func)(struct journal_callback *jcb, int error);
++      /* user data goes here */
++};
++
+ struct jbd2_revoke_table_s;
+ /**
+@@ -423,6 +444,7 @@ struct jbd2_revoke_table_s;
+  * @h_transaction: Which compound transaction is this update a part of?
+  * @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
+  * @h_ref: Reference count on this handle
++ * @h_jcb: List of application registered callbacks for this handle.
+  * @h_err: Field for caller's use to track errors through large fs operations
+  * @h_sync: flag for sync-on-close
+  * @h_jdata: flag to force data journaling
+@@ -448,6 +470,13 @@ struct handle_s
+       /* operations */
+       int                     h_err;
++      /*
++       * List of application registered callbacks for this handle. The
++       * function(s) will be called after the transaction that this handle is
++       * part of has been committed to disk. [t_jcb_lock]
++       */
++      struct list_head        h_jcb;
++
+       /* Flags [no locking] */
+       unsigned int    h_sync:         1;      /* sync-on-close */
+       unsigned int    h_jdata:        1;      /* force data journaling */
+@@ -503,6 +532,8 @@ struct transaction_chp_stats_s {
+  *    j_state_lock
+  *    ->j_list_lock                   (journal_unmap_buffer)
+  *
++ *    t_handle_lock
++ *    ->t_jcb_lock
+  */
+ struct transaction_s
+@@ -659,6 +690,16 @@ struct transaction_s
+        * structures associated with the transaction
+        */
+       struct list_head        t_private_list;
++ 
++      /*
++       * Protects the callback list
++       */
++      spinlock_t              t_jcb_lock;
++      /*
++       * List of registered callback functions for this transaction.
++       * Called when the transaction is committed. [t_jcb_lock]
++       */
++      struct list_head        t_jcb;
+ };
+ struct transaction_run_stats_s {
+@@ -1115,6 +1156,9 @@ extern int        jbd2_journal_stop(handle_t *
+ extern int     jbd2_journal_flush (journal_t *);
+ extern void    jbd2_journal_lock_updates (journal_t *);
+ extern void    jbd2_journal_unlock_updates (journal_t *);
++extern void    jbd2_journal_callback_set(handle_t *handle,
++                                      void (*fn)(struct journal_callback *,int),
++                                      struct journal_callback *jcb);
+ extern journal_t * jbd2_journal_init_dev(struct block_device *bdev,
+                               struct block_device *fs_dev,
+Index: linux+rh+chaos/fs/jbd2/checkpoint.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd2/checkpoint.c
++++ linux+rh+chaos/fs/jbd2/checkpoint.c
+@@ -759,6 +759,7 @@ void __jbd2_journal_drop_transaction(jou
+       J_ASSERT(transaction->t_checkpoint_list == NULL);
+       J_ASSERT(transaction->t_checkpoint_io_list == NULL);
+       J_ASSERT(transaction->t_updates == 0);
++      J_ASSERT(list_empty(&transaction->t_jcb));
+       J_ASSERT(journal->j_committing_transaction != transaction);
+       J_ASSERT(journal->j_running_transaction != transaction);
+Index: linux+rh+chaos/fs/jbd2/commit.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd2/commit.c
++++ linux+rh+chaos/fs/jbd2/commit.c
+@@ -857,6 +857,30 @@ wait_for_iobuf:
+            transaction can be removed from any checkpoint list it was on
+            before. */
++      /*
++       * Call any callbacks that had been registered for handles in this
++       * transaction.  It is up to the callback to free any allocated
++       * memory.
++       *
++       * The spinlocking (t_jcb_lock) here is surely unnecessary...
++       */
++      spin_lock(&commit_transaction->t_jcb_lock);
++      if (!list_empty(&commit_transaction->t_jcb)) {
++              struct list_head *p, *n;
++              int error = is_journal_aborted(journal);
++
++              list_for_each_safe(p, n, &commit_transaction->t_jcb) {
++                      struct journal_callback *jcb;
++
++                      jcb = list_entry(p, struct journal_callback, jcb_list);
++                      list_del(p);
++                      spin_unlock(&commit_transaction->t_jcb_lock);
++                      jcb->jcb_func(jcb, error);
++                      spin_lock(&commit_transaction->t_jcb_lock);
++              }
++      }
++      spin_unlock(&commit_transaction->t_jcb_lock);
++
+       jbd_debug(3, "JBD: commit phase 6\n");
+       J_ASSERT(list_empty(&commit_transaction->t_inode_list));
+Index: linux+rh+chaos/fs/jbd2/journal.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd2/journal.c
++++ linux+rh+chaos/fs/jbd2/journal.c
+@@ -90,6 +90,8 @@ EXPORT_SYMBOL(jbd2_journal_file_inode);
+ EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
+ EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
+ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
++EXPORT_SYMBOL(jbd2_journal_callback_set);
++EXPORT_SYMBOL(jbd2_journal_bmap);
+ static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
+ static void __journal_abort_soft (journal_t *journal, int errno);
+Index: linux+rh+chaos/fs/jbd2/transaction.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd2/transaction.c
++++ linux+rh+chaos/fs/jbd2/transaction.c
+@@ -52,7 +52,9 @@ jbd2_get_transaction(journal_t *journal,
+       transaction->t_start_time = ktime_get();
+       transaction->t_tid = journal->j_transaction_sequence++;
+       transaction->t_expires = jiffies + journal->j_commit_interval;
++      INIT_LIST_HEAD(&transaction->t_jcb);
+       spin_lock_init(&transaction->t_handle_lock);
++      spin_lock_init(&transaction->t_jcb_lock);
+       INIT_LIST_HEAD(&transaction->t_inode_list);
+       INIT_LIST_HEAD(&transaction->t_private_list);
+@@ -257,6 +259,7 @@ static handle_t *new_handle(int nblocks)
+       memset(handle, 0, sizeof(*handle));
+       handle->h_buffer_credits = nblocks;
+       handle->h_ref = 1;
++      INIT_LIST_HEAD(&handle->h_jcb);
+       lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
+                                               &jbd2_handle_key, 0);
+@@ -1216,6 +1219,36 @@ drop:
+ }
+ /**
++ * void jbd2_journal_callback_set() -  Register a callback function for this handle.
++ * @handle: handle to attach the callback to.
++ * @func: function to call back.
++ * @jcb:  structure with additional information required by func(), and
++ *    some space for jbd2 internal information.
++ *
++ * The function will be
++ * called when the transaction that this handle is part of has been
++ * committed to disk with the original callback data struct and the
++ * error status of the journal as parameters.  There is no guarantee of
++ * ordering between handles within a single transaction, nor between
++ * callbacks registered on the same handle.
++ *
++ * The caller is responsible for allocating the journal_callback struct.
++ * This is to allow the caller to add as much extra data to the callback
++ * as needed, but reduce the overhead of multiple allocations.  The caller
++ * allocated struct must start with a struct journal_callback at offset 0,
++ * and has the caller-specific data afterwards.
++ */
++void jbd2_journal_callback_set(handle_t *handle,
++                    void (*func)(struct journal_callback *jcb, int error),
++                    struct journal_callback *jcb)
++{
++      spin_lock(&handle->h_transaction->t_jcb_lock);
++      list_add_tail(&jcb->jcb_list, &handle->h_jcb);
++      spin_unlock(&handle->h_transaction->t_jcb_lock);
++      jcb->jcb_func = func;
++}
++
++/**
+  * int jbd2_journal_stop() - complete a transaction
+  * @handle: tranaction to complete.
+  *
+@@ -1321,6 +1354,11 @@ int jbd2_journal_stop(handle_t *handle)
+                       wake_up(&journal->j_wait_transaction_locked);
+       }
++      /* Move callbacks from the handle to the transaction. */
++      spin_lock(&transaction->t_jcb_lock);
++      list_splice(&handle->h_jcb, &transaction->t_jcb);
++      spin_unlock(&transaction->t_jcb_lock);
++
+       /*
+        * If the handle is marked SYNC, we need to set another commit
+        * going!  We also want to force a commit if the current
diff --git a/lustre/kernel_patches/patches/mpt-fusion-max-sge-rhel6.patch b/lustre/kernel_patches/patches/mpt-fusion-max-sge-rhel6.patch
new file mode 100644 (file)
index 0000000..1fa1d26
--- /dev/null
@@ -0,0 +1,37 @@
+Increase MAX_SGE for fusion mpt driver.
+
+Index: linux-2.6.32.i386/drivers/message/fusion/Kconfig
+===================================================================
+--- linux-2.6.32.i386.orig/drivers/message/fusion/Kconfig      2009-12-03 09:21:21.000000000 +0530
++++ linux-2.6.32.i386/drivers/message/fusion/Kconfig   2010-03-16 16:45:08.000000000 +0530
+@@ -61,9 +61,9 @@
+         LSISAS1078
+ config FUSION_MAX_SGE
+-      int "Maximum number of scatter gather entries (16 - 128)"
+-      default "128"
+-      range 16 128
++      int "Maximum number of scatter gather entries (16 - 256)"
++      default "256"
++      range 16 256
+       help
+         This option allows you to specify the maximum number of scatter-
+         gather entries per I/O. The driver default is 128, which matches
+Index: linux-2.6.32.i386/drivers/message/fusion/mptbase.h
+===================================================================
+--- linux-2.6.32.i386.orig/drivers/message/fusion/mptbase.h    2009-12-03 09:21:21.000000000 +0530
++++ linux-2.6.32.i386/drivers/message/fusion/mptbase.h 2010-03-16 16:46:54.000000000 +0530
+@@ -165,10 +165,10 @@
+  * Set the MAX_SGE value based on user input.
+  */
+ #ifdef CONFIG_FUSION_MAX_SGE
+-#if CONFIG_FUSION_MAX_SGE  < 16
++#if CONFIG_FUSION_MAX_SGE < 16
+ #define MPT_SCSI_SG_DEPTH     16
+-#elif CONFIG_FUSION_MAX_SGE  > 128
+-#define MPT_SCSI_SG_DEPTH     128
++#elif CONFIG_FUSION_MAX_SGE > 256
++#define MPT_SCSI_SG_DEPTH     256
+ #else
+ #define MPT_SCSI_SG_DEPTH     CONFIG_FUSION_MAX_SGE
+ #endif
diff --git a/lustre/kernel_patches/patches/raid5-mmp-unplug-dev-rhel6.patch b/lustre/kernel_patches/patches/raid5-mmp-unplug-dev-rhel6.patch
new file mode 100644 (file)
index 0000000..74bd529
--- /dev/null
@@ -0,0 +1,27 @@
+Force MD devices to pass SYNC reads directly to the disk
+instead of handling from cache.  This is needed for MMP
+on MD RAID devices, and in theory could be accepted in
+the upstream kernel.  Not needed for DMU.
+
+Index: linux-2.6.32-71.18.1.el6-master/drivers/md/raid5.c
+===================================================================
+--- linux-2.6.32-71.18.1.el6-master.orig/drivers/md/raid5.c    2011-02-28 16:57:31.222666050 +0800
++++ linux-2.6.32-71.18.1.el6-master/drivers/md/raid5.c 2011-02-28 16:58:27.011983275 +0800
+@@ -2098,6 +2098,8 @@
+               bi->bi_next = *bip;
+       *bip = bi;
+       bi->bi_phys_segments++;
++      if (bio_rw_flagged(bi, BIO_RW_SYNCIO) && !forwrite)
++              clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
+       spin_unlock_irq(&conf->device_lock);
+       spin_unlock(&sh->lock);
+@@ -4031,6 +4033,8 @@
+               wait_event(mddev->thread->wqueue,
+                          atomic_read(&conf->preread_active_stripes) == 0);
+       }
++      if (bio_rw_flagged(bi, BIO_RW_SYNCIO))
++              raid5_unplug_device(mddev->queue);
+       return 0;
+ }
diff --git a/lustre/kernel_patches/series/2.6-rhel6.series b/lustre/kernel_patches/series/2.6-rhel6.series
new file mode 100644 (file)
index 0000000..5e014dc
--- /dev/null
@@ -0,0 +1,7 @@
+lustre_version.patch
+mpt-fusion-max-sge-rhel6.patch
+raid5-mmp-unplug-dev-rhel6.patch
+dev_read_only-2.6.32-rhel6.patch
+blkdev_tunables-2.6-rhel6.patch
+export-2.6.32-vanilla.patch
+jbd2-jcberr-2.6-rhel6.patch
index b42ed55..fa55e42 100644 (file)
@@ -1151,14 +1151,14 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
         return cached;
 }
 
         return cached;
 }
 
-static int ldlm_pools_srv_shrink(int nr, unsigned int gfp_mask)
+static int KERN_SHRINKER(ldlm_pools_srv_shrink)
 {
 {
-        return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, nr, gfp_mask);
+        return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, nr_to_scan, gfp_mask);
 }
 
 }
 
-static int ldlm_pools_cli_shrink(int nr, unsigned int gfp_mask)
+static int KERN_SHRINKER(ldlm_pools_cli_shrink)
 {
 {
-        return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, nr, gfp_mask);
+        return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, nr_to_scan, gfp_mask);
 }
 
 void ldlm_pools_recalc(ldlm_side_t client)
 }
 
 void ldlm_pools_recalc(ldlm_side_t client)
index 69b7e69..b9850ab 100644 (file)
@@ -818,9 +818,18 @@ void ll_lli_init(struct ll_inode_info *lli)
         cfs_sema_init(&lli->lli_readdir_sem, 1);
 }
 
         cfs_sema_init(&lli->lli_readdir_sem, 1);
 }
 
-#ifdef HAVE_NEW_BACKING_DEV_INFO
-static atomic_t ll_bdi_num = ATOMIC_INIT(0);
+static inline int ll_bdi_register(struct backing_dev_info *bdi)
+{
+#ifdef HAVE_BDI_REGISTER
+        static atomic_t ll_bdi_num = ATOMIC_INIT(0);
+
+        bdi->name = "lustre";
+        return bdi_register(bdi, NULL, "lustre-%d",
+                            atomic_inc_return(&ll_bdi_num));
+#else
+        return 0;
 #endif
 #endif
+}
 
 int ll_fill_super(struct super_block *sb)
 {
 
 int ll_fill_super(struct super_block *sb)
 {
@@ -849,16 +858,17 @@ int ll_fill_super(struct super_block *sb)
         if (err)
                 GOTO(out_free, err);
 
         if (err)
                 GOTO(out_free, err);
 
-        err = ll_bdi_init(&lsi->bdi);
+        err = ll_bdi_init(&lsi->lsi_bdi);
+        if (err)
+                GOTO(out_free, err);
+        lsi->lsi_flags |= LSI_BDI_INITIALIZED;
+        lsi->lsi_bdi.capabilities = BDI_CAP_MAP_COPY;
+        err = ll_bdi_register(&lsi->lsi_bdi);
         if (err)
                 GOTO(out_free, err);
 
         if (err)
                 GOTO(out_free, err);
 
-#ifdef HAVE_NEW_BACKING_DEV_INFO
-        lsi->bdi.name = "lustre";
-        lsi->bdi.capabilities = BDI_CAP_MAP_COPY;
-        err = bdi_register(&lsi->bdi, NULL, "lustre-%d",
-                           atomic_inc_return(&ll_bdi_num));
-        sb->s_bdi = &lsi->bdi;
+#ifdef HAVE_SB_BDI
+        sb->s_bdi = &lsi->lsi_bdi;
 #endif
 
         /* Generate a string unique to this super, in case some joker tries
 #endif
 
         /* Generate a string unique to this super, in case some joker tries
@@ -965,8 +975,10 @@ void ll_put_super(struct super_block *sb)
         if (profilenm)
                 class_del_profile(profilenm);
 
         if (profilenm)
                 class_del_profile(profilenm);
 
-        if (ll_bdi_wb_cnt(lsi->bdi) > 0)
-                ll_bdi_destroy(&lsi->bdi);
+        if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
+                ll_bdi_destroy(&lsi->lsi_bdi);
+                lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
+        }
 
         ll_free_sbi(sb);
         lsi->lsi_llsbi = NULL;
 
         ll_free_sbi(sb);
         lsi->lsi_llsbi = NULL;
@@ -1668,7 +1680,7 @@ void ll_read_inode2(struct inode *inode, void *opaque)
         /* OIDEBUG(inode); */
 
         /* initializing backing dev info. */
         /* OIDEBUG(inode); */
 
         /* initializing backing dev info. */
-        inode->i_mapping->backing_dev_info = &(s2lsi(inode->i_sb)->bdi);
+        inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi;
 
 
         if (S_ISREG(inode->i_mode)) {
 
 
         if (S_ISREG(inode->i_mode)) {
index f1ed390..8b60e31 100644 (file)
@@ -121,6 +121,11 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
                ext3_ext_insert_extent(handle, inode, path, newext)
 #endif
 
                ext3_ext_insert_extent(handle, inode, path, newext)
 #endif
 
+#ifdef EXT3_DISCARD_PREALLOCATIONS
+#define ext3_mb_discard_inode_preallocations(inode) \
+                 ext3_discard_preallocations(inode)
+#endif
+
 
 static cfs_mem_cache_t *fcb_cache;
 
 
 static cfs_mem_cache_t *fcb_cache;
 
index 8c61aa9..69aa691 100644 (file)
@@ -1626,24 +1626,24 @@ static void lu_site_stats_get(cfs_hash_t *hs,
 }
 
 #ifdef __KERNEL__
 }
 
 #ifdef __KERNEL__
-static int lu_cache_shrink(int nr, unsigned int gfp_mask)
+static int KERN_SHRINKER(lu_cache_shrink)
 {
         lu_site_stats_t stats;
         struct lu_site *s;
         struct lu_site *tmp;
         int cached = 0;
 {
         lu_site_stats_t stats;
         struct lu_site *s;
         struct lu_site *tmp;
         int cached = 0;
-        int remain = nr;
+        int remain = nr_to_scan;
         CFS_LIST_HEAD(splice);
 
         CFS_LIST_HEAD(splice);
 
-        if (nr != 0) {
+        if (nr_to_scan != 0) {
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
-                CDEBUG(D_INODE, "Shrink %d objects\n", nr);
+                CDEBUG(D_INODE, "Shrink %d objects\n", nr_to_scan);
         }
 
         cfs_down(&lu_sites_guard);
         cfs_list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
         }
 
         cfs_down(&lu_sites_guard);
         cfs_list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
-                if (nr != 0) {
+                if (nr_to_scan != 0) {
                         remain = lu_site_purge(&lu_shrink_env, s, remain);
                         /*
                          * Move just shrunk site to the tail of site list to
                         remain = lu_site_purge(&lu_shrink_env, s, remain);
                         /*
                          * Move just shrunk site to the tail of site list to
@@ -1655,14 +1655,14 @@ static int lu_cache_shrink(int nr, unsigned int gfp_mask)
                 memset(&stats, 0, sizeof(stats));
                 lu_site_stats_get(s->ls_obj_hash, &stats, 0);
                 cached += stats.lss_total - stats.lss_busy;
                 memset(&stats, 0, sizeof(stats));
                 lu_site_stats_get(s->ls_obj_hash, &stats, 0);
                 cached += stats.lss_total - stats.lss_busy;
-                if (nr && remain <= 0)
+                if (nr_to_scan && remain <= 0)
                         break;
         }
         cfs_list_splice(&splice, lu_sites.prev);
         cfs_up(&lu_sites_guard);
 
         cached = (cached / 100) * sysctl_vfs_cache_pressure;
                         break;
         }
         cfs_list_splice(&splice, lu_sites.prev);
         cfs_up(&lu_sites_guard);
 
         cached = (cached / 100) * sysctl_vfs_cache_pressure;
-        if (nr == 0)
+        if (nr_to_scan == 0)
                 CDEBUG(D_INODE, "%d objects cached\n", cached);
         return cached;
 }
                 CDEBUG(D_INODE, "%d objects cached\n", cached);
         return cached;
 }
index a9a9906..a42dacb 100644 (file)
@@ -239,7 +239,7 @@ static void enc_pools_release_free_pages(long npages)
  * could be called frequently for query (@nr_to_scan == 0).
  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
  */
  * could be called frequently for query (@nr_to_scan == 0).
  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
  */
-static int enc_pools_shrink(int nr_to_scan, unsigned int gfp_mask)
+static int KERN_SHRINKER(enc_pools_shrink)
 {
         if (unlikely(nr_to_scan != 0)) {
                 cfs_spin_lock(&page_pools.epp_lock);
 {
         if (unlikely(nr_to_scan != 0)) {
                 cfs_spin_lock(&page_pools.epp_lock);