From: Bobi Jam Date: Tue, 21 Jun 2011 01:07:57 +0000 (+0800) Subject: LU-433 remove jbd2-jcberr patch from kernel X-Git-Tag: 2.1.51~27 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=42758c33664f682e6ed86065b2224287d1952713 LU-433 remove jbd2-jcberr patch from kernel In the upstream ext4 code there is a new functionality to allow a per-commit callback to be set (j_commit_callback), which is used by the mballoc code to manage free space bitmaps after deleted blocks have been released. We expand it to contain multiple different callbacks. Signed-off-by: Bobi Jam Change-Id: I6397ccabd8d729658cf2ee13c9c3731a9eb31219 Reviewed-on: http://review.whamcloud.com/983 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/build/autoconf/lustre-build.m4 b/build/autoconf/lustre-build.m4 index 20e3d0d..7feb9dc 100644 --- a/build/autoconf/lustre-build.m4 +++ b/build/autoconf/lustre-build.m4 @@ -320,6 +320,10 @@ AC_SUBST(LDISKFS_SUBDIR) AM_CONDITIONAL(LDISKFS_ENABLED, test x$with_ldiskfs != xno) AM_CONDITIONAL(LDISKFS_IN_KERNEL, test x$with_ldiskfs = xinkernel) +if test x$with_ldiskfs != xno ; then + LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET +fi + if test x$enable_ext4 = xyes ; then AC_DEFINE(HAVE_EXT4_LDISKFS, 1, [build ext4 based ldiskfs]) fi @@ -384,6 +388,28 @@ esac AC_SUBST(LIBCFS_SUBDIR) AC_SUBST(LIBCFS_INCLUDE_DIR) ]) + +# +# Check for jbd2_journal_callback_set(), which is needed for commit +# callbacks. When LU-433 lands jbd2_journal_callback_set() will only +# remain for legacy reasons and AC_MSG_ERROR can be removed. +# +# 2.6.18 with ext3 still uses journal_callback_set() for commit callbacks. +# +AC_DEFUN([LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET], +[ + LB_CHECK_SYMBOL_EXPORT([jbd2_journal_callback_set], + [fs/jbd2/journal.c], + [AC_DEFINE(HAVE_JBD2_JOURNAL_CALLBACK_SET, 1, + [kernel exports jbd2_journal_callback_set])], + [LB_CHECK_SYMBOL_EXPORT([journal_callback_set], + [fs/jbd/journal.c], + [AC_DEFINE(HAVE_JOURNAL_CALLBACK_SET, 1, + [kernel exports journal_callback_set])], + [if test x$with_ldiskfs != xno ; then + AC_MSG_ERROR([ldiskfs needs jbd2-jcberr patch]) + fi])]) +]) # # LB_DEFINE_LDISKFS_OPTIONS diff --git a/ldiskfs/configure.ac b/ldiskfs/configure.ac index 4d40c0d..2b3d1c5 100644 --- a/ldiskfs/configure.ac +++ b/ldiskfs/configure.ac @@ -145,17 +145,6 @@ AC_SUBST(LDISKFS_SERIES) AC_SUBST(ac_configure_args) -LB_CHECK_SYMBOL_EXPORT([d_rehash_cond], - [fs/dcache.c], - [AC_DEFINE(HAVE_D_REHASH_COND, 1, - [d_rehash_cond is exported by the kernel])], - []) - -LB_CHECK_SYMBOL_EXPORT([__d_rehash], - [fs/dcache.c], - [AC_DEFINE(HAVE___D_REHASH, 1, - [__d_rehash is exported by the kernel])], - []) LB_CONFIG_FILES AC_CONFIG_FILES([ldiskfs/autoMakefile ldiskfs/Makefile]) diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch new file mode 100644 index 0000000..4c08c9e --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch @@ -0,0 +1,448 @@ +Index: linux-stage/fs/ext4/ext4_jbd2.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4_jbd2.h ++++ linux-stage/fs/ext4/ext4_jbd2.h +@@ -106,6 +106,80 @@ + #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) + #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) + ++/** ++ * struct ext4_journal_cb_entry - Base structure for callback information. 
++ * ++ * This struct is a 'seed' structure for a using with your own callback ++ * structs. If you are using callbacks you must allocate one of these ++ * or another struct of your own definition which has this struct ++ * as it's first element and pass it to ext4_journal_callback_add(). ++ */ ++struct ext4_journal_cb_entry { ++ /* list information for other callbacks attached to the same handle */ ++ struct list_head jce_list; ++ ++ /* Function to call with this callback structure */ ++ void (*jce_func)(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, int error); ++ ++ /* user data goes here */ ++}; ++ ++/** ++ * ext4_journal_callback_add: add a function to call after transaction commit ++ * @handle: active journal transaction handle to register callback on ++ * @func: callback function to call after the transaction has committed: ++ * @sb: superblock of current filesystem for transaction ++ * @jce: returned journal callback data ++ * @rc: journal state at commit (0 = transaction committed properly) ++ * @jce: journal callback data (internal and function private data struct) ++ * ++ * The registered function will be called in the context of the journal thread ++ * after the transaction for which the handle was created has completed. ++ * ++ * No locks are held when the callback function is called, so it is safe to ++ * call blocking functions from within the callback, but the callback should ++ * not block or run for too long, or the filesystem will be blocked waiting for ++ * the next transaction to commit. No journaling functions can be used, or ++ * there is a risk of deadlock. ++ * ++ * There is no guaranteed calling order of multiple registered callbacks on ++ * the same transaction. ++ */ ++static inline void ext4_journal_callback_add(handle_t *handle, ++ void (*func)(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, ++ int rc), ++ struct ext4_journal_cb_entry *jce) ++{ ++ struct ext4_sb_info *sbi = ++ EXT4_SB(handle->h_transaction->t_journal->j_private); ++ ++ /* Add the jce to transaction's private list */ ++ jce->jce_func = func; ++ spin_lock(&sbi->s_md_lock); ++ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); ++ spin_unlock(&sbi->s_md_lock); ++} ++ ++/** ++ * ext4_journal_callback_del: delete a registered callback ++ * @handle: active journal transaction handle on which callback was registered ++ * @jce: registered journal callback entry to unregister ++ */ ++static inline void ext4_journal_callback_del(handle_t *handle, ++ struct ext4_journal_cb_entry *jce) ++{ ++ struct ext4_sb_info *sbi = ++ EXT4_SB(handle->h_transaction->t_journal->j_private); ++ ++ spin_lock(&sbi->s_md_lock); ++ list_del_init(&jce->jce_list); ++ spin_unlock(&sbi->s_md_lock); ++} ++ ++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD ++ + int + ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -21,6 +21,7 @@ + * mballoc.c contains the multiblocks allocation routines + */ + ++#include "ext4_jbd2.h" + #include "mballoc.h" + #include + +@@ -335,14 +336,12 @@ + */ + static struct kmem_cache *ext4_pspace_cachep; + static struct kmem_cache *ext4_ac_cachep; +-static struct kmem_cache *ext4_free_ext_cachep; ++static struct kmem_cache *ext4_free_data_cachep; + static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + ext4_group_t group); + static void 
ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, + ext4_group_t group); +-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); +- +- ++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error); + + static inline void *mb_correct_addr_and_bit(int *bit, void *addr) + { +@@ -2942,8 +2941,6 @@ int ext4_mb_init(struct super_block *sb, + + ext4_mb_history_init(sb); + +- if (sbi->s_journal) +- sbi->s_journal->j_commit_callback = release_blocks_on_commit; + return 0; + } + +@@ -3032,46 +3029,42 @@ int ext4_mb_release(struct super_block * + * This function is called by the jbd2 layer once the commit has finished, + * so we know we can free the blocks that were released with that commit. + */ +-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ++static void ext4_free_data_callback(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, ++ int rc) + { +- struct super_block *sb = journal->j_private; ++ struct ext4_free_data *entry = (struct ext4_free_data *)jce; + struct ext4_buddy e4b; + struct ext4_group_info *db; + int err, count = 0, count2 = 0; +- struct ext4_free_data *entry; +- struct list_head *l, *ltmp; + +- list_for_each_safe(l, ltmp, &txn->t_private_list) { +- entry = list_entry(l, struct ext4_free_data, list); +- +- mb_debug(1, "gonna free %u blocks in group %u (0x%p):", +- entry->count, entry->group, entry); +- +- err = ext4_mb_load_buddy(sb, entry->group, &e4b); +- /* we expect to find existing buddy because it's pinned */ +- BUG_ON(err != 0); +- +- db = e4b.bd_info; +- /* there are blocks to put in buddy to make them really free */ +- count += entry->count; +- count2++; +- ext4_lock_group(sb, entry->group); +- /* Take it out of per group rb tree */ +- rb_erase(&entry->node, &(db->bb_free_root)); +- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); +- +- if (!db->bb_free_root.rb_node) { +- /* No more items in the per group rb tree +- * balance refcounts from ext4_mb_free_metadata() +- */ +- page_cache_release(e4b.bd_buddy_page); +- page_cache_release(e4b.bd_bitmap_page); +- } +- ext4_unlock_group(sb, entry->group); ++ mb_debug(1, "gonna free %u blocks in group %u (0x%p):", ++ entry->efd_count, entry->efd_group, entry); + +- kmem_cache_free(ext4_free_ext_cachep, entry); +- ext4_mb_unload_buddy(&e4b); ++ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); ++ /* we expect to find existing buddy because it's pinned */ ++ BUG_ON(err != 0); ++ ++ db = e4b.bd_info; ++ /* there are blocks to put in buddy to make them really free */ ++ count += entry->efd_count; ++ count2++; ++ ext4_lock_group(sb, entry->efd_group); ++ /* Take it out of per group rb tree */ ++ rb_erase(&entry->efd_node, &(db->bb_free_root)); ++ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count); ++ ++ if (!db->bb_free_root.rb_node) { ++ /* No more items in the per group rb tree ++ * balance refcounts from ext4_mb_free_metadata() ++ */ ++ page_cache_release(e4b.bd_buddy_page); ++ page_cache_release(e4b.bd_bitmap_page); + } ++ ext4_unlock_group(sb, entry->efd_group); ++ ++ kmem_cache_free(ext4_free_data_cachep, entry); ++ ext4_mb_unload_buddy(&e4b); + + mb_debug(1, "freed %u blocks in %u structures\n", count, count2); + } +@@ -3123,22 +3116,24 @@ int __init init_ext4_mballoc(void) + kmem_cache_create("ext4_alloc_context", + sizeof(struct ext4_allocation_context), + 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); +- if (ext4_ac_cachep == NULL) { +- kmem_cache_destroy(ext4_pspace_cachep); +- 
return -ENOMEM; +- } ++ if (ext4_ac_cachep == NULL) ++ goto out_err; + +- ext4_free_ext_cachep = +- kmem_cache_create("ext4_free_block_extents", ++ ext4_free_data_cachep = ++ kmem_cache_create("ext4_free_data", + sizeof(struct ext4_free_data), + 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); +- if (ext4_free_ext_cachep == NULL) { +- kmem_cache_destroy(ext4_pspace_cachep); +- kmem_cache_destroy(ext4_ac_cachep); +- return -ENOMEM; +- } ++ if (ext4_free_data_cachep == NULL) ++ goto out1_err; ++ + ext4_create_debugfs_entry(); + return 0; ++ ++out1_err: ++ kmem_cache_destroy(ext4_ac_cachep); ++out_err: ++ kmem_cache_destroy(ext4_pspace_cachep); ++ return -ENOMEM; + } + + void exit_ext4_mballoc(void) +@@ -3150,7 +3145,7 @@ void exit_ext4_mballoc(void) + rcu_barrier(); + kmem_cache_destroy(ext4_pspace_cachep); + kmem_cache_destroy(ext4_ac_cachep); +- kmem_cache_destroy(ext4_free_ext_cachep); ++ kmem_cache_destroy(ext4_free_data_cachep); + ext4_remove_debugfs_entry(); + } + +@@ -3688,8 +3683,8 @@ static void ext4_mb_generate_from_freeli + n = rb_first(&(grp->bb_free_root)); + + while (n) { +- entry = rb_entry(n, struct ext4_free_data, node); +- mb_set_bits(bitmap, entry->start_blk, entry->count); ++ entry = rb_entry(n, struct ext4_free_data, efd_node); ++ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count); + n = rb_next(n); + } + return; +@@ -4974,11 +4969,11 @@ out3: + * AND the blocks are associated with the same group. + */ + static int can_merge(struct ext4_free_data *entry1, +- struct ext4_free_data *entry2) ++ struct ext4_free_data *entry2) + { +- if ((entry1->t_tid == entry2->t_tid) && +- (entry1->group == entry2->group) && +- ((entry1->start_blk + entry1->count) == entry2->start_blk)) ++ if ((entry1->efd_tid == entry2->efd_tid) && ++ (entry1->efd_group == entry2->efd_group) && ++ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk)) + return 1; + return 0; + } +@@ -4991,7 +4986,6 @@ ext4_mb_free_metadata(handle_t *handle, + struct ext4_free_data *entry; + struct ext4_group_info *db = e4b->bd_info; + struct super_block *sb = e4b->bd_sb; +- struct ext4_sb_info *sbi = EXT4_SB(sb); + struct rb_node **n = &db->bb_free_root.rb_node, *node; + struct rb_node *parent = NULL, *new_node; + +@@ -4999,8 +4993,8 @@ ext4_mb_free_metadata(handle_t *handle, + BUG_ON(e4b->bd_bitmap_page == NULL); + BUG_ON(e4b->bd_buddy_page == NULL); + +- new_node = &new_entry->node; +- block = new_entry->start_blk; ++ new_node = &new_entry->efd_node; ++ block = new_entry->efd_start_blk; + + if (!*n) { + /* first free block exent. 
We need to +@@ -5013,15 +5007,15 @@ ext4_mb_free_metadata(handle_t *handle, + } + while (*n) { + parent = *n; +- entry = rb_entry(parent, struct ext4_free_data, node); +- if (block < entry->start_blk) ++ entry = rb_entry(parent, struct ext4_free_data, efd_node); ++ if (block < entry->efd_start_blk) + n = &(*n)->rb_left; +- else if (block >= (entry->start_blk + entry->count)) ++ else if (block >= (entry->efd_start_blk + entry->efd_count)) + n = &(*n)->rb_right; + else { + ext4_grp_locked_error(sb, e4b->bd_group, __func__, + "Double free of blocks %d (%d %d)", +- block, entry->start_blk, entry->count); ++ block, entry->efd_start_blk, entry->efd_count); + return 0; + } + } +@@ -5032,34 +5026,29 @@ ext4_mb_free_metadata(handle_t *handle, + /* Now try to see the extent can be merged to left and right */ + node = rb_prev(new_node); + if (node) { +- entry = rb_entry(node, struct ext4_free_data, node); ++ entry = rb_entry(node, struct ext4_free_data, efd_node); + if (can_merge(entry, new_entry)) { +- new_entry->start_blk = entry->start_blk; +- new_entry->count += entry->count; ++ new_entry->efd_start_blk = entry->efd_start_blk; ++ new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- spin_lock(&sbi->s_md_lock); +- list_del(&entry->list); +- spin_unlock(&sbi->s_md_lock); +- kmem_cache_free(ext4_free_ext_cachep, entry); ++ ext4_journal_callback_del(handle, &entry->efd_jce); ++ kmem_cache_free(ext4_free_data_cachep, entry); + } + } + + node = rb_next(new_node); + if (node) { +- entry = rb_entry(node, struct ext4_free_data, node); ++ entry = rb_entry(node, struct ext4_free_data, efd_node); + if (can_merge(new_entry, entry)) { +- new_entry->count += entry->count; ++ new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- spin_lock(&sbi->s_md_lock); +- list_del(&entry->list); +- spin_unlock(&sbi->s_md_lock); +- kmem_cache_free(ext4_free_ext_cachep, entry); ++ ext4_journal_callback_del(handle, &entry->efd_jce); ++ kmem_cache_free(ext4_free_data_cachep, entry); + } + } + /* Add the extent to transaction's private list */ +- spin_lock(&sbi->s_md_lock); +- list_add(&new_entry->list, &handle->h_transaction->t_private_list); +- spin_unlock(&sbi->s_md_lock); ++ ext4_journal_callback_add(handle, ext4_free_data_callback, ++ &new_entry->efd_jce); + return 0; + } + +@@ -5191,11 +5180,11 @@ do_more: + * blocks being freed are metadata. 
these blocks shouldn't + * be used until this transaction is committed + */ +- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); +- new_entry->start_blk = bit; +- new_entry->group = block_group; +- new_entry->count = count; +- new_entry->t_tid = handle->h_transaction->t_tid; ++ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); ++ new_entry->efd_start_blk = bit; ++ new_entry->efd_group = block_group; ++ new_entry->efd_count = count; ++ new_entry->efd_tid = handle->h_transaction->t_tid; + + ext4_lock_group(sb, block_group); + mb_clear_bits(bitmap_bh->b_data, bit, count); +Index: linux-stage/fs/ext4/mballoc.h +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.h ++++ linux-stage/fs/ext4/mballoc.h +@@ -107,23 +107,24 @@ extern u8 mb_enable_debug; + */ + #define MB_DEFAULT_GROUP_PREALLOC 512 + +- + struct ext4_free_data { +- /* this links the free block information from group_info */ +- struct rb_node node; ++ /* MUST be the first member */ ++ struct ext4_journal_cb_entry efd_jce; + +- /* this links the free block information from ext4_sb_info */ +- struct list_head list; ++ /* ext4_free_data private data starts from here */ ++ ++ /* this links the free block information from group_info */ ++ struct rb_node efd_node; + + /* group which free block extent belongs */ +- ext4_group_t group; ++ ext4_group_t efd_group; + + /* free block extent */ +- ext4_grpblk_t start_blk; +- ext4_grpblk_t count; ++ ext4_grpblk_t efd_start_blk; ++ ext4_grpblk_t efd_count; + + /* transaction which freed this extent */ +- tid_t t_tid; ++ tid_t efd_tid; + }; + + struct ext4_prealloc_space { +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -304,6 +304,23 @@ void ext4_journal_abort_handle(const cha + + EXPORT_SYMBOL(ext4_journal_abort_handle); + ++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) ++{ ++ struct super_block *sb = journal->j_private; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ int error = is_journal_aborted(journal); ++ struct ext4_journal_cb_entry *jce, *tmp; ++ ++ spin_lock(&sbi->s_md_lock); ++ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { ++ list_del_init(&jce->jce_list); ++ spin_unlock(&sbi->s_md_lock); ++ jce->jce_func(sb, jce, error); ++ spin_lock(&sbi->s_md_lock); ++ } ++ spin_unlock(&sbi->s_md_lock); ++} ++ + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. 
+ * +@@ -2997,6 +3014,8 @@ static int ext4_fill_super(struct super_ + } + set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); + ++ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; ++ + no_journal: + + if (test_opt(sb, NOBH)) { diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch new file mode 100644 index 0000000..b177e2e --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch @@ -0,0 +1,464 @@ +Index: linux-stage/fs/ext4/ext4_jbd2.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4_jbd2.h ++++ linux-stage/fs/ext4/ext4_jbd2.h +@@ -106,6 +106,80 @@ + #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) + #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) + ++/** ++ * struct ext4_journal_cb_entry - Base structure for callback information. ++ * ++ * This struct is a 'seed' structure for a using with your own callback ++ * structs. If you are using callbacks you must allocate one of these ++ * or another struct of your own definition which has this struct ++ * as it's first element and pass it to ext4_journal_callback_add(). ++ */ ++struct ext4_journal_cb_entry { ++ /* list information for other callbacks attached to the same handle */ ++ struct list_head jce_list; ++ ++ /* Function to call with this callback structure */ ++ void (*jce_func)(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, int error); ++ ++ /* user data goes here */ ++}; ++ ++/** ++ * ext4_journal_callback_add: add a function to call after transaction commit ++ * @handle: active journal transaction handle to register callback on ++ * @func: callback function to call after the transaction has committed: ++ * @sb: superblock of current filesystem for transaction ++ * @jce: returned journal callback data ++ * @rc: journal state at commit (0 = transaction committed properly) ++ * @jce: journal callback data (internal and function private data struct) ++ * ++ * The registered function will be called in the context of the journal thread ++ * after the transaction for which the handle was created has completed. ++ * ++ * No locks are held when the callback function is called, so it is safe to ++ * call blocking functions from within the callback, but the callback should ++ * not block or run for too long, or the filesystem will be blocked waiting for ++ * the next transaction to commit. No journaling functions can be used, or ++ * there is a risk of deadlock. ++ * ++ * There is no guaranteed calling order of multiple registered callbacks on ++ * the same transaction. 
++ */ ++static inline void ext4_journal_callback_add(handle_t *handle, ++ void (*func)(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, ++ int rc), ++ struct ext4_journal_cb_entry *jce) ++{ ++ struct ext4_sb_info *sbi = ++ EXT4_SB(handle->h_transaction->t_journal->j_private); ++ ++ /* Add the jce to transaction's private list */ ++ jce->jce_func = func; ++ spin_lock(&sbi->s_md_lock); ++ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list); ++ spin_unlock(&sbi->s_md_lock); ++} ++ ++/** ++ * ext4_journal_callback_del: delete a registered callback ++ * @handle: active journal transaction handle on which callback was registered ++ * @jce: registered journal callback entry to unregister ++ */ ++static inline void ext4_journal_callback_del(handle_t *handle, ++ struct ext4_journal_cb_entry *jce) ++{ ++ struct ext4_sb_info *sbi = ++ EXT4_SB(handle->h_transaction->t_journal->j_private); ++ ++ spin_lock(&sbi->s_md_lock); ++ list_del_init(&jce->jce_list); ++ spin_unlock(&sbi->s_md_lock); ++} ++ ++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD ++ + int + ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, +Index: linux-stage/fs/ext4/mballoc.h +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.h ++++ linux-stage/fs/ext4/mballoc.h +@@ -96,23 +96,24 @@ extern u8 mb_enable_debug; + */ + #define MB_DEFAULT_GROUP_PREALLOC 512 + +- + struct ext4_free_data { +- /* this links the free block information from group_info */ +- struct rb_node node; ++ /* MUST be the first member */ ++ struct ext4_journal_cb_entry efd_jce; + +- /* this links the free block information from ext4_sb_info */ +- struct list_head list; ++ /* ext4_free_data private data starts from here */ ++ ++ /* this links the free block information from group_info */ ++ struct rb_node efd_node; + + /* group which free block extent belongs */ +- ext4_group_t group; ++ ext4_group_t efd_group; + + /* free block extent */ +- ext4_grpblk_t start_blk; +- ext4_grpblk_t count; ++ ext4_grpblk_t efd_start_blk; ++ ext4_grpblk_t efd_count; + + /* transaction which freed this extent */ +- tid_t t_tid; ++ tid_t efd_tid; + }; + + struct ext4_prealloc_space { +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -21,6 +21,7 @@ + * mballoc.c contains the multiblocks allocation routines + */ + ++#include "ext4_jbd2.h" + #include "mballoc.h" + #include + #include +@@ -336,12 +337,12 @@ + */ + static struct kmem_cache *ext4_pspace_cachep; + static struct kmem_cache *ext4_ac_cachep; +-static struct kmem_cache *ext4_free_ext_cachep; ++static struct kmem_cache *ext4_free_data_cachep; + static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, + ext4_group_t group); + static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, + ext4_group_t group); +-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); ++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error); + + static inline void *mb_correct_addr_and_bit(int *bit, void *addr) + { +@@ -2592,8 +2593,6 @@ int ext4_mb_init(struct super_block *sb, + } + } + +- if (sbi->s_journal) +- sbi->s_journal->j_commit_callback = release_blocks_on_commit; + return 0; + } + +@@ -2693,56 +2692,52 @@ static inline int ext4_issue_discard(str + * This function is called by the jbd2 layer once the commit has 
finished, + * so we know we can free the blocks that were released with that commit. + */ +-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ++static void ext4_free_data_callback(struct super_block *sb, ++ struct ext4_journal_cb_entry *jce, ++ int rc) + { +- struct super_block *sb = journal->j_private; ++ struct ext4_free_data *entry = (struct ext4_free_data *)jce; + struct ext4_buddy e4b; + struct ext4_group_info *db; + int err, count = 0, count2 = 0; +- struct ext4_free_data *entry; +- struct list_head *l, *ltmp; +- +- list_for_each_safe(l, ltmp, &txn->t_private_list) { +- entry = list_entry(l, struct ext4_free_data, list); + +- mb_debug(1, "gonna free %u blocks in group %u (0x%p):", +- entry->count, entry->group, entry); ++ mb_debug(1, "gonna free %u blocks in group %u (0x%p):", ++ entry->efd_count, entry->efd_group, entry); + +- if (test_opt(sb, DISCARD)) { +- int ret; +- ret = ext4_issue_discard(sb, entry->group, +- entry->start_blk, entry->count); +- if (unlikely(ret == -EOPNOTSUPP)) { +- ext4_warning(sb, "discard not supported, " +- "disabling"); +- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); +- } ++ if (test_opt(sb, DISCARD)) { ++ int ret; ++ ret = ext4_issue_discard(sb, entry->efd_group, ++ entry->efd_start_blk, entry->efd_count); ++ if (unlikely(ret == -EOPNOTSUPP)) { ++ ext4_warning(sb, "discard not supported, " ++ "disabling"); ++ clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); + } ++ } + +- err = ext4_mb_load_buddy(sb, entry->group, &e4b); +- /* we expect to find existing buddy because it's pinned */ +- BUG_ON(err != 0); +- +- db = e4b.bd_info; +- /* there are blocks to put in buddy to make them really free */ +- count += entry->count; +- count2++; +- ext4_lock_group(sb, entry->group); +- /* Take it out of per group rb tree */ +- rb_erase(&entry->node, &(db->bb_free_root)); +- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); +- +- if (!db->bb_free_root.rb_node) { +- /* No more items in the per group rb tree +- * balance refcounts from ext4_mb_free_metadata() +- */ +- page_cache_release(e4b.bd_buddy_page); +- page_cache_release(e4b.bd_bitmap_page); +- } +- ext4_unlock_group(sb, entry->group); +- kmem_cache_free(ext4_free_ext_cachep, entry); +- ext4_mb_release_desc(&e4b); ++ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); ++ /* we expect to find existing buddy because it's pinned */ ++ BUG_ON(err != 0); ++ ++ db = e4b.bd_info; ++ /* there are blocks to put in buddy to make them really free */ ++ count += entry->efd_count; ++ count2++; ++ ext4_lock_group(sb, entry->efd_group); ++ /* Take it out of per group rb tree */ ++ rb_erase(&entry->efd_node, &(db->bb_free_root)); ++ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count); ++ ++ if (!db->bb_free_root.rb_node) { ++ /* No more items in the per group rb tree ++ * balance refcounts from ext4_mb_free_metadata() ++ */ ++ page_cache_release(e4b.bd_buddy_page); ++ page_cache_release(e4b.bd_bitmap_page); + } ++ ext4_unlock_group(sb, entry->efd_group); ++ kmem_cache_free(ext4_free_data_cachep, entry); ++ ext4_mb_release_desc(&e4b); + + mb_debug(1, "freed %u blocks in %u structures\n", count, count2); + } +@@ -2794,22 +2789,22 @@ int __init init_ext4_mballoc(void) + kmem_cache_create("ext4_alloc_context", + sizeof(struct ext4_allocation_context), + 0, SLAB_RECLAIM_ACCOUNT, NULL); +- if (ext4_ac_cachep == NULL) { +- kmem_cache_destroy(ext4_pspace_cachep); +- return -ENOMEM; +- } ++ if (ext4_ac_cachep == NULL) ++ goto out_err; ++ ++ ext4_free_data_cachep = ++ 
KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT); ++ if (ext4_free_data_cachep == NULL) ++ goto out1_err; + +- ext4_free_ext_cachep = +- kmem_cache_create("ext4_free_block_extents", +- sizeof(struct ext4_free_data), +- 0, SLAB_RECLAIM_ACCOUNT, NULL); +- if (ext4_free_ext_cachep == NULL) { +- kmem_cache_destroy(ext4_pspace_cachep); +- kmem_cache_destroy(ext4_ac_cachep); +- return -ENOMEM; +- } + ext4_create_debugfs_entry(); + return 0; ++ ++out1_err: ++ kmem_cache_destroy(ext4_ac_cachep); ++out_err: ++ kmem_cache_destroy(ext4_pspace_cachep); ++ return -ENOMEM; + } + + void exit_ext4_mballoc(void) +@@ -2821,7 +2816,7 @@ void exit_ext4_mballoc(void) + rcu_barrier(); + kmem_cache_destroy(ext4_pspace_cachep); + kmem_cache_destroy(ext4_ac_cachep); +- kmem_cache_destroy(ext4_free_ext_cachep); ++ kmem_cache_destroy(ext4_free_data_cachep); + ext4_remove_debugfs_entry(); + } + +@@ -3362,8 +3357,8 @@ static void ext4_mb_generate_from_freeli + n = rb_first(&(grp->bb_free_root)); + + while (n) { +- entry = rb_entry(n, struct ext4_free_data, node); +- mb_set_bits(bitmap, entry->start_blk, entry->count); ++ entry = rb_entry(n, struct ext4_free_data, efd_node); ++ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count); + n = rb_next(n); + } + return; +@@ -4623,11 +4618,11 @@ out3: + * AND the blocks are associated with the same group. + */ + static int can_merge(struct ext4_free_data *entry1, +- struct ext4_free_data *entry2) ++ struct ext4_free_data *entry2) + { +- if ((entry1->t_tid == entry2->t_tid) && +- (entry1->group == entry2->group) && +- ((entry1->start_blk + entry1->count) == entry2->start_blk)) ++ if ((entry1->efd_tid == entry2->efd_tid) && ++ (entry1->efd_group == entry2->efd_group) && ++ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk)) + return 1; + return 0; + } +@@ -4640,7 +4635,6 @@ ext4_mb_free_metadata(handle_t *handle, + struct ext4_free_data *entry; + struct ext4_group_info *db = e4b->bd_info; + struct super_block *sb = e4b->bd_sb; +- struct ext4_sb_info *sbi = EXT4_SB(sb); + struct rb_node **n = &db->bb_free_root.rb_node, *node; + struct rb_node *parent = NULL, *new_node; + +@@ -4648,8 +4642,8 @@ ext4_mb_free_metadata(handle_t *handle, + BUG_ON(e4b->bd_bitmap_page == NULL); + BUG_ON(e4b->bd_buddy_page == NULL); + +- new_node = &new_entry->node; +- block = new_entry->start_blk; ++ new_node = &new_entry->efd_node; ++ block = new_entry->efd_start_blk; + + if (!*n) { + /* first free block exent. 
We need to +@@ -4662,15 +4656,15 @@ ext4_mb_free_metadata(handle_t *handle, + } + while (*n) { + parent = *n; +- entry = rb_entry(parent, struct ext4_free_data, node); +- if (block < entry->start_blk) ++ entry = rb_entry(parent, struct ext4_free_data, efd_node); ++ if (block < entry->efd_start_blk) + n = &(*n)->rb_left; +- else if (block >= (entry->start_blk + entry->count)) ++ else if (block >= (entry->efd_start_blk + entry->efd_count)) + n = &(*n)->rb_right; + else { + ext4_grp_locked_error(sb, e4b->bd_group, __func__, + "Double free of blocks %d (%d %d)", +- block, entry->start_blk, entry->count); ++ block, entry->efd_start_blk, entry->efd_count); + return 0; + } + } +@@ -4681,34 +4675,29 @@ ext4_mb_free_metadata(handle_t *handle, + /* Now try to see the extent can be merged to left and right */ + node = rb_prev(new_node); + if (node) { +- entry = rb_entry(node, struct ext4_free_data, node); ++ entry = rb_entry(node, struct ext4_free_data, efd_node); + if (can_merge(entry, new_entry)) { +- new_entry->start_blk = entry->start_blk; +- new_entry->count += entry->count; ++ new_entry->efd_start_blk = entry->efd_start_blk; ++ new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- spin_lock(&sbi->s_md_lock); +- list_del(&entry->list); +- spin_unlock(&sbi->s_md_lock); +- kmem_cache_free(ext4_free_ext_cachep, entry); ++ ext4_journal_callback_del(handle, &entry->efd_jce); ++ kmem_cache_free(ext4_free_data_cachep, entry); + } + } + + node = rb_next(new_node); + if (node) { +- entry = rb_entry(node, struct ext4_free_data, node); ++ entry = rb_entry(node, struct ext4_free_data, efd_node); + if (can_merge(new_entry, entry)) { +- new_entry->count += entry->count; ++ new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- spin_lock(&sbi->s_md_lock); +- list_del(&entry->list); +- spin_unlock(&sbi->s_md_lock); +- kmem_cache_free(ext4_free_ext_cachep, entry); ++ ext4_journal_callback_del(handle, &entry->efd_jce); ++ kmem_cache_free(ext4_free_data_cachep, entry); + } + } + /* Add the extent to transaction's private list */ +- spin_lock(&sbi->s_md_lock); +- list_add(&new_entry->list, &handle->h_transaction->t_private_list); +- spin_unlock(&sbi->s_md_lock); ++ ext4_journal_callback_add(handle, ext4_free_data_callback, ++ &new_entry->efd_jce); + return 0; + } + +@@ -4836,11 +4825,11 @@ do_more: + * blocks being freed are metadata. 
these blocks shouldn't + * be used until this transaction is committed + */ +- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); +- new_entry->start_blk = bit; +- new_entry->group = block_group; +- new_entry->count = count; +- new_entry->t_tid = handle->h_transaction->t_tid; ++ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); ++ new_entry->efd_start_blk = bit; ++ new_entry->efd_group = block_group; ++ new_entry->efd_count = count; ++ new_entry->efd_tid = handle->h_transaction->t_tid; + + ext4_lock_group(sb, block_group); + mb_clear_bits(bitmap_bh->b_data, bit, count); +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -301,6 +301,23 @@ void ext4_journal_abort_handle(const cha + + EXPORT_SYMBOL(ext4_journal_abort_handle); + ++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) ++{ ++ struct super_block *sb = journal->j_private; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ int error = is_journal_aborted(journal); ++ struct ext4_journal_cb_entry *jce, *tmp; ++ ++ spin_lock(&sbi->s_md_lock); ++ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { ++ list_del_init(&jce->jce_list); ++ spin_unlock(&sbi->s_md_lock); ++ jce->jce_func(sb, jce, error); ++ spin_lock(&sbi->s_md_lock); ++ } ++ spin_unlock(&sbi->s_md_lock); ++} ++ + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. + * +@@ -3040,6 +3057,8 @@ static int ext4_fill_super(struct super_ + } + set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); + ++ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; ++ + no_journal: + + if (test_opt(sb, NOBH)) { diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series index 1e5417f..ebcd4d8 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series @@ -34,3 +34,4 @@ ext4-failed-mount-b23368.patch ext4-export-64bit-name-hash.patch ext4-vmalloc-rhel5.patch ext4-mballoc-group_check-rhel5.patch +ext4-journal-callback-rhel5.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index c64eee3..38d2111 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -30,3 +30,4 @@ ext4-back-dquot-to-rhel6.patch ext4-nocmtime-2.6-rhel5.patch ext4-export-64bit-name-hash.patch ext4-vmalloc-rhel6.patch +ext4-journal-callback.patch diff --git a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch index 3d7b761..5e11d38 100644 --- a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch +++ b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch @@ -1,3 +1,8 @@ +This patch is no longer needed for Lustre, since Lustre 2.1. It is kept +in the kernel patch series for compatibility with older Lustre releases +to simplify the upgrade process so that both the kernel and Lustre do +not need to be upgraded at the same time. See Jira issue LU-433. 
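+
+For reference, callers that used the jbd2_journal_callback_set() export from
+this patch now register commit callbacks through the wrapper added by the
+ext4-journal-callback patches; the sketch below follows the fsfilt_ext3.c
+usage in this change:
+
+	/* old API (this patch):
+	 *   jbd2_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
+	 *   void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error);
+	 * new API (ext4-journal-callback patch):
+	 *   ext4_journal_callback_add(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
+	 *   void fsfilt_ext3_cb_func(struct super_block *sb,
+	 *                            struct journal_callback *jcb, int error);
+	 */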
+ Index: linux-2.6.18-128.1.6/include/linux/jbd2.h =================================================================== --- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h 2009-04-15 08:35:28.000000000 +0530 diff --git a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch index f219771..e15d750 100644 --- a/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch +++ b/lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch @@ -1,3 +1,8 @@ +This patch is no longer needed for Lustre, since Lustre 2.1. It is kept +in the kernel patch series for compatibility with older Lustre releases +to simplify the upgrade process so that both the kernel and Lustre do +not need to be upgraded at the same time. See Jira issue LU-433. + This allows the jbd transaction commit callbacks to be registered. The ext4 jbd2 code has a different commit callback (one per transaction) that could be used to provide equivalent functionality. This would diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 271f194..84f647b 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -33,6 +33,9 @@ * */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * @@ -83,9 +86,11 @@ #endif #if defined(HAVE_EXT3_XATTR_H) -#include -#else +# include +#elif !defined(EXT3_XATTR_INDEX_TRUSTED) /* ext3 xattr.h not available in rh style kernel-devel rpm */ +/* CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h */ +# define EXT3_XATTR_INDEX_TRUSTED 4 extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); #endif @@ -133,34 +138,41 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, ext3_discard_preallocations(inode) #endif - -static cfs_mem_cache_t *fcb_cache; - -struct fsfilt_cb_data { - struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ - fsfilt_cb_t cb_func; /* MDS/OBD completion function */ - struct obd_device *cb_obd; /* MDS/OBD completion device */ - __u64 cb_last_rcvd; /* MDS/OST last committed operation */ - void *cb_data; /* MDS/OST completion function data */ -}; - -#ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */ -#define EXT3_XATTR_INDEX_TRUSTED 4 -#endif - #ifdef HAVE_EXT4_LDISKFS #define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid) #define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid) -#define fsfilt_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb) #else #define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid) #define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid) -#define fsfilt_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb) #define ext_pblock(ex) le32_to_cpu((ex)->ee_start) #define ext3_ext_store_pblock(ex, pblock) ((ex)->ee_start = cpu_to_le32(pblock)) #define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap) #endif +#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD +# define journal_callback ext4_journal_cb_entry +# define fsfilt_journal_callback_set(handle, func, jcb) \ + ext4_journal_callback_add(handle, func, jcb) +#elif defined(HAVE_JBD2_JOURNAL_CALLBACK_SET) +# define fsfilt_journal_callback_set(handle, func, jcb) \ + 
jbd2_journal_callback_set(handle, func, jcb) +#elif defined(HAVE_JOURNAL_CALLBACK_SET) +# define fsfilt_journal_callback_set(handle, func, jcb) \ + journal_callback_set(handle, func, jcb) +#else +# error missing journal commit callback +#endif /* HAVE_EXT4_JOURNAL_CALLBACK_ADD */ + +static cfs_mem_cache_t *fcb_cache; + +struct fsfilt_cb_data { + struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ + fsfilt_cb_t cb_func; /* MDS/OBD completion function */ + struct obd_device *cb_obd; /* MDS/OBD completion device */ + __u64 cb_last_rcvd; /* MDS/OST last committed operation */ + void *cb_data; /* MDS/OST completion function data */ +}; + #ifndef ext3_find_next_bit #define ext3_find_next_bit ext2_find_next_bit #endif @@ -777,9 +789,14 @@ static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, return rc; } +#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD +static void fsfilt_ext3_cb_func(struct super_block *sb, + struct journal_callback *jcb, int error) +#else static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) +#endif { - struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; + struct fsfilt_cb_data *fcb = container_of(jcb, typeof(*fcb), cb_jcb); fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error); @@ -802,8 +819,7 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, fcb->cb_data = cb_data; CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); - fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, - (struct journal_callback *)fcb); + fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb); return 0; } diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 9a052be..29764c9 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -33,6 +33,9 @@ * */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. * @@ -617,7 +620,12 @@ static int osd_param_is_sane(const struct osd_device *dev, /* * Concurrency: shouldn't matter. */ +#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD +static void osd_trans_commit_cb(struct super_block *sb, + struct journal_callback *jcb, int error) +#else static void osd_trans_commit_cb(struct journal_callback *jcb, int error) +#endif { struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb); struct thandle *th = &oh->ot_super; @@ -694,12 +702,12 @@ static struct thandle *osd_trans_start(const struct lu_env *env, /* add commit callback */ lu_context_init(&th->th_ctx, LCT_TX_HANDLE); lu_context_enter(&th->th_ctx); - osd_journal_callback_set(jh, osd_trans_commit_cb, - (struct journal_callback *)&oh->ot_jcb); - LASSERT(oti->oti_txns == 0); - LASSERT(oti->oti_r_locks == 0); - LASSERT(oti->oti_w_locks == 0); - oti->oti_txns++; + osd_journal_callback_set(jh,osd_trans_commit_cb, + &oh->ot_jcb); + LASSERT(oti->oti_txns == 0); + LASSERT(oti->oti_r_locks == 0); + LASSERT(oti->oti_w_locks == 0); + oti->oti_txns++; } else { OBD_FREE_PTR(oh); th = (void *)jh; diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index ea559e3..343287c 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -30,6 +30,9 @@ * Use is subject to license terms. */ /* + * Copyright (c) 2011 Whamcloud, Inc. + */ +/* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. 
* @@ -55,7 +58,12 @@ #ifdef HAVE_EXT4_LDISKFS #include #include -#define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb) +# ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD +# define journal_callback ldiskfs_journal_cb_entry +# define osd_journal_callback_set(handle, func, jcb) ldiskfs_journal_callback_add(handle, func, jcb) +# else +# define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb) +# endif #else #include #include
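
For illustration, here is a minimal sketch of how a consumer is expected to use the
commit-callback interface introduced by the ext4-journal-callback patches above (the
ldiskfs build exposes the same interface as ldiskfs_journal_callback_add(), as used by
osd_handler.c). The my_* names are hypothetical; the pattern mirrors struct
fsfilt_cb_data in lustre/lvfs/fsfilt_ext3.c, where the ext4_journal_cb_entry must be
the first member so the callback can recover its containing structure.

    /* sketch only -- assumes it is built in an fs/ext4 (or ldiskfs) context */
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include "ext4_jbd2.h"   /* struct ext4_journal_cb_entry, ext4_journal_callback_add() */

    /* Private per-operation data; the jce member MUST be the first field so the
     * commit callback can map the ext4_journal_cb_entry pointer back to it. */
    struct my_commit_cb {
            struct ext4_journal_cb_entry mc_jce;
            __u64                        mc_last_rcvd;
    };

    /* Runs in the jbd2 commit thread after the transaction commits; error is
     * non-zero if the journal was aborted.  No locks are held here. */
    static void my_cb_func(struct super_block *sb,
                           struct ext4_journal_cb_entry *jce, int error)
    {
            struct my_commit_cb *mc = container_of(jce, struct my_commit_cb, mc_jce);

            /* ... act on mc->mc_last_rcvd and error, then release the entry ... */
            kfree(mc);
    }

    /* Called with an active handle; registers the callback on the handle's
     * transaction so it fires once that transaction reaches disk. */
    static int my_register_commit_cb(handle_t *handle, __u64 last_rcvd)
    {
            struct my_commit_cb *mc = kmalloc(sizeof(*mc), GFP_NOFS);

            if (mc == NULL)
                    return -ENOMEM;
            mc->mc_last_rcvd = last_rcvd;
            ext4_journal_callback_add(handle, my_cb_func, &mc->mc_jce);
            return 0;
    }

Note that the callback runs in the jbd2 commit thread with no locks held, so it may
block briefly but must not start new journal operations, per the
ext4_journal_callback_add() documentation added above.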