Whamcloud - gitweb
LU-433 remove jbd2-jcberr patch from kernel
author Bobi Jam <bobijam@whamcloud.com>
Tue, 21 Jun 2011 01:07:57 +0000 (09:07 +0800)
committer Oleg Drokin <green@whamcloud.com>
Thu, 6 Oct 2011 04:08:59 +0000 (00:08 -0400)
In the upstream ext4 code there is new functionality that allows a
per-commit callback to be set (j_commit_callback), which is used by
the mballoc code to manage free space bitmaps after deleted blocks
have been released. We expand it to contain multiple different
callbacks.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: I6397ccabd8d729658cf2ee13c9c3731a9eb31219
Reviewed-on: http://review.whamcloud.com/983
Tested-by: Hudson
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
build/autoconf/lustre-build.m4
ldiskfs/configure.ac
ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-journal-callback.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch
lustre/lvfs/fsfilt_ext3.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h

index 20e3d0d..7feb9dc 100644 (file)
@@ -320,6 +320,10 @@ AC_SUBST(LDISKFS_SUBDIR)
 AM_CONDITIONAL(LDISKFS_ENABLED, test x$with_ldiskfs != xno)
 AM_CONDITIONAL(LDISKFS_IN_KERNEL, test x$with_ldiskfs = xinkernel)
 
+if test x$with_ldiskfs != xno ; then
+       LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET
+fi
+
 if test x$enable_ext4 = xyes ; then
        AC_DEFINE(HAVE_EXT4_LDISKFS, 1, [build ext4 based ldiskfs])
 fi
@@ -384,6 +388,28 @@ esac
 AC_SUBST(LIBCFS_SUBDIR)
 AC_SUBST(LIBCFS_INCLUDE_DIR)
 ])
+#
+# Check for jbd2_journal_callback_set(), which is needed for commit
+# callbacks.  When LU-433 lands, jbd2_journal_callback_set() will only
+# remain for legacy reasons and AC_MSG_ERROR can be removed.
+#
+# 2.6.18 with ext3 still uses journal_callback_set() for commit callbacks.
+#
+AC_DEFUN([LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET],
+[
+       LB_CHECK_SYMBOL_EXPORT([jbd2_journal_callback_set],
+       [fs/jbd2/journal.c],
+       [AC_DEFINE(HAVE_JBD2_JOURNAL_CALLBACK_SET, 1,
+                  [kernel exports jbd2_journal_callback_set])],
+       [LB_CHECK_SYMBOL_EXPORT([journal_callback_set],
+               [fs/jbd/journal.c],
+               [AC_DEFINE(HAVE_JOURNAL_CALLBACK_SET, 1,
+                          [kernel exports journal_callback_set])],
+               [if test x$with_ldiskfs != xno ; then
+                       AC_MSG_ERROR([ldiskfs needs jbd2-jcberr patch])
+               fi])])
+])
 
 #
 # LB_DEFINE_LDISKFS_OPTIONS
index 4d40c0d..2b3d1c5 100644 (file)
@@ -145,17 +145,6 @@ AC_SUBST(LDISKFS_SERIES)
 
 AC_SUBST(ac_configure_args)
 
-LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-                      [fs/dcache.c],
-                      [AC_DEFINE(HAVE_D_REHASH_COND, 1,
-                                 [d_rehash_cond is exported by the kernel])],
-                                 [])
-
-LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-                      [fs/dcache.c],
-                      [AC_DEFINE(HAVE___D_REHASH, 1,
-                                 [__d_rehash is exported by the kernel])],
-                                 [])
 
 LB_CONFIG_FILES
 AC_CONFIG_FILES([ldiskfs/autoMakefile ldiskfs/Makefile])
diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch
new file mode 100644 (file)
index 0000000..4c08c9e
--- /dev/null
@@ -0,0 +1,448 @@
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
++/**
++ *   struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ *   This struct is a 'seed' structure for use with your own callback
++ *   structs. If you are using callbacks you must allocate one of these
++ *   or another struct of your own definition which has this struct
++ *   as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++      /* list information for other callbacks attached to the same handle */
++      struct list_head jce_list;
++
++      /*  Function to call with this callback structure */
++      void (*jce_func)(struct super_block *sb,
++                       struct ext4_journal_cb_entry *jce, int error);
++
++      /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed:
++ *        @sb: superblock of current filesystem for transaction
++ *        @jce: returned journal callback data
++ *        @rc: journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++                      void (*func)(struct super_block *sb,
++                                   struct ext4_journal_cb_entry *jce,
++                                   int rc),
++                      struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      /* Add the jce to transaction's private list */
++      jce->jce_func = func;
++      spin_lock(&sbi->s_md_lock);
++      list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++                                           struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      spin_lock(&sbi->s_md_lock);
++      list_del_init(&jce->jce_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+                    struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+  * mballoc.c contains the multiblocks allocation routines
+  */
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+@@ -335,14 +336,12 @@
+  */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+                                               ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+-
+-
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2942,8 +2941,6 @@ int ext4_mb_init(struct super_block *sb,
+       ext4_mb_history_init(sb);
+-      if (sbi->s_journal)
+-              sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+       return 0;
+ }
+@@ -3032,46 +3029,42 @@ int ext4_mb_release(struct super_block *
+  * This function is called by the jbd2 layer once the commit has finished,
+  * so we know we can free the blocks that were released with that commit.
+  */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++                                  struct ext4_journal_cb_entry *jce,
++                                  int rc)
+ {
+-      struct super_block *sb = journal->j_private;
++      struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+       struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+-      struct ext4_free_data *entry;
+-      struct list_head *l, *ltmp;
+-      list_for_each_safe(l, ltmp, &txn->t_private_list) {
+-              entry = list_entry(l, struct ext4_free_data, list);
+-
+-              mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+-                       entry->count, entry->group, entry);
+-
+-              err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+-              /* we expect to find existing buddy because it's pinned */
+-              BUG_ON(err != 0);
+-
+-              db = e4b.bd_info;
+-              /* there are blocks to put in buddy to make them really free */
+-              count += entry->count;
+-              count2++;
+-              ext4_lock_group(sb, entry->group);
+-              /* Take it out of per group rb tree */
+-              rb_erase(&entry->node, &(db->bb_free_root));
+-              mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+-              if (!db->bb_free_root.rb_node) {
+-                      /* No more items in the per group rb tree
+-                       * balance refcounts from ext4_mb_free_metadata()
+-                       */
+-                      page_cache_release(e4b.bd_buddy_page);
+-                      page_cache_release(e4b.bd_bitmap_page);
+-              }
+-              ext4_unlock_group(sb, entry->group);
++      mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++               entry->efd_count, entry->efd_group, entry);
+-              kmem_cache_free(ext4_free_ext_cachep, entry);
+-              ext4_mb_unload_buddy(&e4b);
++      err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++      /* we expect to find existing buddy because it's pinned */
++      BUG_ON(err != 0);
++
++      db = e4b.bd_info;
++      /* there are blocks to put in buddy to make them really free */
++      count += entry->efd_count;
++      count2++;
++      ext4_lock_group(sb, entry->efd_group);
++      /* Take it out of per group rb tree */
++      rb_erase(&entry->efd_node, &(db->bb_free_root));
++      mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++      if (!db->bb_free_root.rb_node) {
++              /* No more items in the per group rb tree
++               * balance refcounts from ext4_mb_free_metadata()
++               */
++              page_cache_release(e4b.bd_buddy_page);
++              page_cache_release(e4b.bd_bitmap_page);
+       }
++      ext4_unlock_group(sb, entry->efd_group);
++
++      kmem_cache_free(ext4_free_data_cachep, entry);
++      ext4_mb_unload_buddy(&e4b);
+       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -3123,22 +3116,24 @@ int __init init_ext4_mballoc(void)
+               kmem_cache_create("ext4_alloc_context",
+                                    sizeof(struct ext4_allocation_context),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+-      if (ext4_ac_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_ac_cachep == NULL)
++              goto out_err;
+-      ext4_free_ext_cachep =
+-              kmem_cache_create("ext4_free_block_extents",
++      ext4_free_data_cachep =
++              kmem_cache_create("ext4_free_data",
+                                    sizeof(struct ext4_free_data),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+-      if (ext4_free_ext_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              kmem_cache_destroy(ext4_ac_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_free_data_cachep == NULL)
++              goto out1_err;
++
+       ext4_create_debugfs_entry();
+       return 0;
++
++out1_err:
++      kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++      kmem_cache_destroy(ext4_pspace_cachep);
++      return -ENOMEM;
+ }
+ void exit_ext4_mballoc(void)
+@@ -3150,7 +3145,7 @@ void exit_ext4_mballoc(void)
+       rcu_barrier();
+       kmem_cache_destroy(ext4_pspace_cachep);
+       kmem_cache_destroy(ext4_ac_cachep);
+-      kmem_cache_destroy(ext4_free_ext_cachep);
++      kmem_cache_destroy(ext4_free_data_cachep);
+       ext4_remove_debugfs_entry();
+ }
+@@ -3688,8 +3683,8 @@ static void ext4_mb_generate_from_freeli
+       n = rb_first(&(grp->bb_free_root));
+       while (n) {
+-              entry = rb_entry(n, struct ext4_free_data, node);
+-              mb_set_bits(bitmap, entry->start_blk, entry->count);
++              entry = rb_entry(n, struct ext4_free_data, efd_node);
++              mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+               n = rb_next(n);
+       }
+       return;
+@@ -4974,11 +4969,11 @@ out3:
+  * AND the blocks are associated with the same group.
+  */
+ static int can_merge(struct ext4_free_data *entry1,
+-                      struct ext4_free_data *entry2)
++                   struct ext4_free_data *entry2)
+ {
+-      if ((entry1->t_tid == entry2->t_tid) &&
+-          (entry1->group == entry2->group) &&
+-          ((entry1->start_blk + entry1->count) == entry2->start_blk))
++      if ((entry1->efd_tid == entry2->efd_tid) &&
++          (entry1->efd_group == entry2->efd_group) &&
++          ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+               return 1;
+       return 0;
+ }
+@@ -4991,7 +4986,6 @@ ext4_mb_free_metadata(handle_t *handle, 
+       struct ext4_free_data *entry;
+       struct ext4_group_info *db = e4b->bd_info;
+       struct super_block *sb = e4b->bd_sb;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+@@ -4999,8 +4993,8 @@ ext4_mb_free_metadata(handle_t *handle, 
+       BUG_ON(e4b->bd_bitmap_page == NULL);
+       BUG_ON(e4b->bd_buddy_page == NULL);
+-      new_node = &new_entry->node;
+-      block = new_entry->start_blk;
++      new_node = &new_entry->efd_node;
++      block = new_entry->efd_start_blk;
+       if (!*n) {
+               /* first free block exent. We need to
+@@ -5013,15 +5007,15 @@ ext4_mb_free_metadata(handle_t *handle, 
+       }
+       while (*n) {
+               parent = *n;
+-              entry = rb_entry(parent, struct ext4_free_data, node);
+-              if (block < entry->start_blk)
++              entry = rb_entry(parent, struct ext4_free_data, efd_node);
++              if (block < entry->efd_start_blk)
+                       n = &(*n)->rb_left;
+-              else if (block >= (entry->start_blk + entry->count))
++              else if (block >= (entry->efd_start_blk + entry->efd_count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+                                       "Double free of blocks %d (%d %d)",
+-                                      block, entry->start_blk, entry->count);
++                                      block, entry->efd_start_blk, entry->efd_count);
+                       return 0;
+               }
+       }
+@@ -5032,34 +5026,29 @@ ext4_mb_free_metadata(handle_t *handle, 
+       /* Now try to see the extent can be merged to left and right */
+       node = rb_prev(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(entry, new_entry)) {
+-                      new_entry->start_blk = entry->start_blk;
+-                      new_entry->count += entry->count;
++                      new_entry->efd_start_blk = entry->efd_start_blk;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       node = rb_next(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(new_entry, entry)) {
+-                      new_entry->count += entry->count;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       /* Add the extent to transaction's private list */
+-      spin_lock(&sbi->s_md_lock);
+-      list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+-      spin_unlock(&sbi->s_md_lock);
++      ext4_journal_callback_add(handle, ext4_free_data_callback,
++                                &new_entry->efd_jce);
+       return 0;
+ }
+@@ -5191,11 +5180,11 @@ do_more:
+                * blocks being freed are metadata. these blocks shouldn't
+                * be used until this transaction is committed
+                */
+-              new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+-              new_entry->start_blk = bit;
+-              new_entry->group  = block_group;
+-              new_entry->count = count;
+-              new_entry->t_tid = handle->h_transaction->t_tid;
++              new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++              new_entry->efd_start_blk = bit;
++              new_entry->efd_group  = block_group;
++              new_entry->efd_count = count;
++              new_entry->efd_tid = handle->h_transaction->t_tid;
+               ext4_lock_group(sb, block_group);
+               mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -107,23 +107,24 @@ extern u8 mb_enable_debug;
+  */
+ #define MB_DEFAULT_GROUP_PREALLOC     512
+-
+ struct ext4_free_data {
+-      /* this links the free block information from group_info */
+-      struct rb_node node;
++      /* MUST be the first member */
++      struct ext4_journal_cb_entry    efd_jce;
+-      /* this links the free block information from ext4_sb_info */
+-      struct list_head list;
++      /* ext4_free_data private data starts from here */
++
++      /* this links the free block information from group_info */
++      struct rb_node          efd_node;
+       /* group which free block extent belongs */
+-      ext4_group_t group;
++      ext4_group_t            efd_group;
+       /* free block extent */
+-      ext4_grpblk_t start_blk;
+-      ext4_grpblk_t count;
++      ext4_grpblk_t           efd_start_blk;
++      ext4_grpblk_t           efd_count;
+       /* transaction which freed this extent */
+-      tid_t   t_tid;
++      tid_t                   efd_tid;
+ };
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -304,6 +304,23 @@ void ext4_journal_abort_handle(const cha
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++      struct super_block              *sb = journal->j_private;
++      struct ext4_sb_info             *sbi = EXT4_SB(sb);
++      int                             error = is_journal_aborted(journal);
++      struct ext4_journal_cb_entry    *jce, *tmp;
++
++      spin_lock(&sbi->s_md_lock);
++      list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++              list_del_init(&jce->jce_list);
++              spin_unlock(&sbi->s_md_lock);
++              jce->jce_func(sb, jce, error);
++              spin_lock(&sbi->s_md_lock);
++      }
++      spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+@@ -2997,6 +3014,8 @@ static int ext4_fill_super(struct super_
+       }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
++      sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+       if (test_opt(sb, NOBH)) {
diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch
new file mode 100644 (file)
index 0000000..b177e2e
--- /dev/null
@@ -0,0 +1,464 @@
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
++/**
++ *   struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ *   This struct is a 'seed' structure for use with your own callback
++ *   structs. If you are using callbacks you must allocate one of these
++ *   or another struct of your own definition which has this struct
++ *   as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++      /* list information for other callbacks attached to the same handle */
++      struct list_head jce_list;
++
++      /*  Function to call with this callback structure */
++      void (*jce_func)(struct super_block *sb,
++                       struct ext4_journal_cb_entry *jce, int error);
++
++      /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed:
++ *        @sb: superblock of current filesystem for transaction
++ *        @jce: returned journal callback data
++ *        @rc: journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++                      void (*func)(struct super_block *sb,
++                                   struct ext4_journal_cb_entry *jce,
++                                   int rc),
++                      struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      /* Add the jce to transaction's private list */
++      jce->jce_func = func;
++      spin_lock(&sbi->s_md_lock);
++      list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++                                           struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      spin_lock(&sbi->s_md_lock);
++      list_del_init(&jce->jce_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+                    struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -96,23 +96,24 @@ extern u8 mb_enable_debug;
+  */
+ #define MB_DEFAULT_GROUP_PREALLOC     512
+-
+ struct ext4_free_data {
+-      /* this links the free block information from group_info */
+-      struct rb_node node;
++      /* MUST be the first member */
++      struct ext4_journal_cb_entry    efd_jce;
+-      /* this links the free block information from ext4_sb_info */
+-      struct list_head list;
++      /* ext4_free_data private data starts from here */
++
++      /* this links the free block information from group_info */
++      struct rb_node          efd_node;
+       /* group which free block extent belongs */
+-      ext4_group_t group;
++      ext4_group_t            efd_group;
+       /* free block extent */
+-      ext4_grpblk_t start_blk;
+-      ext4_grpblk_t count;
++      ext4_grpblk_t           efd_start_blk;
++      ext4_grpblk_t           efd_count;
+       /* transaction which freed this extent */
+-      tid_t   t_tid;
++      tid_t                   efd_tid;
+ };
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+  * mballoc.c contains the multiblocks allocation routines
+  */
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+ #include <trace/events/ext4.h>
+@@ -336,12 +337,12 @@
+  */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+                                               ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2592,8 +2593,6 @@ int ext4_mb_init(struct super_block *sb,
+               }
+       }
+-      if (sbi->s_journal)
+-              sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+       return 0;
+ }
+@@ -2693,56 +2692,52 @@ static inline int ext4_issue_discard(str
+  * This function is called by the jbd2 layer once the commit has finished,
+  * so we know we can free the blocks that were released with that commit.
+  */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++                                  struct ext4_journal_cb_entry *jce,
++                                  int rc)
+ {
+-      struct super_block *sb = journal->j_private;
++      struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+       struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+-      struct ext4_free_data *entry;
+-      struct list_head *l, *ltmp;
+-
+-      list_for_each_safe(l, ltmp, &txn->t_private_list) {
+-              entry = list_entry(l, struct ext4_free_data, list);
+-              mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+-                       entry->count, entry->group, entry);
++      mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++               entry->efd_count, entry->efd_group, entry);
+-              if (test_opt(sb, DISCARD)) {
+-                      int ret;
+-                      ret = ext4_issue_discard(sb, entry->group,
+-                                      entry->start_blk, entry->count);
+-                      if (unlikely(ret == -EOPNOTSUPP)) {
+-                              ext4_warning(sb, "discard not supported, "
+-                                               "disabling");
+-                              clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+-                      }
++      if (test_opt(sb, DISCARD)) {
++              int ret;
++              ret = ext4_issue_discard(sb, entry->efd_group,
++                              entry->efd_start_blk, entry->efd_count);
++              if (unlikely(ret == -EOPNOTSUPP)) {
++                      ext4_warning(sb, "discard not supported, "
++                                       "disabling");
++                      clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+               }
++      }
+-              err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+-              /* we expect to find existing buddy because it's pinned */
+-              BUG_ON(err != 0);
+-
+-              db = e4b.bd_info;
+-              /* there are blocks to put in buddy to make them really free */
+-              count += entry->count;
+-              count2++;
+-              ext4_lock_group(sb, entry->group);
+-              /* Take it out of per group rb tree */
+-              rb_erase(&entry->node, &(db->bb_free_root));
+-              mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+-              if (!db->bb_free_root.rb_node) {
+-                      /* No more items in the per group rb tree
+-                       * balance refcounts from ext4_mb_free_metadata()
+-                       */
+-                      page_cache_release(e4b.bd_buddy_page);
+-                      page_cache_release(e4b.bd_bitmap_page);
+-              }
+-              ext4_unlock_group(sb, entry->group);
+-              kmem_cache_free(ext4_free_ext_cachep, entry);
+-              ext4_mb_release_desc(&e4b);
++      err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++      /* we expect to find existing buddy because it's pinned */
++      BUG_ON(err != 0);
++
++      db = e4b.bd_info;
++      /* there are blocks to put in buddy to make them really free */
++      count += entry->efd_count;
++      count2++;
++      ext4_lock_group(sb, entry->efd_group);
++      /* Take it out of per group rb tree */
++      rb_erase(&entry->efd_node, &(db->bb_free_root));
++      mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++      if (!db->bb_free_root.rb_node) {
++              /* No more items in the per group rb tree
++               * balance refcounts from ext4_mb_free_metadata()
++               */
++              page_cache_release(e4b.bd_buddy_page);
++              page_cache_release(e4b.bd_bitmap_page);
+       }
++      ext4_unlock_group(sb, entry->efd_group);
++      kmem_cache_free(ext4_free_data_cachep, entry);
++      ext4_mb_release_desc(&e4b);
+       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -2794,22 +2789,22 @@ int __init init_ext4_mballoc(void)
+               kmem_cache_create("ext4_alloc_context",
+                                    sizeof(struct ext4_allocation_context),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+-      if (ext4_ac_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_ac_cachep == NULL)
++              goto out_err;
++
++      ext4_free_data_cachep =
++              KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT);
++      if (ext4_free_data_cachep == NULL)
++              goto out1_err;
+-      ext4_free_ext_cachep =
+-              kmem_cache_create("ext4_free_block_extents",
+-                                   sizeof(struct ext4_free_data),
+-                                   0, SLAB_RECLAIM_ACCOUNT, NULL);
+-      if (ext4_free_ext_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              kmem_cache_destroy(ext4_ac_cachep);
+-              return -ENOMEM;
+-      }
+       ext4_create_debugfs_entry();
+       return 0;
++
++out1_err:
++      kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++      kmem_cache_destroy(ext4_pspace_cachep);
++      return -ENOMEM;
+ }
+ void exit_ext4_mballoc(void)
+@@ -2821,7 +2816,7 @@ void exit_ext4_mballoc(void)
+       rcu_barrier();
+       kmem_cache_destroy(ext4_pspace_cachep);
+       kmem_cache_destroy(ext4_ac_cachep);
+-      kmem_cache_destroy(ext4_free_ext_cachep);
++      kmem_cache_destroy(ext4_free_data_cachep);
+       ext4_remove_debugfs_entry();
+ }
+@@ -3362,8 +3357,8 @@ static void ext4_mb_generate_from_freeli
+       n = rb_first(&(grp->bb_free_root));
+       while (n) {
+-              entry = rb_entry(n, struct ext4_free_data, node);
+-              mb_set_bits(bitmap, entry->start_blk, entry->count);
++              entry = rb_entry(n, struct ext4_free_data, efd_node);
++              mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+               n = rb_next(n);
+       }
+       return;
+@@ -4623,11 +4618,11 @@ out3:
+  * AND the blocks are associated with the same group.
+  */
+ static int can_merge(struct ext4_free_data *entry1,
+-                      struct ext4_free_data *entry2)
++                   struct ext4_free_data *entry2)
+ {
+-      if ((entry1->t_tid == entry2->t_tid) &&
+-          (entry1->group == entry2->group) &&
+-          ((entry1->start_blk + entry1->count) == entry2->start_blk))
++      if ((entry1->efd_tid == entry2->efd_tid) &&
++          (entry1->efd_group == entry2->efd_group) &&
++          ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+               return 1;
+       return 0;
+ }
+@@ -4640,7 +4635,6 @@ ext4_mb_free_metadata(handle_t *handle, 
+       struct ext4_free_data *entry;
+       struct ext4_group_info *db = e4b->bd_info;
+       struct super_block *sb = e4b->bd_sb;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+@@ -4648,8 +4642,8 @@ ext4_mb_free_metadata(handle_t *handle, 
+       BUG_ON(e4b->bd_bitmap_page == NULL);
+       BUG_ON(e4b->bd_buddy_page == NULL);
+-      new_node = &new_entry->node;
+-      block = new_entry->start_blk;
++      new_node = &new_entry->efd_node;
++      block = new_entry->efd_start_blk;
+       if (!*n) {
+               /* first free block exent. We need to
+@@ -4662,15 +4656,15 @@ ext4_mb_free_metadata(handle_t *handle, 
+       }
+       while (*n) {
+               parent = *n;
+-              entry = rb_entry(parent, struct ext4_free_data, node);
+-              if (block < entry->start_blk)
++              entry = rb_entry(parent, struct ext4_free_data, efd_node);
++              if (block < entry->efd_start_blk)
+                       n = &(*n)->rb_left;
+-              else if (block >= (entry->start_blk + entry->count))
++              else if (block >= (entry->efd_start_blk + entry->efd_count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+                                       "Double free of blocks %d (%d %d)",
+-                                      block, entry->start_blk, entry->count);
++                                      block, entry->efd_start_blk, entry->efd_count);
+                       return 0;
+               }
+       }
+@@ -4681,34 +4675,29 @@ ext4_mb_free_metadata(handle_t *handle, 
+       /* Now try to see the extent can be merged to left and right */
+       node = rb_prev(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(entry, new_entry)) {
+-                      new_entry->start_blk = entry->start_blk;
+-                      new_entry->count += entry->count;
++                      new_entry->efd_start_blk = entry->efd_start_blk;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       node = rb_next(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(new_entry, entry)) {
+-                      new_entry->count += entry->count;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       /* Add the extent to transaction's private list */
+-      spin_lock(&sbi->s_md_lock);
+-      list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+-      spin_unlock(&sbi->s_md_lock);
++      ext4_journal_callback_add(handle, ext4_free_data_callback,
++                                &new_entry->efd_jce);
+       return 0;
+ }
+@@ -4836,11 +4825,11 @@ do_more:
+                * blocks being freed are metadata. these blocks shouldn't
+                * be used until this transaction is committed
+                */
+-              new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+-              new_entry->start_blk = bit;
+-              new_entry->group  = block_group;
+-              new_entry->count = count;
+-              new_entry->t_tid = handle->h_transaction->t_tid;
++              new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++              new_entry->efd_start_blk = bit;
++              new_entry->efd_group  = block_group;
++              new_entry->efd_count = count;
++              new_entry->efd_tid = handle->h_transaction->t_tid;
+               ext4_lock_group(sb, block_group);
+               mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -301,6 +301,23 @@ void ext4_journal_abort_handle(const cha
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++      struct super_block              *sb = journal->j_private;
++      struct ext4_sb_info             *sbi = EXT4_SB(sb);
++      int                             error = is_journal_aborted(journal);
++      struct ext4_journal_cb_entry    *jce, *tmp;
++
++      spin_lock(&sbi->s_md_lock);
++      list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++              list_del_init(&jce->jce_list);
++              spin_unlock(&sbi->s_md_lock);
++              jce->jce_func(sb, jce, error);
++              spin_lock(&sbi->s_md_lock);
++      }
++      spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+@@ -3040,6 +3057,8 @@ static int ext4_fill_super(struct super_
+       }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
++      sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+       if (test_opt(sb, NOBH)) {
index 1e5417f..ebcd4d8 100644 (file)
@@ -34,3 +34,4 @@ ext4-failed-mount-b23368.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
+ext4-journal-callback-rhel5.patch
index c64eee3..38d2111 100644 (file)
@@ -30,3 +30,4 @@ ext4-back-dquot-to-rhel6.patch
 ext4-nocmtime-2.6-rhel5.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel6.patch
+ext4-journal-callback.patch
index 3d7b761..5e11d38 100644 (file)
@@ -1,3 +1,8 @@
+This patch is no longer needed for Lustre, since Lustre 2.1.  It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time.  See Jira issue LU-433.
+
 Index: linux-2.6.18-128.1.6/include/linux/jbd2.h
 ===================================================================
 --- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h     2009-04-15 08:35:28.000000000 +0530
index f219771..e15d750 100644 (file)
@@ -1,3 +1,8 @@
+This patch is no longer needed for Lustre, since Lustre 2.1.  It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time.  See Jira issue LU-433.
+
 This allows the jbd transaction commit callbacks to be registered.
 The ext4 jbd2 code has a different commit callback (one per transaction)
 that could be used to provide equivalent functionality.  This would
index 271f194..84f647b 100644 (file)
@@ -33,6 +33,9 @@
  *
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
 #endif
 
 #if defined(HAVE_EXT3_XATTR_H)
-#include <ext3/xattr.h>
-#else
+# include <ext3/xattr.h>
+#elif !defined(EXT3_XATTR_INDEX_TRUSTED)
 /* ext3 xattr.h not available in rh style kernel-devel rpm */
+/* CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h */
+# define EXT3_XATTR_INDEX_TRUSTED        4
 extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 #endif
@@ -133,34 +138,41 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
                  ext3_discard_preallocations(inode)
 #endif
 
-
-static cfs_mem_cache_t *fcb_cache;
-
-struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
-        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
-        struct obd_device *cb_obd;      /* MDS/OBD completion device */
-        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
-        void *cb_data;                  /* MDS/OST completion function data */
-};
-
-#ifndef EXT3_XATTR_INDEX_TRUSTED        /* temporary until we hit l28 kernel */
-#define EXT3_XATTR_INDEX_TRUSTED        4
-#endif
-
 #ifdef HAVE_EXT4_LDISKFS
 #define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
 #else
 #define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb)
 #define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
 #define ext3_ext_store_pblock(ex, pblock)  ((ex)->ee_start = cpu_to_le32(pblock))
 #define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
 #endif
 
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+# define journal_callback ext4_journal_cb_entry
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         ext4_journal_callback_add(handle, func, jcb)
+#elif defined(HAVE_JBD2_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         jbd2_journal_callback_set(handle, func, jcb)
+#elif defined(HAVE_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         journal_callback_set(handle, func, jcb)
+#else
+# error missing journal commit callback
+#endif /* HAVE_EXT4_JOURNAL_CALLBACK_ADD */
+
+static cfs_mem_cache_t *fcb_cache;
+
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
+        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
+        struct obd_device *cb_obd;      /* MDS/OBD completion device */
+        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
+};
+
 #ifndef ext3_find_next_bit
 #define ext3_find_next_bit           ext2_find_next_bit
 #endif
@@ -777,9 +789,14 @@ static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
         return rc;
 }
 
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+static void fsfilt_ext3_cb_func(struct super_block *sb,
+                                struct journal_callback *jcb, int error)
+#else
 static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
+#endif
 {
-        struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+        struct fsfilt_cb_data *fcb = container_of(jcb, typeof(*fcb), cb_jcb);
 
         fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
@@ -802,8 +819,7 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
-        fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func,
-                                    (struct journal_callback *)fcb);
+        fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
 
         return 0;
 }
index 9a052be..29764c9 100644 (file)
@@ -33,6 +33,9 @@
  *
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
@@ -617,7 +620,12 @@ static int osd_param_is_sane(const struct osd_device *dev,
 /*
  * Concurrency: shouldn't matter.
  */
+#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
+static void osd_trans_commit_cb(struct super_block *sb,
+                                struct journal_callback *jcb, int error)
+#else
 static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
+#endif
 {
         struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb);
         struct thandle     *th  = &oh->ot_super;
@@ -694,12 +702,12 @@ static struct thandle *osd_trans_start(const struct lu_env *env,
                                 /* add commit callback */
                                 lu_context_init(&th->th_ctx, LCT_TX_HANDLE);
                                 lu_context_enter(&th->th_ctx);
-                                osd_journal_callback_set(jh, osd_trans_commit_cb,
-                                                         (struct journal_callback *)&oh->ot_jcb);
-                                        LASSERT(oti->oti_txns == 0);
-                                        LASSERT(oti->oti_r_locks == 0);
-                                        LASSERT(oti->oti_w_locks == 0);
-                                        oti->oti_txns++;
+                                osd_journal_callback_set(jh,osd_trans_commit_cb,
+                                                         &oh->ot_jcb);
+                                LASSERT(oti->oti_txns == 0);
+                                LASSERT(oti->oti_r_locks == 0);
+                                LASSERT(oti->oti_w_locks == 0);
+                                oti->oti_txns++;
                         } else {
                                 OBD_FREE_PTR(oh);
                                 th = (void *)jh;
index ea559e3..343287c 100644 (file)
@@ -30,6 +30,9 @@
  * Use is subject to license terms.
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
 #ifdef HAVE_EXT4_LDISKFS
 #include <ldiskfs/ldiskfs.h>
 #include <ldiskfs/ldiskfs_jbd2.h>
-#define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
+#  define journal_callback ldiskfs_journal_cb_entry
+#  define osd_journal_callback_set(handle, func, jcb) ldiskfs_journal_callback_add(handle, func, jcb)
+# else
+#  define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# endif
 #else
 #include <linux/jbd.h>
 #include <linux/ldiskfs_fs.h>