Whamcloud - gitweb
LU-433 remove jbd2-jcberr patch from kernel
author Bobi Jam <bobijam@whamcloud.com>
Tue, 21 Jun 2011 01:07:57 +0000 (09:07 +0800)
committer Oleg Drokin <green@whamcloud.com>
Thu, 6 Oct 2011 04:08:59 +0000 (00:08 -0400)
The upstream ext4 code has new functionality that allows a
per-commit callback to be set (j_commit_callback), which is used by
the mballoc code to manage free space bitmaps after deleted blocks
have been released. We extend it to support multiple different
callbacks.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: I6397ccabd8d729658cf2ee13c9c3731a9eb31219
Reviewed-on: http://review.whamcloud.com/983
Tested-by: Hudson
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
build/autoconf/lustre-build.m4
ldiskfs/configure.ac
ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/ext4-journal-callback.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel5.patch
lustre/kernel_patches/patches/jbd2-jcberr-2.6-rhel6.patch
lustre/lvfs/fsfilt_ext3.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h

index 20e3d0d..7feb9dc 100644 (file)
@@ -320,6 +320,10 @@ AC_SUBST(LDISKFS_SUBDIR)
 AM_CONDITIONAL(LDISKFS_ENABLED, test x$with_ldiskfs != xno)
 AM_CONDITIONAL(LDISKFS_IN_KERNEL, test x$with_ldiskfs = xinkernel)
 
 AM_CONDITIONAL(LDISKFS_ENABLED, test x$with_ldiskfs != xno)
 AM_CONDITIONAL(LDISKFS_IN_KERNEL, test x$with_ldiskfs = xinkernel)
 
+if test x$with_ldiskfs != xno ; then
+       LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET
+fi
+
 if test x$enable_ext4 = xyes ; then
        AC_DEFINE(HAVE_EXT4_LDISKFS, 1, [build ext4 based ldiskfs])
 fi
 if test x$enable_ext4 = xyes ; then
        AC_DEFINE(HAVE_EXT4_LDISKFS, 1, [build ext4 based ldiskfs])
 fi
@@ -384,6 +388,28 @@ esac
 AC_SUBST(LIBCFS_SUBDIR)
 AC_SUBST(LIBCFS_INCLUDE_DIR)
 ])
 AC_SUBST(LIBCFS_SUBDIR)
 AC_SUBST(LIBCFS_INCLUDE_DIR)
 ])
+#
+# Check for jbd2_journal_callback_set(), which is needed for commit
+# callbacks.  When LU-433 lands jbd2_journal_callback_set() will only
+# remain for legacy reasons and AC_MSG_ERROR can be removed.
+#
+# 2.6.18 with ext3 still uses journal_callback_set() for commit callbacks.
+#
+AC_DEFUN([LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET],
+[
+       LB_CHECK_SYMBOL_EXPORT([jbd2_journal_callback_set],
+       [fs/jbd2/journal.c],
+       [AC_DEFINE(HAVE_JBD2_JOURNAL_CALLBACK_SET, 1,
+                  [kernel exports jbd2_journal_callback_set])],
+       [LB_CHECK_SYMBOL_EXPORT([journal_callback_set],
+               [fs/jbd/journal.c],
+               [AC_DEFINE(HAVE_JOURNAL_CALLBACK_SET, 1,
+                          [kernel exports journal_callback_set])],
+               [if test x$with_ldiskfs != xno ; then
+                       AC_MSG_ERROR([ldiskfs needs jbd2-jcberr patch])
+               fi])])
+])
 
 #
 # LB_DEFINE_LDISKFS_OPTIONS
 
 #
 # LB_DEFINE_LDISKFS_OPTIONS
index 4d40c0d..2b3d1c5 100644 (file)
@@ -145,17 +145,6 @@ AC_SUBST(LDISKFS_SERIES)
 
 AC_SUBST(ac_configure_args)
 
 
 AC_SUBST(ac_configure_args)
 
-LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
-                      [fs/dcache.c],
-                      [AC_DEFINE(HAVE_D_REHASH_COND, 1,
-                                 [d_rehash_cond is exported by the kernel])],
-                                 [])
-
-LB_CHECK_SYMBOL_EXPORT([__d_rehash],
-                      [fs/dcache.c],
-                      [AC_DEFINE(HAVE___D_REHASH, 1,
-                                 [__d_rehash is exported by the kernel])],
-                                 [])
 
 LB_CONFIG_FILES
 AC_CONFIG_FILES([ldiskfs/autoMakefile ldiskfs/Makefile])
 
 LB_CONFIG_FILES
 AC_CONFIG_FILES([ldiskfs/autoMakefile ldiskfs/Makefile])
diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback-rhel5.patch
new file mode 100644 (file)
index 0000000..4c08c9e
--- /dev/null
@@ -0,0 +1,448 @@
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
++/**
++ *   struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ *   This struct is a 'seed' structure for use with your own callback
++ *   structs. If you are using callbacks you must allocate one of these
++ *   or another struct of your own definition which has this struct
++ *   as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++      /* list information for other callbacks attached to the same handle */
++      struct list_head jce_list;
++
++      /*  Function to call with this callback structure */
++      void (*jce_func)(struct super_block *sb,
++                       struct ext4_journal_cb_entry *jce, int error);
++
++      /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed:
++ *        @sb: superblock of current filesystem for transaction
++ *        @jce: returned journal callback data
++ *        @rc: journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++                      void (*func)(struct super_block *sb,
++                                   struct ext4_journal_cb_entry *jce,
++                                   int rc),
++                      struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      /* Add the jce to transaction's private list */
++      jce->jce_func = func;
++      spin_lock(&sbi->s_md_lock);
++      list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++                                           struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      spin_lock(&sbi->s_md_lock);
++      list_del_init(&jce->jce_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+                    struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+  * mballoc.c contains the multiblocks allocation routines
+  */
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+@@ -335,14 +336,12 @@
+  */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+                                               ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+-
+-
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2942,8 +2941,6 @@ int ext4_mb_init(struct super_block *sb,
+       ext4_mb_history_init(sb);
+-      if (sbi->s_journal)
+-              sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+       return 0;
+ }
+@@ -3032,46 +3029,42 @@ int ext4_mb_release(struct super_block *
+  * This function is called by the jbd2 layer once the commit has finished,
+  * so we know we can free the blocks that were released with that commit.
+  */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++                                  struct ext4_journal_cb_entry *jce,
++                                  int rc)
+ {
+-      struct super_block *sb = journal->j_private;
++      struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+       struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+-      struct ext4_free_data *entry;
+-      struct list_head *l, *ltmp;
+-      list_for_each_safe(l, ltmp, &txn->t_private_list) {
+-              entry = list_entry(l, struct ext4_free_data, list);
+-
+-              mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+-                       entry->count, entry->group, entry);
+-
+-              err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+-              /* we expect to find existing buddy because it's pinned */
+-              BUG_ON(err != 0);
+-
+-              db = e4b.bd_info;
+-              /* there are blocks to put in buddy to make them really free */
+-              count += entry->count;
+-              count2++;
+-              ext4_lock_group(sb, entry->group);
+-              /* Take it out of per group rb tree */
+-              rb_erase(&entry->node, &(db->bb_free_root));
+-              mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+-              if (!db->bb_free_root.rb_node) {
+-                      /* No more items in the per group rb tree
+-                       * balance refcounts from ext4_mb_free_metadata()
+-                       */
+-                      page_cache_release(e4b.bd_buddy_page);
+-                      page_cache_release(e4b.bd_bitmap_page);
+-              }
+-              ext4_unlock_group(sb, entry->group);
++      mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++               entry->efd_count, entry->efd_group, entry);
+-              kmem_cache_free(ext4_free_ext_cachep, entry);
+-              ext4_mb_unload_buddy(&e4b);
++      err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++      /* we expect to find existing buddy because it's pinned */
++      BUG_ON(err != 0);
++
++      db = e4b.bd_info;
++      /* there are blocks to put in buddy to make them really free */
++      count += entry->efd_count;
++      count2++;
++      ext4_lock_group(sb, entry->efd_group);
++      /* Take it out of per group rb tree */
++      rb_erase(&entry->efd_node, &(db->bb_free_root));
++      mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++      if (!db->bb_free_root.rb_node) {
++              /* No more items in the per group rb tree
++               * balance refcounts from ext4_mb_free_metadata()
++               */
++              page_cache_release(e4b.bd_buddy_page);
++              page_cache_release(e4b.bd_bitmap_page);
+       }
++      ext4_unlock_group(sb, entry->efd_group);
++
++      kmem_cache_free(ext4_free_data_cachep, entry);
++      ext4_mb_unload_buddy(&e4b);
+       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -3123,22 +3116,24 @@ int __init init_ext4_mballoc(void)
+               kmem_cache_create("ext4_alloc_context",
+                                    sizeof(struct ext4_allocation_context),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+-      if (ext4_ac_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_ac_cachep == NULL)
++              goto out_err;
+-      ext4_free_ext_cachep =
+-              kmem_cache_create("ext4_free_block_extents",
++      ext4_free_data_cachep =
++              kmem_cache_create("ext4_free_data",
+                                    sizeof(struct ext4_free_data),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+-      if (ext4_free_ext_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              kmem_cache_destroy(ext4_ac_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_free_data_cachep == NULL)
++              goto out1_err;
++
+       ext4_create_debugfs_entry();
+       return 0;
++
++out1_err:
++      kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++      kmem_cache_destroy(ext4_pspace_cachep);
++      return -ENOMEM;
+ }
+ void exit_ext4_mballoc(void)
+@@ -3150,7 +3145,7 @@ void exit_ext4_mballoc(void)
+       rcu_barrier();
+       kmem_cache_destroy(ext4_pspace_cachep);
+       kmem_cache_destroy(ext4_ac_cachep);
+-      kmem_cache_destroy(ext4_free_ext_cachep);
++      kmem_cache_destroy(ext4_free_data_cachep);
+       ext4_remove_debugfs_entry();
+ }
+@@ -3688,8 +3683,8 @@ static void ext4_mb_generate_from_freeli
+       n = rb_first(&(grp->bb_free_root));
+       while (n) {
+-              entry = rb_entry(n, struct ext4_free_data, node);
+-              mb_set_bits(bitmap, entry->start_blk, entry->count);
++              entry = rb_entry(n, struct ext4_free_data, efd_node);
++              mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+               n = rb_next(n);
+       }
+       return;
+@@ -4974,11 +4969,11 @@ out3:
+  * AND the blocks are associated with the same group.
+  */
+ static int can_merge(struct ext4_free_data *entry1,
+-                      struct ext4_free_data *entry2)
++                   struct ext4_free_data *entry2)
+ {
+-      if ((entry1->t_tid == entry2->t_tid) &&
+-          (entry1->group == entry2->group) &&
+-          ((entry1->start_blk + entry1->count) == entry2->start_blk))
++      if ((entry1->efd_tid == entry2->efd_tid) &&
++          (entry1->efd_group == entry2->efd_group) &&
++          ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+               return 1;
+       return 0;
+ }
+@@ -4991,7 +4986,6 @@ ext4_mb_free_metadata(handle_t *handle, 
+       struct ext4_free_data *entry;
+       struct ext4_group_info *db = e4b->bd_info;
+       struct super_block *sb = e4b->bd_sb;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+@@ -4999,8 +4993,8 @@ ext4_mb_free_metadata(handle_t *handle, 
+       BUG_ON(e4b->bd_bitmap_page == NULL);
+       BUG_ON(e4b->bd_buddy_page == NULL);
+-      new_node = &new_entry->node;
+-      block = new_entry->start_blk;
++      new_node = &new_entry->efd_node;
++      block = new_entry->efd_start_blk;
+       if (!*n) {
+               /* first free block exent. We need to
+@@ -5013,15 +5007,15 @@ ext4_mb_free_metadata(handle_t *handle, 
+       }
+       while (*n) {
+               parent = *n;
+-              entry = rb_entry(parent, struct ext4_free_data, node);
+-              if (block < entry->start_blk)
++              entry = rb_entry(parent, struct ext4_free_data, efd_node);
++              if (block < entry->efd_start_blk)
+                       n = &(*n)->rb_left;
+-              else if (block >= (entry->start_blk + entry->count))
++              else if (block >= (entry->efd_start_blk + entry->efd_count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+                                       "Double free of blocks %d (%d %d)",
+-                                      block, entry->start_blk, entry->count);
++                                      block, entry->efd_start_blk, entry->efd_count);
+                       return 0;
+               }
+       }
+@@ -5032,34 +5026,29 @@ ext4_mb_free_metadata(handle_t *handle, 
+       /* Now try to see the extent can be merged to left and right */
+       node = rb_prev(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(entry, new_entry)) {
+-                      new_entry->start_blk = entry->start_blk;
+-                      new_entry->count += entry->count;
++                      new_entry->efd_start_blk = entry->efd_start_blk;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       node = rb_next(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(new_entry, entry)) {
+-                      new_entry->count += entry->count;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       /* Add the extent to transaction's private list */
+-      spin_lock(&sbi->s_md_lock);
+-      list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+-      spin_unlock(&sbi->s_md_lock);
++      ext4_journal_callback_add(handle, ext4_free_data_callback,
++                                &new_entry->efd_jce);
+       return 0;
+ }
+@@ -5191,11 +5180,11 @@ do_more:
+                * blocks being freed are metadata. these blocks shouldn't
+                * be used until this transaction is committed
+                */
+-              new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+-              new_entry->start_blk = bit;
+-              new_entry->group  = block_group;
+-              new_entry->count = count;
+-              new_entry->t_tid = handle->h_transaction->t_tid;
++              new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++              new_entry->efd_start_blk = bit;
++              new_entry->efd_group  = block_group;
++              new_entry->efd_count = count;
++              new_entry->efd_tid = handle->h_transaction->t_tid;
+               ext4_lock_group(sb, block_group);
+               mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -107,23 +107,24 @@ extern u8 mb_enable_debug;
+  */
+ #define MB_DEFAULT_GROUP_PREALLOC     512
+-
+ struct ext4_free_data {
+-      /* this links the free block information from group_info */
+-      struct rb_node node;
++      /* MUST be the first member */
++      struct ext4_journal_cb_entry    efd_jce;
+-      /* this links the free block information from ext4_sb_info */
+-      struct list_head list;
++      /* ext4_free_data private data starts from here */
++
++      /* this links the free block information from group_info */
++      struct rb_node          efd_node;
+       /* group which free block extent belongs */
+-      ext4_group_t group;
++      ext4_group_t            efd_group;
+       /* free block extent */
+-      ext4_grpblk_t start_blk;
+-      ext4_grpblk_t count;
++      ext4_grpblk_t           efd_start_blk;
++      ext4_grpblk_t           efd_count;
+       /* transaction which freed this extent */
+-      tid_t   t_tid;
++      tid_t                   efd_tid;
+ };
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -304,6 +304,23 @@ void ext4_journal_abort_handle(const cha
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++      struct super_block              *sb = journal->j_private;
++      struct ext4_sb_info             *sbi = EXT4_SB(sb);
++      int                             error = is_journal_aborted(journal);
++      struct ext4_journal_cb_entry    *jce, *tmp;
++
++      spin_lock(&sbi->s_md_lock);
++      list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++              list_del_init(&jce->jce_list);
++              spin_unlock(&sbi->s_md_lock);
++              jce->jce_func(sb, jce, error);
++              spin_lock(&sbi->s_md_lock);
++      }
++      spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+@@ -2997,6 +3014,8 @@ static int ext4_fill_super(struct super_
+       }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
++      sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+       if (test_opt(sb, NOBH)) {
diff --git a/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch b/ldiskfs/kernel_patches/patches/ext4-journal-callback.patch
new file mode 100644 (file)
index 0000000..b177e2e
--- /dev/null
@@ -0,0 +1,464 @@
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
++/**
++ *   struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ *   This struct is a 'seed' structure for use with your own callback
++ *   structs. If you are using callbacks you must allocate one of these
++ *   or another struct of your own definition which has this struct
++ *   as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++      /* list information for other callbacks attached to the same handle */
++      struct list_head jce_list;
++
++      /*  Function to call with this callback structure */
++      void (*jce_func)(struct super_block *sb,
++                       struct ext4_journal_cb_entry *jce, int error);
++
++      /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed:
++ *        @sb: superblock of current filesystem for transaction
++ *        @jce: returned journal callback data
++ *        @rc: journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++                      void (*func)(struct super_block *sb,
++                                   struct ext4_journal_cb_entry *jce,
++                                   int rc),
++                      struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      /* Add the jce to transaction's private list */
++      jce->jce_func = func;
++      spin_lock(&sbi->s_md_lock);
++      list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++                                           struct ext4_journal_cb_entry *jce)
++{
++      struct ext4_sb_info *sbi =
++                      EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++      spin_lock(&sbi->s_md_lock);
++      list_del_init(&jce->jce_list);
++      spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+                    struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -96,23 +96,24 @@ extern u8 mb_enable_debug;
+  */
+ #define MB_DEFAULT_GROUP_PREALLOC     512
+-
+ struct ext4_free_data {
+-      /* this links the free block information from group_info */
+-      struct rb_node node;
++      /* MUST be the first member */
++      struct ext4_journal_cb_entry    efd_jce;
+-      /* this links the free block information from ext4_sb_info */
+-      struct list_head list;
++      /* ext4_free_data private data starts from here */
++
++      /* this links the free block information from group_info */
++      struct rb_node          efd_node;
+       /* group which free block extent belongs */
+-      ext4_group_t group;
++      ext4_group_t            efd_group;
+       /* free block extent */
+-      ext4_grpblk_t start_blk;
+-      ext4_grpblk_t count;
++      ext4_grpblk_t           efd_start_blk;
++      ext4_grpblk_t           efd_count;
+       /* transaction which freed this extent */
+-      tid_t   t_tid;
++      tid_t                   efd_tid;
+ };
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+  * mballoc.c contains the multiblocks allocation routines
+  */
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+ #include <trace/events/ext4.h>
+@@ -336,12 +337,12 @@
+  */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+                                               ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2592,8 +2593,6 @@ int ext4_mb_init(struct super_block *sb,
+               }
+       }
+-      if (sbi->s_journal)
+-              sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+       return 0;
+ }
+@@ -2693,56 +2692,52 @@ static inline int ext4_issue_discard(str
+  * This function is called by the jbd2 layer once the commit has finished,
+  * so we know we can free the blocks that were released with that commit.
+  */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++                                  struct ext4_journal_cb_entry *jce,
++                                  int rc)
+ {
+-      struct super_block *sb = journal->j_private;
++      struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+       struct ext4_buddy e4b;
+       struct ext4_group_info *db;
+       int err, count = 0, count2 = 0;
+-      struct ext4_free_data *entry;
+-      struct list_head *l, *ltmp;
+-
+-      list_for_each_safe(l, ltmp, &txn->t_private_list) {
+-              entry = list_entry(l, struct ext4_free_data, list);
+-              mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+-                       entry->count, entry->group, entry);
++      mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++               entry->efd_count, entry->efd_group, entry);
+-              if (test_opt(sb, DISCARD)) {
+-                      int ret;
+-                      ret = ext4_issue_discard(sb, entry->group,
+-                                      entry->start_blk, entry->count);
+-                      if (unlikely(ret == -EOPNOTSUPP)) {
+-                              ext4_warning(sb, "discard not supported, "
+-                                               "disabling");
+-                              clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+-                      }
++      if (test_opt(sb, DISCARD)) {
++              int ret;
++              ret = ext4_issue_discard(sb, entry->efd_group,
++                              entry->efd_start_blk, entry->efd_count);
++              if (unlikely(ret == -EOPNOTSUPP)) {
++                      ext4_warning(sb, "discard not supported, "
++                                       "disabling");
++                      clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+               }
++      }
+-              err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+-              /* we expect to find existing buddy because it's pinned */
+-              BUG_ON(err != 0);
+-
+-              db = e4b.bd_info;
+-              /* there are blocks to put in buddy to make them really free */
+-              count += entry->count;
+-              count2++;
+-              ext4_lock_group(sb, entry->group);
+-              /* Take it out of per group rb tree */
+-              rb_erase(&entry->node, &(db->bb_free_root));
+-              mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+-              if (!db->bb_free_root.rb_node) {
+-                      /* No more items in the per group rb tree
+-                       * balance refcounts from ext4_mb_free_metadata()
+-                       */
+-                      page_cache_release(e4b.bd_buddy_page);
+-                      page_cache_release(e4b.bd_bitmap_page);
+-              }
+-              ext4_unlock_group(sb, entry->group);
+-              kmem_cache_free(ext4_free_ext_cachep, entry);
+-              ext4_mb_release_desc(&e4b);
++      err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++      /* we expect to find existing buddy because it's pinned */
++      BUG_ON(err != 0);
++
++      db = e4b.bd_info;
++      /* there are blocks to put in buddy to make them really free */
++      count += entry->efd_count;
++      count2++;
++      ext4_lock_group(sb, entry->efd_group);
++      /* Take it out of per group rb tree */
++      rb_erase(&entry->efd_node, &(db->bb_free_root));
++      mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++      if (!db->bb_free_root.rb_node) {
++              /* No more items in the per group rb tree
++               * balance refcounts from ext4_mb_free_metadata()
++               */
++              page_cache_release(e4b.bd_buddy_page);
++              page_cache_release(e4b.bd_bitmap_page);
+       }
++      ext4_unlock_group(sb, entry->efd_group);
++      kmem_cache_free(ext4_free_data_cachep, entry);
++      ext4_mb_release_desc(&e4b);
+       mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -2794,22 +2789,22 @@ int __init init_ext4_mballoc(void)
+               kmem_cache_create("ext4_alloc_context",
+                                    sizeof(struct ext4_allocation_context),
+                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+-      if (ext4_ac_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              return -ENOMEM;
+-      }
++      if (ext4_ac_cachep == NULL)
++              goto out_err;
++
++      ext4_free_data_cachep =
++              KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT);
++      if (ext4_free_data_cachep == NULL)
++              goto out1_err;
+-      ext4_free_ext_cachep =
+-              kmem_cache_create("ext4_free_block_extents",
+-                                   sizeof(struct ext4_free_data),
+-                                   0, SLAB_RECLAIM_ACCOUNT, NULL);
+-      if (ext4_free_ext_cachep == NULL) {
+-              kmem_cache_destroy(ext4_pspace_cachep);
+-              kmem_cache_destroy(ext4_ac_cachep);
+-              return -ENOMEM;
+-      }
+       ext4_create_debugfs_entry();
+       return 0;
++
++out1_err:
++      kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++      kmem_cache_destroy(ext4_pspace_cachep);
++      return -ENOMEM;
+ }
+ void exit_ext4_mballoc(void)
+@@ -2821,7 +2816,7 @@ void exit_ext4_mballoc(void)
+       rcu_barrier();
+       kmem_cache_destroy(ext4_pspace_cachep);
+       kmem_cache_destroy(ext4_ac_cachep);
+-      kmem_cache_destroy(ext4_free_ext_cachep);
++      kmem_cache_destroy(ext4_free_data_cachep);
+       ext4_remove_debugfs_entry();
+ }
+@@ -3362,8 +3357,8 @@ static void ext4_mb_generate_from_freeli
+       n = rb_first(&(grp->bb_free_root));
+       while (n) {
+-              entry = rb_entry(n, struct ext4_free_data, node);
+-              mb_set_bits(bitmap, entry->start_blk, entry->count);
++              entry = rb_entry(n, struct ext4_free_data, efd_node);
++              mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+               n = rb_next(n);
+       }
+       return;
+@@ -4623,11 +4618,11 @@ out3:
+  * AND the blocks are associated with the same group.
+  */
+ static int can_merge(struct ext4_free_data *entry1,
+-                      struct ext4_free_data *entry2)
++                   struct ext4_free_data *entry2)
+ {
+-      if ((entry1->t_tid == entry2->t_tid) &&
+-          (entry1->group == entry2->group) &&
+-          ((entry1->start_blk + entry1->count) == entry2->start_blk))
++      if ((entry1->efd_tid == entry2->efd_tid) &&
++          (entry1->efd_group == entry2->efd_group) &&
++          ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+               return 1;
+       return 0;
+ }
+@@ -4640,7 +4635,6 @@ ext4_mb_free_metadata(handle_t *handle, 
+       struct ext4_free_data *entry;
+       struct ext4_group_info *db = e4b->bd_info;
+       struct super_block *sb = e4b->bd_sb;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct rb_node **n = &db->bb_free_root.rb_node, *node;
+       struct rb_node *parent = NULL, *new_node;
+@@ -4648,8 +4642,8 @@ ext4_mb_free_metadata(handle_t *handle, 
+       BUG_ON(e4b->bd_bitmap_page == NULL);
+       BUG_ON(e4b->bd_buddy_page == NULL);
+-      new_node = &new_entry->node;
+-      block = new_entry->start_blk;
++      new_node = &new_entry->efd_node;
++      block = new_entry->efd_start_blk;
+       if (!*n) {
+               /* first free block exent. We need to
+@@ -4662,15 +4656,15 @@ ext4_mb_free_metadata(handle_t *handle, 
+       }
+       while (*n) {
+               parent = *n;
+-              entry = rb_entry(parent, struct ext4_free_data, node);
+-              if (block < entry->start_blk)
++              entry = rb_entry(parent, struct ext4_free_data, efd_node);
++              if (block < entry->efd_start_blk)
+                       n = &(*n)->rb_left;
+-              else if (block >= (entry->start_blk + entry->count))
++              else if (block >= (entry->efd_start_blk + entry->efd_count))
+                       n = &(*n)->rb_right;
+               else {
+                       ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+                                       "Double free of blocks %d (%d %d)",
+-                                      block, entry->start_blk, entry->count);
++                                      block, entry->efd_start_blk, entry->efd_count);
+                       return 0;
+               }
+       }
+@@ -4681,34 +4675,29 @@ ext4_mb_free_metadata(handle_t *handle, 
+       /* Now try to see the extent can be merged to left and right */
+       node = rb_prev(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(entry, new_entry)) {
+-                      new_entry->start_blk = entry->start_blk;
+-                      new_entry->count += entry->count;
++                      new_entry->efd_start_blk = entry->efd_start_blk;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       node = rb_next(new_node);
+       if (node) {
+-              entry = rb_entry(node, struct ext4_free_data, node);
++              entry = rb_entry(node, struct ext4_free_data, efd_node);
+               if (can_merge(new_entry, entry)) {
+-                      new_entry->count += entry->count;
++                      new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      spin_lock(&sbi->s_md_lock);
+-                      list_del(&entry->list);
+-                      spin_unlock(&sbi->s_md_lock);
+-                      kmem_cache_free(ext4_free_ext_cachep, entry);
++                      ext4_journal_callback_del(handle, &entry->efd_jce);
++                      kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+       /* Add the extent to transaction's private list */
+-      spin_lock(&sbi->s_md_lock);
+-      list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+-      spin_unlock(&sbi->s_md_lock);
++      ext4_journal_callback_add(handle, ext4_free_data_callback,
++                                &new_entry->efd_jce);
+       return 0;
+ }
+@@ -4836,11 +4825,11 @@ do_more:
+                * blocks being freed are metadata. these blocks shouldn't
+                * be used until this transaction is committed
+                */
+-              new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+-              new_entry->start_blk = bit;
+-              new_entry->group  = block_group;
+-              new_entry->count = count;
+-              new_entry->t_tid = handle->h_transaction->t_tid;
++              new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++              new_entry->efd_start_blk = bit;
++              new_entry->efd_group  = block_group;
++              new_entry->efd_count = count;
++              new_entry->efd_tid = handle->h_transaction->t_tid;
+               ext4_lock_group(sb, block_group);
+               mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -301,6 +301,39 @@ void ext4_journal_abort_handle(const cha
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
++/*
++ * Journal commit callback: run every callback entry queued on a transaction.
++ *
++ * @journal: journal whose j_private holds the super_block
++ * @txn:     transaction whose t_private_list carries the queued entries
++ *
++ * Each entry is unlinked under s_md_lock and its jce_func is then invoked
++ * with the lock dropped, receiving the journal's abort status as the error.
++ */
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++      struct super_block              *sb = journal->j_private;
++      struct ext4_sb_info             *sbi = EXT4_SB(sb);
++      int                             error = is_journal_aborted(journal);
++      struct ext4_journal_cb_entry    *jce;
++
++      spin_lock(&sbi->s_md_lock);
++      /*
++       * Always take the current list head instead of a cached next pointer:
++       * while s_md_lock is dropped for the callback, a neighbouring entry
++       * may be unlinked and freed (see ext4_mb_free_metadata()).
++       */
++      while (!list_empty(&txn->t_private_list)) {
++              jce = list_entry(txn->t_private_list.next,
++                               struct ext4_journal_cb_entry, jce_list);
++              list_del_init(&jce->jce_list);
++              spin_unlock(&sbi->s_md_lock);
++              jce->jce_func(sb, jce, error);
++              spin_lock(&sbi->s_md_lock);
++      }
++      spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+@@ -3040,6 +3057,8 @@ static int ext4_fill_super(struct super_
+       }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
++      sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+       if (test_opt(sb, NOBH)) {
index 1e5417f..ebcd4d8 100644 (file)
@@ -34,3 +34,4 @@ ext4-failed-mount-b23368.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
+ext4-journal-callback-rhel5.patch
index c64eee3..38d2111 100644 (file)
@@ -30,3 +30,4 @@ ext4-back-dquot-to-rhel6.patch
 ext4-nocmtime-2.6-rhel5.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel6.patch
 ext4-nocmtime-2.6-rhel5.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel6.patch
+ext4-journal-callback.patch
index 3d7b761..5e11d38 100644 (file)
@@ -1,3 +1,8 @@
+This patch is no longer needed for Lustre, since Lustre 2.1.  It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time.  See Jira issue LU-433.
+
 Index: linux-2.6.18-128.1.6/include/linux/jbd2.h
 ===================================================================
 --- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h     2009-04-15 08:35:28.000000000 +0530
 Index: linux-2.6.18-128.1.6/include/linux/jbd2.h
 ===================================================================
 --- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h     2009-04-15 08:35:28.000000000 +0530
index f219771..e15d750 100644 (file)
@@ -1,3 +1,8 @@
+This patch is no longer needed for Lustre, since Lustre 2.1.  It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time.  See Jira issue LU-433.
+
 This allows the jbd transaction commit callbacks to be registered.
 The ext4 jbd2 code has a different commit callback (one per transaction)
 that could be used to provide equivalent functionality.  This would
 This allows the jbd transaction commit callbacks to be registered.
 The ext4 jbd2 code has a different commit callback (one per transaction)
 that could be used to provide equivalent functionality.  This would
index 271f194..84f647b 100644 (file)
@@ -33,6 +33,9 @@
  *
  */
 /*
  *
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
 #endif
 
 #if defined(HAVE_EXT3_XATTR_H)
 #endif
 
 #if defined(HAVE_EXT3_XATTR_H)
-#include <ext3/xattr.h>
-#else
+# include <ext3/xattr.h>
+#elif !defined(EXT3_XATTR_INDEX_TRUSTED)
 /* ext3 xattr.h not available in rh style kernel-devel rpm */
 /* ext3 xattr.h not available in rh style kernel-devel rpm */
+/* CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h */
+# define EXT3_XATTR_INDEX_TRUSTED        4
 extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 #endif
 extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 #endif
@@ -133,34 +138,41 @@ extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,
                  ext3_discard_preallocations(inode)
 #endif
 
                  ext3_discard_preallocations(inode)
 #endif
 
-
-static cfs_mem_cache_t *fcb_cache;
-
-struct fsfilt_cb_data {
-        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
-        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
-        struct obd_device *cb_obd;      /* MDS/OBD completion device */
-        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
-        void *cb_data;                  /* MDS/OST completion function data */
-};
-
-#ifndef EXT3_XATTR_INDEX_TRUSTED        /* temporary until we hit l28 kernel */
-#define EXT3_XATTR_INDEX_TRUSTED        4
-#endif
-
 #ifdef HAVE_EXT4_LDISKFS
 #define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
 #ifdef HAVE_EXT4_LDISKFS
 #define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
 #else
 #define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
 #else
 #define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
 #define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb)
 #define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
 #define ext3_ext_store_pblock(ex, pblock)  ((ex)->ee_start = cpu_to_le32(pblock))
 #define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
 #endif
 
 #define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
 #define ext3_ext_store_pblock(ex, pblock)  ((ex)->ee_start = cpu_to_le32(pblock))
 #define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
 #endif
 
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+# define journal_callback ext4_journal_cb_entry
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         ext4_journal_callback_add(handle, func, jcb)
+#elif defined(HAVE_JBD2_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         jbd2_journal_callback_set(handle, func, jcb)
+#elif defined(HAVE_JOURNAL_CALLBACK_SET)
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+         journal_callback_set(handle, func, jcb)
+#else
+# error missing journal commit callback
+#endif /* HAVE_EXT4_JOURNAL_CALLBACK_ADD */
+
+static cfs_mem_cache_t *fcb_cache;
+
+struct fsfilt_cb_data {
+        struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
+        fsfilt_cb_t cb_func;            /* MDS/OBD completion function */
+        struct obd_device *cb_obd;      /* MDS/OBD completion device */
+        __u64 cb_last_rcvd;             /* MDS/OST last committed operation */
+        void *cb_data;                  /* MDS/OST completion function data */
+};
+
 #ifndef ext3_find_next_bit
 #define ext3_find_next_bit           ext2_find_next_bit
 #endif
 #ifndef ext3_find_next_bit
 #define ext3_find_next_bit           ext2_find_next_bit
 #endif
@@ -777,9 +789,14 @@ static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
         return rc;
 }
 
         return rc;
 }
 
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+static void fsfilt_ext3_cb_func(struct super_block *sb,
+                                struct journal_callback *jcb, int error)
+#else
 static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
 static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
+#endif
 {
 {
-        struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+        struct fsfilt_cb_data *fcb = container_of(jcb, typeof(*fcb), cb_jcb);
 
         fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
 
         fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
 
@@ -802,8 +819,7 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
         fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
         fcb->cb_data = cb_data;
 
         CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
-        fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func,
-                                    (struct journal_callback *)fcb);
+        fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
 
         return 0;
 }
 
         return 0;
 }
index 9a052be..29764c9 100644 (file)
@@ -33,6 +33,9 @@
  *
  */
 /*
  *
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
@@ -617,7 +620,12 @@ static int osd_param_is_sane(const struct osd_device *dev,
 /*
  * Concurrency: shouldn't matter.
  */
 /*
  * Concurrency: shouldn't matter.
  */
+#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
+static void osd_trans_commit_cb(struct super_block *sb,
+                                struct journal_callback *jcb, int error)
+#else
 static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
 static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
+#endif
 {
         struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb);
         struct thandle     *th  = &oh->ot_super;
 {
         struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb);
         struct thandle     *th  = &oh->ot_super;
@@ -694,12 +702,12 @@ static struct thandle *osd_trans_start(const struct lu_env *env,
                                 /* add commit callback */
                                 lu_context_init(&th->th_ctx, LCT_TX_HANDLE);
                                 lu_context_enter(&th->th_ctx);
                                 /* add commit callback */
                                 lu_context_init(&th->th_ctx, LCT_TX_HANDLE);
                                 lu_context_enter(&th->th_ctx);
-                                osd_journal_callback_set(jh, osd_trans_commit_cb,
-                                                         (struct journal_callback *)&oh->ot_jcb);
-                                        LASSERT(oti->oti_txns == 0);
-                                        LASSERT(oti->oti_r_locks == 0);
-                                        LASSERT(oti->oti_w_locks == 0);
-                                        oti->oti_txns++;
+                                osd_journal_callback_set(jh,osd_trans_commit_cb,
+                                                         &oh->ot_jcb);
+                                LASSERT(oti->oti_txns == 0);
+                                LASSERT(oti->oti_r_locks == 0);
+                                LASSERT(oti->oti_w_locks == 0);
+                                oti->oti_txns++;
                         } else {
                                 OBD_FREE_PTR(oh);
                                 th = (void *)jh;
                         } else {
                                 OBD_FREE_PTR(oh);
                                 th = (void *)jh;
index ea559e3..343287c 100644 (file)
@@ -30,6 +30,9 @@
  * Use is subject to license terms.
  */
 /*
  * Use is subject to license terms.
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  *
 #ifdef HAVE_EXT4_LDISKFS
 #include <ldiskfs/ldiskfs.h>
 #include <ldiskfs/ldiskfs_jbd2.h>
 #ifdef HAVE_EXT4_LDISKFS
 #include <ldiskfs/ldiskfs.h>
 #include <ldiskfs/ldiskfs_jbd2.h>
-#define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
+#  define journal_callback ldiskfs_journal_cb_entry
+#  define osd_journal_callback_set(handle, func, jcb) ldiskfs_journal_callback_add(handle, func, jcb)
+# else
+#  define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# endif
 #else
 #include <linux/jbd.h>
 #include <linux/ldiskfs_fs.h>
 #else
 #include <linux/jbd.h>
 #include <linux/ldiskfs_fs.h>