AM_CONDITIONAL(LDISKFS_ENABLED, test x$with_ldiskfs != xno)
AM_CONDITIONAL(LDISKFS_IN_KERNEL, test x$with_ldiskfs = xinkernel)
+if test x$with_ldiskfs != xno ; then
+ LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET
+fi
+
if test x$enable_ext4 = xyes ; then
AC_DEFINE(HAVE_EXT4_LDISKFS, 1, [build ext4 based ldiskfs])
fi
AC_SUBST(LIBCFS_SUBDIR)
AC_SUBST(LIBCFS_INCLUDE_DIR)
])
+
+#
+# Check for jbd2_journal_callback_set(), which is needed for commit
+# callbacks. When LU-433 lands jbd2_journal_callback_set() will only
+# remain for legacy reasons and AC_MSG_ERROR can be removed.
+#
+# 2.6.18 with ext3 still uses journal_callback_set() for commit callbacks.
+#
+AC_DEFUN([LB_LDISKFS_JBD2_JOURNAL_CALLBACK_SET],
+[
+ LB_CHECK_SYMBOL_EXPORT([jbd2_journal_callback_set],
+ [fs/jbd2/journal.c],
+ [AC_DEFINE(HAVE_JBD2_JOURNAL_CALLBACK_SET, 1,
+ [kernel exports jbd2_journal_callback_set])],
+ [LB_CHECK_SYMBOL_EXPORT([journal_callback_set],
+ [fs/jbd/journal.c],
+ [AC_DEFINE(HAVE_JOURNAL_CALLBACK_SET, 1,
+ [kernel exports journal_callback_set])],
+ [if test x$with_ldiskfs != xno ; then
+ AC_MSG_ERROR([ldiskfs needs jbd2-jcberr patch])
+ fi])])
+])
#
# LB_DEFINE_LDISKFS_OPTIONS
AC_SUBST(ac_configure_args)
-LB_CHECK_SYMBOL_EXPORT([d_rehash_cond],
- [fs/dcache.c],
- [AC_DEFINE(HAVE_D_REHASH_COND, 1,
- [d_rehash_cond is exported by the kernel])],
- [])
-
-LB_CHECK_SYMBOL_EXPORT([__d_rehash],
- [fs/dcache.c],
- [AC_DEFINE(HAVE___D_REHASH, 1,
- [__d_rehash is exported by the kernel])],
- [])
LB_CONFIG_FILES
AC_CONFIG_FILES([ldiskfs/autoMakefile ldiskfs/Makefile])
--- /dev/null
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+
++/**
++ * struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ * This struct is a 'seed' structure for use with your own callback
++ * structs. If you are using callbacks you must allocate one of these
++ * or another struct of your own definition which has this struct
++ * as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++ /* list information for other callbacks attached to the same handle */
++ struct list_head jce_list;
++
++ /* Function to call with this callback structure */
++ void (*jce_func)(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce, int error);
++
++ /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed.
++ *        It is invoked as func(sb, jce, rc), where sb is the superblock of
++ *        the filesystem, jce is the entry registered here, and rc is the
++ *        journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++ void (*func)(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce,
++ int rc),
++ struct ext4_journal_cb_entry *jce)
++{
++ struct ext4_sb_info *sbi =
++ EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++ /* Add the jce to transaction's private list */
++ jce->jce_func = func;
++ spin_lock(&sbi->s_md_lock);
++ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++ spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++ struct ext4_journal_cb_entry *jce)
++{
++ struct ext4_sb_info *sbi =
++ EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++ spin_lock(&sbi->s_md_lock);
++ list_del_init(&jce->jce_list);
++ spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+ * mballoc.c contains the multiblocks allocation routines
+ */
+
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+
+@@ -335,14 +336,12 @@
+ */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
+-
+-
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2942,8 +2941,6 @@ int ext4_mb_init(struct super_block *sb,
+
+ ext4_mb_history_init(sb);
+
+- if (sbi->s_journal)
+- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+ return 0;
+ }
+
+@@ -3032,46 +3029,42 @@ int ext4_mb_release(struct super_block *
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce,
++ int rc)
+ {
+- struct super_block *sb = journal->j_private;
++ struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+ struct ext4_buddy e4b;
+ struct ext4_group_info *db;
+ int err, count = 0, count2 = 0;
+- struct ext4_free_data *entry;
+- struct list_head *l, *ltmp;
+
+- list_for_each_safe(l, ltmp, &txn->t_private_list) {
+- entry = list_entry(l, struct ext4_free_data, list);
+-
+- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+- entry->count, entry->group, entry);
+-
+- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+- /* we expect to find existing buddy because it's pinned */
+- BUG_ON(err != 0);
+-
+- db = e4b.bd_info;
+- /* there are blocks to put in buddy to make them really free */
+- count += entry->count;
+- count2++;
+- ext4_lock_group(sb, entry->group);
+- /* Take it out of per group rb tree */
+- rb_erase(&entry->node, &(db->bb_free_root));
+- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+- if (!db->bb_free_root.rb_node) {
+- /* No more items in the per group rb tree
+- * balance refcounts from ext4_mb_free_metadata()
+- */
+- page_cache_release(e4b.bd_buddy_page);
+- page_cache_release(e4b.bd_bitmap_page);
+- }
+- ext4_unlock_group(sb, entry->group);
++ mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++ entry->efd_count, entry->efd_group, entry);
+
+- kmem_cache_free(ext4_free_ext_cachep, entry);
+- ext4_mb_unload_buddy(&e4b);
++ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++ /* we expect to find existing buddy because it's pinned */
++ BUG_ON(err != 0);
++
++ db = e4b.bd_info;
++ /* there are blocks to put in buddy to make them really free */
++ count += entry->efd_count;
++ count2++;
++ ext4_lock_group(sb, entry->efd_group);
++ /* Take it out of per group rb tree */
++ rb_erase(&entry->efd_node, &(db->bb_free_root));
++ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++ if (!db->bb_free_root.rb_node) {
++ /* No more items in the per group rb tree
++ * balance refcounts from ext4_mb_free_metadata()
++ */
++ page_cache_release(e4b.bd_buddy_page);
++ page_cache_release(e4b.bd_bitmap_page);
+ }
++ ext4_unlock_group(sb, entry->efd_group);
++
++ kmem_cache_free(ext4_free_data_cachep, entry);
++ ext4_mb_unload_buddy(&e4b);
+
+ mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -3123,22 +3116,24 @@ int __init init_ext4_mballoc(void)
+ kmem_cache_create("ext4_alloc_context",
+ sizeof(struct ext4_allocation_context),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+- if (ext4_ac_cachep == NULL) {
+- kmem_cache_destroy(ext4_pspace_cachep);
+- return -ENOMEM;
+- }
++ if (ext4_ac_cachep == NULL)
++ goto out_err;
+
+- ext4_free_ext_cachep =
+- kmem_cache_create("ext4_free_block_extents",
++ ext4_free_data_cachep =
++ kmem_cache_create("ext4_free_data",
+ sizeof(struct ext4_free_data),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
+- if (ext4_free_ext_cachep == NULL) {
+- kmem_cache_destroy(ext4_pspace_cachep);
+- kmem_cache_destroy(ext4_ac_cachep);
+- return -ENOMEM;
+- }
++ if (ext4_free_data_cachep == NULL)
++ goto out1_err;
++
+ ext4_create_debugfs_entry();
+ return 0;
++
++out1_err:
++ kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++ kmem_cache_destroy(ext4_pspace_cachep);
++ return -ENOMEM;
+ }
+
+ void exit_ext4_mballoc(void)
+@@ -3150,7 +3145,7 @@ void exit_ext4_mballoc(void)
+ rcu_barrier();
+ kmem_cache_destroy(ext4_pspace_cachep);
+ kmem_cache_destroy(ext4_ac_cachep);
+- kmem_cache_destroy(ext4_free_ext_cachep);
++ kmem_cache_destroy(ext4_free_data_cachep);
+ ext4_remove_debugfs_entry();
+ }
+
+@@ -3688,8 +3683,8 @@ static void ext4_mb_generate_from_freeli
+ n = rb_first(&(grp->bb_free_root));
+
+ while (n) {
+- entry = rb_entry(n, struct ext4_free_data, node);
+- mb_set_bits(bitmap, entry->start_blk, entry->count);
++ entry = rb_entry(n, struct ext4_free_data, efd_node);
++ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+ n = rb_next(n);
+ }
+ return;
+@@ -4974,11 +4969,11 @@ out3:
+ * AND the blocks are associated with the same group.
+ */
+ static int can_merge(struct ext4_free_data *entry1,
+- struct ext4_free_data *entry2)
++ struct ext4_free_data *entry2)
+ {
+- if ((entry1->t_tid == entry2->t_tid) &&
+- (entry1->group == entry2->group) &&
+- ((entry1->start_blk + entry1->count) == entry2->start_blk))
++ if ((entry1->efd_tid == entry2->efd_tid) &&
++ (entry1->efd_group == entry2->efd_group) &&
++ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+ return 1;
+ return 0;
+ }
+@@ -4991,7 +4986,6 @@ ext4_mb_free_metadata(handle_t *handle,
+ struct ext4_free_data *entry;
+ struct ext4_group_info *db = e4b->bd_info;
+ struct super_block *sb = e4b->bd_sb;
+- struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct rb_node **n = &db->bb_free_root.rb_node, *node;
+ struct rb_node *parent = NULL, *new_node;
+
+@@ -4999,8 +4993,8 @@ ext4_mb_free_metadata(handle_t *handle,
+ BUG_ON(e4b->bd_bitmap_page == NULL);
+ BUG_ON(e4b->bd_buddy_page == NULL);
+
+- new_node = &new_entry->node;
+- block = new_entry->start_blk;
++ new_node = &new_entry->efd_node;
++ block = new_entry->efd_start_blk;
+
+ if (!*n) {
+ /* first free block exent. We need to
+@@ -5013,15 +5007,15 @@ ext4_mb_free_metadata(handle_t *handle,
+ }
+ while (*n) {
+ parent = *n;
+- entry = rb_entry(parent, struct ext4_free_data, node);
+- if (block < entry->start_blk)
++ entry = rb_entry(parent, struct ext4_free_data, efd_node);
++ if (block < entry->efd_start_blk)
+ n = &(*n)->rb_left;
+- else if (block >= (entry->start_blk + entry->count))
++ else if (block >= (entry->efd_start_blk + entry->efd_count))
+ n = &(*n)->rb_right;
+ else {
+ ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+ "Double free of blocks %d (%d %d)",
+- block, entry->start_blk, entry->count);
++ block, entry->efd_start_blk, entry->efd_count);
+ return 0;
+ }
+ }
+@@ -5032,34 +5026,29 @@ ext4_mb_free_metadata(handle_t *handle,
+ /* Now try to see the extent can be merged to left and right */
+ node = rb_prev(new_node);
+ if (node) {
+- entry = rb_entry(node, struct ext4_free_data, node);
++ entry = rb_entry(node, struct ext4_free_data, efd_node);
+ if (can_merge(entry, new_entry)) {
+- new_entry->start_blk = entry->start_blk;
+- new_entry->count += entry->count;
++ new_entry->efd_start_blk = entry->efd_start_blk;
++ new_entry->efd_count += entry->efd_count;
+ rb_erase(node, &(db->bb_free_root));
+- spin_lock(&sbi->s_md_lock);
+- list_del(&entry->list);
+- spin_unlock(&sbi->s_md_lock);
+- kmem_cache_free(ext4_free_ext_cachep, entry);
++ ext4_journal_callback_del(handle, &entry->efd_jce);
++ kmem_cache_free(ext4_free_data_cachep, entry);
+ }
+ }
+
+ node = rb_next(new_node);
+ if (node) {
+- entry = rb_entry(node, struct ext4_free_data, node);
++ entry = rb_entry(node, struct ext4_free_data, efd_node);
+ if (can_merge(new_entry, entry)) {
+- new_entry->count += entry->count;
++ new_entry->efd_count += entry->efd_count;
+ rb_erase(node, &(db->bb_free_root));
+- spin_lock(&sbi->s_md_lock);
+- list_del(&entry->list);
+- spin_unlock(&sbi->s_md_lock);
+- kmem_cache_free(ext4_free_ext_cachep, entry);
++ ext4_journal_callback_del(handle, &entry->efd_jce);
++ kmem_cache_free(ext4_free_data_cachep, entry);
+ }
+ }
+ /* Add the extent to transaction's private list */
+- spin_lock(&sbi->s_md_lock);
+- list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+- spin_unlock(&sbi->s_md_lock);
++ ext4_journal_callback_add(handle, ext4_free_data_callback,
++ &new_entry->efd_jce);
+ return 0;
+ }
+
+@@ -5191,11 +5180,11 @@ do_more:
+ * blocks being freed are metadata. these blocks shouldn't
+ * be used until this transaction is committed
+ */
+- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+- new_entry->start_blk = bit;
+- new_entry->group = block_group;
+- new_entry->count = count;
+- new_entry->t_tid = handle->h_transaction->t_tid;
++ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++ new_entry->efd_start_blk = bit;
++ new_entry->efd_group = block_group;
++ new_entry->efd_count = count;
++ new_entry->efd_tid = handle->h_transaction->t_tid;
+
+ ext4_lock_group(sb, block_group);
+ mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -107,23 +107,24 @@ extern u8 mb_enable_debug;
+ */
+ #define MB_DEFAULT_GROUP_PREALLOC 512
+
+-
+ struct ext4_free_data {
+- /* this links the free block information from group_info */
+- struct rb_node node;
++ /* MUST be the first member */
++ struct ext4_journal_cb_entry efd_jce;
+
+- /* this links the free block information from ext4_sb_info */
+- struct list_head list;
++ /* ext4_free_data private data starts from here */
++
++ /* this links the free block information from group_info */
++ struct rb_node efd_node;
+
+ /* group which free block extent belongs */
+- ext4_group_t group;
++ ext4_group_t efd_group;
+
+ /* free block extent */
+- ext4_grpblk_t start_blk;
+- ext4_grpblk_t count;
++ ext4_grpblk_t efd_start_blk;
++ ext4_grpblk_t efd_count;
+
+ /* transaction which freed this extent */
+- tid_t t_tid;
++ tid_t efd_tid;
+ };
+
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -304,6 +304,23 @@ void ext4_journal_abort_handle(const cha
+
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
+
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++ struct super_block *sb = journal->j_private;
++ struct ext4_sb_info *sbi = EXT4_SB(sb);
++ int error = is_journal_aborted(journal);
++ struct ext4_journal_cb_entry *jce, *tmp;
++
++ spin_lock(&sbi->s_md_lock);
++ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++ list_del_init(&jce->jce_list);
++ spin_unlock(&sbi->s_md_lock);
++ jce->jce_func(sb, jce, error);
++ spin_lock(&sbi->s_md_lock);
++ }
++ spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+ * inconsistencies detected or read IO failures.
+ *
+@@ -2997,6 +3014,8 @@ static int ext4_fill_super(struct super_
+ }
+ set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+
++ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+
+ if (test_opt(sb, NOBH)) {
--- /dev/null
+Index: linux-stage/fs/ext4/ext4_jbd2.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_jbd2.h
++++ linux-stage/fs/ext4/ext4_jbd2.h
+@@ -106,6 +106,80 @@
+ #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+ #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+
++/**
++ * struct ext4_journal_cb_entry - Base structure for callback information.
++ *
++ * This struct is a 'seed' structure for use with your own callback
++ * structs. If you are using callbacks you must allocate one of these
++ * or another struct of your own definition which has this struct
++ * as its first element and pass it to ext4_journal_callback_add().
++ */
++struct ext4_journal_cb_entry {
++ /* list information for other callbacks attached to the same handle */
++ struct list_head jce_list;
++
++ /* Function to call with this callback structure */
++ void (*jce_func)(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce, int error);
++
++ /* user data goes here */
++};
++
++/**
++ * ext4_journal_callback_add: add a function to call after transaction commit
++ * @handle: active journal transaction handle to register callback on
++ * @func: callback function to call after the transaction has committed.
++ *        It is invoked as func(sb, jce, rc), where sb is the superblock of
++ *        the filesystem, jce is the entry registered here, and rc is the
++ *        journal state at commit (0 = transaction committed properly)
++ * @jce: journal callback data (internal and function private data struct)
++ *
++ * The registered function will be called in the context of the journal thread
++ * after the transaction for which the handle was created has completed.
++ *
++ * No locks are held when the callback function is called, so it is safe to
++ * call blocking functions from within the callback, but the callback should
++ * not block or run for too long, or the filesystem will be blocked waiting for
++ * the next transaction to commit. No journaling functions can be used, or
++ * there is a risk of deadlock.
++ *
++ * There is no guaranteed calling order of multiple registered callbacks on
++ * the same transaction.
++ */
++static inline void ext4_journal_callback_add(handle_t *handle,
++ void (*func)(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce,
++ int rc),
++ struct ext4_journal_cb_entry *jce)
++{
++ struct ext4_sb_info *sbi =
++ EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++ /* Add the jce to transaction's private list */
++ jce->jce_func = func;
++ spin_lock(&sbi->s_md_lock);
++ list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
++ spin_unlock(&sbi->s_md_lock);
++}
++
++/**
++ * ext4_journal_callback_del: delete a registered callback
++ * @handle: active journal transaction handle on which callback was registered
++ * @jce: registered journal callback entry to unregister
++ */
++static inline void ext4_journal_callback_del(handle_t *handle,
++ struct ext4_journal_cb_entry *jce)
++{
++ struct ext4_sb_info *sbi =
++ EXT4_SB(handle->h_transaction->t_journal->j_private);
++
++ spin_lock(&sbi->s_md_lock);
++ list_del_init(&jce->jce_list);
++ spin_unlock(&sbi->s_md_lock);
++}
++
++#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
++
+ int
+ ext4_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -96,23 +96,24 @@ extern u8 mb_enable_debug;
+ */
+ #define MB_DEFAULT_GROUP_PREALLOC 512
+
+-
+ struct ext4_free_data {
+- /* this links the free block information from group_info */
+- struct rb_node node;
++ /* MUST be the first member */
++ struct ext4_journal_cb_entry efd_jce;
+
+- /* this links the free block information from ext4_sb_info */
+- struct list_head list;
++ /* ext4_free_data private data starts from here */
++
++ /* this links the free block information from group_info */
++ struct rb_node efd_node;
+
+ /* group which free block extent belongs */
+- ext4_group_t group;
++ ext4_group_t efd_group;
+
+ /* free block extent */
+- ext4_grpblk_t start_blk;
+- ext4_grpblk_t count;
++ ext4_grpblk_t efd_start_blk;
++ ext4_grpblk_t efd_count;
+
+ /* transaction which freed this extent */
+- tid_t t_tid;
++ tid_t efd_tid;
+ };
+
+ struct ext4_prealloc_space {
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -21,6 +21,7 @@
+ * mballoc.c contains the multiblocks allocation routines
+ */
+
++#include "ext4_jbd2.h"
+ #include "mballoc.h"
+ #include <linux/debugfs.h>
+ #include <trace/events/ext4.h>
+@@ -336,12 +337,12 @@
+ */
+ static struct kmem_cache *ext4_pspace_cachep;
+ static struct kmem_cache *ext4_ac_cachep;
+-static struct kmem_cache *ext4_free_ext_cachep;
++static struct kmem_cache *ext4_free_data_cachep;
+ static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
++static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
+
+ static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
+ {
+@@ -2592,8 +2593,6 @@ int ext4_mb_init(struct super_block *sb,
+ }
+ }
+
+- if (sbi->s_journal)
+- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+ return 0;
+ }
+
+@@ -2693,56 +2692,52 @@ static inline int ext4_issue_discard(str
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+-static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
++static void ext4_free_data_callback(struct super_block *sb,
++ struct ext4_journal_cb_entry *jce,
++ int rc)
+ {
+- struct super_block *sb = journal->j_private;
++ struct ext4_free_data *entry = (struct ext4_free_data *)jce;
+ struct ext4_buddy e4b;
+ struct ext4_group_info *db;
+ int err, count = 0, count2 = 0;
+- struct ext4_free_data *entry;
+- struct list_head *l, *ltmp;
+-
+- list_for_each_safe(l, ltmp, &txn->t_private_list) {
+- entry = list_entry(l, struct ext4_free_data, list);
+
+- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
+- entry->count, entry->group, entry);
++ mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
++ entry->efd_count, entry->efd_group, entry);
+
+- if (test_opt(sb, DISCARD)) {
+- int ret;
+- ret = ext4_issue_discard(sb, entry->group,
+- entry->start_blk, entry->count);
+- if (unlikely(ret == -EOPNOTSUPP)) {
+- ext4_warning(sb, "discard not supported, "
+- "disabling");
+- clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+- }
++ if (test_opt(sb, DISCARD)) {
++ int ret;
++ ret = ext4_issue_discard(sb, entry->efd_group,
++ entry->efd_start_blk, entry->efd_count);
++ if (unlikely(ret == -EOPNOTSUPP)) {
++ ext4_warning(sb, "discard not supported, "
++ "disabling");
++ clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+ }
++ }
+
+- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
+- /* we expect to find existing buddy because it's pinned */
+- BUG_ON(err != 0);
+-
+- db = e4b.bd_info;
+- /* there are blocks to put in buddy to make them really free */
+- count += entry->count;
+- count2++;
+- ext4_lock_group(sb, entry->group);
+- /* Take it out of per group rb tree */
+- rb_erase(&entry->node, &(db->bb_free_root));
+- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+-
+- if (!db->bb_free_root.rb_node) {
+- /* No more items in the per group rb tree
+- * balance refcounts from ext4_mb_free_metadata()
+- */
+- page_cache_release(e4b.bd_buddy_page);
+- page_cache_release(e4b.bd_bitmap_page);
+- }
+- ext4_unlock_group(sb, entry->group);
+- kmem_cache_free(ext4_free_ext_cachep, entry);
+- ext4_mb_release_desc(&e4b);
++ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
++ /* we expect to find existing buddy because it's pinned */
++ BUG_ON(err != 0);
++
++ db = e4b.bd_info;
++ /* there are blocks to put in buddy to make them really free */
++ count += entry->efd_count;
++ count2++;
++ ext4_lock_group(sb, entry->efd_group);
++ /* Take it out of per group rb tree */
++ rb_erase(&entry->efd_node, &(db->bb_free_root));
++ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
++
++ if (!db->bb_free_root.rb_node) {
++ /* No more items in the per group rb tree
++ * balance refcounts from ext4_mb_free_metadata()
++ */
++ page_cache_release(e4b.bd_buddy_page);
++ page_cache_release(e4b.bd_bitmap_page);
+ }
++ ext4_unlock_group(sb, entry->efd_group);
++ kmem_cache_free(ext4_free_data_cachep, entry);
++ ext4_mb_release_desc(&e4b);
+
+ mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
+ }
+@@ -2794,22 +2789,22 @@ int __init init_ext4_mballoc(void)
+ kmem_cache_create("ext4_alloc_context",
+ sizeof(struct ext4_allocation_context),
+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
+- if (ext4_ac_cachep == NULL) {
+- kmem_cache_destroy(ext4_pspace_cachep);
+- return -ENOMEM;
+- }
++ if (ext4_ac_cachep == NULL)
++ goto out_err;
++
++ ext4_free_data_cachep =
++ KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT);
++ if (ext4_free_data_cachep == NULL)
++ goto out1_err;
+
+- ext4_free_ext_cachep =
+- kmem_cache_create("ext4_free_block_extents",
+- sizeof(struct ext4_free_data),
+- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+- if (ext4_free_ext_cachep == NULL) {
+- kmem_cache_destroy(ext4_pspace_cachep);
+- kmem_cache_destroy(ext4_ac_cachep);
+- return -ENOMEM;
+- }
+ ext4_create_debugfs_entry();
+ return 0;
++
++out1_err:
++ kmem_cache_destroy(ext4_ac_cachep);
++out_err:
++ kmem_cache_destroy(ext4_pspace_cachep);
++ return -ENOMEM;
+ }
+
+ void exit_ext4_mballoc(void)
+@@ -2821,7 +2816,7 @@ void exit_ext4_mballoc(void)
+ rcu_barrier();
+ kmem_cache_destroy(ext4_pspace_cachep);
+ kmem_cache_destroy(ext4_ac_cachep);
+- kmem_cache_destroy(ext4_free_ext_cachep);
++ kmem_cache_destroy(ext4_free_data_cachep);
+ ext4_remove_debugfs_entry();
+ }
+
+@@ -3362,8 +3357,8 @@ static void ext4_mb_generate_from_freeli
+ n = rb_first(&(grp->bb_free_root));
+
+ while (n) {
+- entry = rb_entry(n, struct ext4_free_data, node);
+- mb_set_bits(bitmap, entry->start_blk, entry->count);
++ entry = rb_entry(n, struct ext4_free_data, efd_node);
++ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
+ n = rb_next(n);
+ }
+ return;
+@@ -4623,11 +4618,11 @@ out3:
+ * AND the blocks are associated with the same group.
+ */
+ static int can_merge(struct ext4_free_data *entry1,
+- struct ext4_free_data *entry2)
++ struct ext4_free_data *entry2)
+ {
+- if ((entry1->t_tid == entry2->t_tid) &&
+- (entry1->group == entry2->group) &&
+- ((entry1->start_blk + entry1->count) == entry2->start_blk))
++ if ((entry1->efd_tid == entry2->efd_tid) &&
++ (entry1->efd_group == entry2->efd_group) &&
++ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
+ return 1;
+ return 0;
+ }
+@@ -4640,7 +4635,6 @@ ext4_mb_free_metadata(handle_t *handle,
+ struct ext4_free_data *entry;
+ struct ext4_group_info *db = e4b->bd_info;
+ struct super_block *sb = e4b->bd_sb;
+- struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct rb_node **n = &db->bb_free_root.rb_node, *node;
+ struct rb_node *parent = NULL, *new_node;
+
+@@ -4648,8 +4642,8 @@ ext4_mb_free_metadata(handle_t *handle,
+ BUG_ON(e4b->bd_bitmap_page == NULL);
+ BUG_ON(e4b->bd_buddy_page == NULL);
+
+- new_node = &new_entry->node;
+- block = new_entry->start_blk;
++ new_node = &new_entry->efd_node;
++ block = new_entry->efd_start_blk;
+
+ if (!*n) {
+ /* first free block exent. We need to
+@@ -4662,15 +4656,15 @@ ext4_mb_free_metadata(handle_t *handle,
+ }
+ while (*n) {
+ parent = *n;
+- entry = rb_entry(parent, struct ext4_free_data, node);
+- if (block < entry->start_blk)
++ entry = rb_entry(parent, struct ext4_free_data, efd_node);
++ if (block < entry->efd_start_blk)
+ n = &(*n)->rb_left;
+- else if (block >= (entry->start_blk + entry->count))
++ else if (block >= (entry->efd_start_blk + entry->efd_count))
+ n = &(*n)->rb_right;
+ else {
+ ext4_grp_locked_error(sb, e4b->bd_group, __func__,
+ "Double free of blocks %d (%d %d)",
+- block, entry->start_blk, entry->count);
++ block, entry->efd_start_blk, entry->efd_count);
+ return 0;
+ }
+ }
+@@ -4681,34 +4675,29 @@ ext4_mb_free_metadata(handle_t *handle,
+ /* Now try to see the extent can be merged to left and right */
+ node = rb_prev(new_node);
+ if (node) {
+- entry = rb_entry(node, struct ext4_free_data, node);
++ entry = rb_entry(node, struct ext4_free_data, efd_node);
+ if (can_merge(entry, new_entry)) {
+- new_entry->start_blk = entry->start_blk;
+- new_entry->count += entry->count;
++ new_entry->efd_start_blk = entry->efd_start_blk;
++ new_entry->efd_count += entry->efd_count;
+ rb_erase(node, &(db->bb_free_root));
+- spin_lock(&sbi->s_md_lock);
+- list_del(&entry->list);
+- spin_unlock(&sbi->s_md_lock);
+- kmem_cache_free(ext4_free_ext_cachep, entry);
++ ext4_journal_callback_del(handle, &entry->efd_jce);
++ kmem_cache_free(ext4_free_data_cachep, entry);
+ }
+ }
+
+ node = rb_next(new_node);
+ if (node) {
+- entry = rb_entry(node, struct ext4_free_data, node);
++ entry = rb_entry(node, struct ext4_free_data, efd_node);
+ if (can_merge(new_entry, entry)) {
+- new_entry->count += entry->count;
++ new_entry->efd_count += entry->efd_count;
+ rb_erase(node, &(db->bb_free_root));
+- spin_lock(&sbi->s_md_lock);
+- list_del(&entry->list);
+- spin_unlock(&sbi->s_md_lock);
+- kmem_cache_free(ext4_free_ext_cachep, entry);
++ ext4_journal_callback_del(handle, &entry->efd_jce);
++ kmem_cache_free(ext4_free_data_cachep, entry);
+ }
+ }
+ /* Add the extent to transaction's private list */
+- spin_lock(&sbi->s_md_lock);
+- list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+- spin_unlock(&sbi->s_md_lock);
++ ext4_journal_callback_add(handle, ext4_free_data_callback,
++ &new_entry->efd_jce);
+ return 0;
+ }
+
+@@ -4836,11 +4825,11 @@ do_more:
+ * blocks being freed are metadata. these blocks shouldn't
+ * be used until this transaction is committed
+ */
+- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+- new_entry->start_blk = bit;
+- new_entry->group = block_group;
+- new_entry->count = count;
+- new_entry->t_tid = handle->h_transaction->t_tid;
++ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
++ new_entry->efd_start_blk = bit;
++ new_entry->efd_group = block_group;
++ new_entry->efd_count = count;
++ new_entry->efd_tid = handle->h_transaction->t_tid;
+
+ ext4_lock_group(sb, block_group);
+ mb_clear_bits(bitmap_bh->b_data, bit, count);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -301,6 +301,23 @@ void ext4_journal_abort_handle(const cha
+
+ EXPORT_SYMBOL(ext4_journal_abort_handle);
+
++static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
++{
++ struct super_block *sb = journal->j_private;
++ struct ext4_sb_info *sbi = EXT4_SB(sb);
++ int error = is_journal_aborted(journal);
++ struct ext4_journal_cb_entry *jce, *tmp;
++
++ spin_lock(&sbi->s_md_lock);
++ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++ list_del_init(&jce->jce_list);
++ spin_unlock(&sbi->s_md_lock);
++ jce->jce_func(sb, jce, error);
++ spin_lock(&sbi->s_md_lock);
++ }
++ spin_unlock(&sbi->s_md_lock);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+ * inconsistencies detected or read IO failures.
+ *
+@@ -3040,6 +3057,8 @@ static int ext4_fill_super(struct super_
+ }
+ set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+
++ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
++
+ no_journal:
+
+ if (test_opt(sb, NOBH)) {
ext4-export-64bit-name-hash.patch
ext4-vmalloc-rhel5.patch
ext4-mballoc-group_check-rhel5.patch
+ext4-journal-callback-rhel5.patch
ext4-nocmtime-2.6-rhel5.patch
ext4-export-64bit-name-hash.patch
ext4-vmalloc-rhel6.patch
+ext4-journal-callback.patch
+This patch is no longer needed for Lustre, since Lustre 2.1. It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time. See Jira issue LU-433.
+
Index: linux-2.6.18-128.1.6/include/linux/jbd2.h
===================================================================
--- linux-2.6.18-128.1.6.orig/include/linux/jbd2.h 2009-04-15 08:35:28.000000000 +0530
+This patch is no longer needed for Lustre, since Lustre 2.1. It is kept
+in the kernel patch series for compatibility with older Lustre releases
+to simplify the upgrade process so that both the kernel and Lustre do
+not need to be upgraded at the same time. See Jira issue LU-433.
+
This allows the jbd transaction commit callbacks to be registered.
The ext4 jbd2 code has a different commit callback (one per transaction)
that could be used to provide equivalent functionality. This would
*
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
#endif
#if defined(HAVE_EXT3_XATTR_H)
-#include <ext3/xattr.h>
-#else
+# include <ext3/xattr.h>
+#elif !defined(EXT3_XATTR_INDEX_TRUSTED)
/* ext3 xattr.h not available in rh style kernel-devel rpm */
+/* CHAOS kernel-devel package will not include fs/ldiskfs/xattr.h */
+# define EXT3_XATTR_INDEX_TRUSTED 4
extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
#endif
ext3_discard_preallocations(inode)
#endif
-
-static cfs_mem_cache_t *fcb_cache;
-
-struct fsfilt_cb_data {
- struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
- fsfilt_cb_t cb_func; /* MDS/OBD completion function */
- struct obd_device *cb_obd; /* MDS/OBD completion device */
- __u64 cb_last_rcvd; /* MDS/OST last committed operation */
- void *cb_data; /* MDS/OST completion function data */
-};
-
-#ifndef EXT3_XATTR_INDEX_TRUSTED /* temporary until we hit l28 kernel */
-#define EXT3_XATTR_INDEX_TRUSTED 4
-#endif
-
#ifdef HAVE_EXT4_LDISKFS
#define fsfilt_log_start_commit(journal, tid) jbd2_log_start_commit(journal, tid)
#define fsfilt_log_wait_commit(journal, tid) jbd2_log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
#else
#define fsfilt_log_start_commit(journal, tid) log_start_commit(journal, tid)
#define fsfilt_log_wait_commit(journal, tid) log_wait_commit(journal, tid)
-#define fsfilt_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb)
#define ext_pblock(ex) le32_to_cpu((ex)->ee_start)
#define ext3_ext_store_pblock(ex, pblock) ((ex)->ee_start = cpu_to_le32(pblock))
#define ext3_inode_bitmap(sb,desc) le32_to_cpu((desc)->bg_inode_bitmap)
#endif
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD /* first choice: ext4_journal_callback_add() */
+# define journal_callback ext4_journal_cb_entry /* lets "struct journal_callback" users below compile unchanged */
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ ext4_journal_callback_add(handle, func, jcb)
+#elif defined(HAVE_JBD2_JOURNAL_CALLBACK_SET) /* kernel exports jbd2_journal_callback_set() */
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ jbd2_journal_callback_set(handle, func, jcb)
+#elif defined(HAVE_JOURNAL_CALLBACK_SET) /* kernel exports journal_callback_set() (jbd) */
+# define fsfilt_journal_callback_set(handle, func, jcb) \
+ journal_callback_set(handle, func, jcb)
+#else /* none of the three commit-callback APIs is available: fail the build */
+# error missing journal commit callback
+#endif /* HAVE_EXT4_JOURNAL_CALLBACK_ADD */
+
+static cfs_mem_cache_t *fcb_cache;
+
+struct fsfilt_cb_data { /* per-callback context: fsfilt_ext3_cb_func() recovers it from cb_jcb and calls cb_func(cb_obd, cb_last_rcvd, cb_data, error) */
+ struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
+ fsfilt_cb_t cb_func; /* MDS/OBD completion function */
+ struct obd_device *cb_obd; /* MDS/OBD completion device */
+ __u64 cb_last_rcvd; /* MDS/OST last committed operation */
+ void *cb_data; /* MDS/OST completion function data */
+};
+
#ifndef ext3_find_next_bit
#define ext3_find_next_bit ext2_find_next_bit
#endif
return rc;
}
+#ifdef HAVE_EXT4_JOURNAL_CALLBACK_ADD
+static void fsfilt_ext3_cb_func(struct super_block *sb,
+ struct journal_callback *jcb, int error)
+#else
static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error)
+#endif
{
- struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
+ struct fsfilt_cb_data *fcb = container_of(jcb, typeof(*fcb), cb_jcb);
fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
fcb->cb_data = cb_data;
CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
- fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func,
- (struct journal_callback *)fcb);
+ fsfilt_journal_callback_set(handle, fsfilt_ext3_cb_func, &fcb->cb_jcb);
return 0;
}
*
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
/*
* Concurrency: shouldn't matter.
*/
+#ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD
+static void osd_trans_commit_cb(struct super_block *sb,
+ struct journal_callback *jcb, int error)
+#else
static void osd_trans_commit_cb(struct journal_callback *jcb, int error)
+#endif
{
struct osd_thandle *oh = container_of0(jcb, struct osd_thandle, ot_jcb);
struct thandle *th = &oh->ot_super;
/* add commit callback */
lu_context_init(&th->th_ctx, LCT_TX_HANDLE);
lu_context_enter(&th->th_ctx);
- osd_journal_callback_set(jh, osd_trans_commit_cb,
- (struct journal_callback *)&oh->ot_jcb);
- LASSERT(oti->oti_txns == 0);
- LASSERT(oti->oti_r_locks == 0);
- LASSERT(oti->oti_w_locks == 0);
- oti->oti_txns++;
+ osd_journal_callback_set(jh,osd_trans_commit_cb,
+ &oh->ot_jcb);
+ LASSERT(oti->oti_txns == 0);
+ LASSERT(oti->oti_r_locks == 0);
+ LASSERT(oti->oti_w_locks == 0);
+ oti->oti_txns++;
} else {
OBD_FREE_PTR(oh);
th = (void *)jh;
* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*
#ifdef HAVE_EXT4_LDISKFS
#include <ldiskfs/ldiskfs.h>
#include <ldiskfs/ldiskfs_jbd2.h>
-#define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# ifdef HAVE_LDISKFS_JOURNAL_CALLBACK_ADD /* first choice: ldiskfs_journal_callback_add() */
+# define journal_callback ldiskfs_journal_cb_entry /* lets "struct journal_callback" users compile unchanged */
+# define osd_journal_callback_set(handle, func, jcb) ldiskfs_journal_callback_add(handle, func, jcb)
+# else /* otherwise use jbd2_journal_callback_set() */
+# define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb)
+# endif /* HAVE_LDISKFS_JOURNAL_CALLBACK_ADD */
#else
#include <linux/jbd.h>
#include <linux/ldiskfs_fs.h>