+++ /dev/null
-commit 18aadd47f88464928b5ce57791c2e8f9f2aaece0 (v3.3-rc2-7-g18aadd4)
-Author: Bobi Jam <bobijam@whamcloud.com>
-Date: Mon Feb 20 17:53:02 2012 -0500
-
-ext4: expand commit callback and use it for mballoc
-
-The per-commit callback was used by mballoc code to manage free space
-bitmaps after deleted blocks have been released. This patch expands
-it to support multiple different callbacks, to allow other things to
-be done after the commit has been completed.
-
-Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
-Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
-Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-
-Index: linux-stage/fs/ext4/ext4_jbd2.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4_jbd2.h
-+++ linux-stage/fs/ext4/ext4_jbd2.h
-@@ -104,6 +104,80 @@
- #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
- #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-
-+/**
-+ * struct ext4_journal_cb_entry - Base structure for callback information.
-+ *
-+ * This struct is a 'seed' structure for a using with your own callback
-+ * structs. If you are using callbacks you must allocate one of these
-+ * or another struct of your own definition which has this struct
-+ * as it's first element and pass it to ext4_journal_callback_add().
-+ */
-+struct ext4_journal_cb_entry {
-+ /* list information for other callbacks attached to the same handle */
-+ struct list_head jce_list;
-+
-+ /* Function to call with this callback structure */
-+ void (*jce_func)(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce, int error);
-+
-+ /* user data goes here */
-+};
-+
-+/**
-+ * ext4_journal_callback_add: add a function to call after transaction commit
-+ * @handle: active journal transaction handle to register callback on
-+ * @func: callback function to call after the transaction has committed:
-+ * @sb: superblock of current filesystem for transaction
-+ * @jce: returned journal callback data
-+ * @rc: journal state at commit (0 = transaction committed properly)
-+ * @jce: journal callback data (internal and function private data struct)
-+ *
-+ * The registered function will be called in the context of the journal thread
-+ * after the transaction for which the handle was created has completed.
-+ *
-+ * No locks are held when the callback function is called, so it is safe to
-+ * call blocking functions from within the callback, but the callback should
-+ * not block or run for too long, or the filesystem will be blocked waiting for
-+ * the next transaction to commit. No journaling functions can be used, or
-+ * there is a risk of deadlock.
-+ *
-+ * There is no guaranteed calling order of multiple registered callbacks on
-+ * the same transaction.
-+ */
-+static inline void ext4_journal_callback_add(handle_t *handle,
-+ void (*func)(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce,
-+ int rc),
-+ struct ext4_journal_cb_entry *jce)
-+{
-+ struct ext4_sb_info *sbi =
-+ EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+ /* Add the jce to transaction's private list */
-+ jce->jce_func = func;
-+ spin_lock(&sbi->s_md_lock);
-+ list_add(&jce->jce_list, &handle->h_transaction->t_private_list);
-+ spin_unlock(&sbi->s_md_lock);
-+}
-+
-+/**
-+ * ext4_journal_callback_del: delete a registered callback
-+ * @handle: active journal transaction handle on which callback was registered
-+ * @jce: registered journal callback entry to unregister
-+ */
-+static inline void ext4_journal_callback_del(handle_t *handle,
-+ struct ext4_journal_cb_entry *jce)
-+{
-+ struct ext4_sb_info *sbi =
-+ EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+ spin_lock(&sbi->s_md_lock);
-+ list_del_init(&jce->jce_list);
-+ spin_unlock(&sbi->s_md_lock);
-+}
-+
-+#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
-+
- int
- ext4_mark_iloc_dirty(handle_t *handle,
- struct inode *inode,
-Index: linux-stage/fs/ext4/mballoc.h
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.h
-+++ linux-stage/fs/ext4/mballoc.h
-@@ -96,23 +96,24 @@ extern u8 mb_enable_debug;
- */
- #define MB_DEFAULT_GROUP_PREALLOC 512
-
--
- struct ext4_free_data {
-- /* this links the free block information from group_info */
-- struct rb_node node;
-+ /* MUST be the first member */
-+ struct ext4_journal_cb_entry efd_jce;
-
-- /* this links the free block information from ext4_sb_info */
-- struct list_head list;
-+ /* ext4_free_data private data starts from here */
-+
-+ /* this links the free block information from group_info */
-+ struct rb_node efd_node;
-
- /* group which free block extent belongs */
-- ext4_group_t group;
-+ ext4_group_t efd_group;
-
- /* free block extent */
-- ext4_grpblk_t start_blk;
-- ext4_grpblk_t count;
-+ ext4_grpblk_t efd_start_blk;
-+ ext4_grpblk_t efd_count;
-
- /* transaction which freed this extent */
-- tid_t t_tid;
-+ tid_t efd_tid;
- };
-
- struct ext4_prealloc_space {
-Index: linux-stage/fs/ext4/mballoc.c
-===================================================================
---- linux-stage.orig/fs/ext4/mballoc.c
-+++ linux-stage/fs/ext4/mballoc.c
-@@ -21,6 +21,7 @@
- * mballoc.c contains the multiblocks allocation routines
- */
-
-+#include "ext4_jbd2.h"
- #include "mballoc.h"
- #include <linux/debugfs.h>
- #include <trace/events/ext4.h>
-@@ -336,12 +337,12 @@
- */
- static struct kmem_cache *ext4_pspace_cachep;
- static struct kmem_cache *ext4_ac_cachep;
--static struct kmem_cache *ext4_free_ext_cachep;
-+static struct kmem_cache *ext4_free_data_cachep;
- static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
-+static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
-
- static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
- {
-@@ -2583,8 +2584,6 @@ int ext4_mb_init(struct super_block *sb,
- }
- }
-
-- if (sbi->s_journal)
-- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
- return 0;
- }
-
-@@ -2686,58 +2685,54 @@ static inline int ext4_issue_discard(str
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
-+static void ext4_free_data_callback(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce,
-+ int rc)
- {
-- struct super_block *sb = journal->j_private;
-+ struct ext4_free_data *entry = (struct ext4_free_data *)jce;
- struct ext4_buddy e4b;
- struct ext4_group_info *db;
- int err, count = 0, count2 = 0;
-- struct ext4_free_data *entry;
-- struct list_head *l, *ltmp;
-
-- list_for_each_safe(l, ltmp, &txn->t_private_list) {
-- entry = list_entry(l, struct ext4_free_data, list);
-+ mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-+ entry->efd_count, entry->efd_group, entry);
-
-- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-- entry->count, entry->group, entry);
-+ if (test_opt(sb, DISCARD))
-+ ext4_issue_discard(sb, entry->efd_group,
-+ entry->efd_start_blk, entry->efd_count);
-+
-+ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
-+ /* we expect to find existing buddy because it's pinned */
-+ BUG_ON(err != 0);
-+
-+ db = e4b.bd_info;
-+ /* there are blocks to put in buddy to make them really free */
-+ count += entry->efd_count;
-+ count2++;
-+ ext4_lock_group(sb, entry->efd_group);
-+ /* Take it out of per group rb tree */
-+ rb_erase(&entry->efd_node, &(db->bb_free_root));
-+ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
-
-- if (test_opt(sb, DISCARD))
-- ext4_issue_discard(sb, entry->group,
-- entry->start_blk, entry->count);
--
-- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
-- /* we expect to find existing buddy because it's pinned */
-- BUG_ON(err != 0);
--
-- db = e4b.bd_info;
-- /* there are blocks to put in buddy to make them really free */
-- count += entry->count;
-- count2++;
-- ext4_lock_group(sb, entry->group);
-- /* Take it out of per group rb tree */
-- rb_erase(&entry->node, &(db->bb_free_root));
-- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
-+ /*
-+ * Clear the trimmed flag for the group so that the next
-+ * ext4_trim_fs can trim it.
-+ * If the volume is mounted with -o discard, online discard
-+ * is supported and the free blocks will be trimmed online.
-+ */
-+ if (!test_opt(sb, DISCARD))
-+ EXT4_MB_GRP_CLEAR_TRIMMED(db);
-
-- /*
-- * Clear the trimmed flag for the group so that the next
-- * ext4_trim_fs can trim it.
-- * If the volume is mounted with -o discard, online discard
-- * is supported and the free blocks will be trimmed online.
-+ if (!db->bb_free_root.rb_node) {
-+ /* No more items in the per group rb tree
-+ * balance refcounts from ext4_mb_free_metadata()
- */
-- if (!test_opt(sb, DISCARD))
-- EXT4_MB_GRP_CLEAR_TRIMMED(db);
--
-- if (!db->bb_free_root.rb_node) {
-- /* No more items in the per group rb tree
-- * balance refcounts from ext4_mb_free_metadata()
-- */
-- page_cache_release(e4b.bd_buddy_page);
-- page_cache_release(e4b.bd_bitmap_page);
-- }
-- ext4_unlock_group(sb, entry->group);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-- ext4_mb_release_desc(&e4b);
-+ page_cache_release(e4b.bd_buddy_page);
-+ page_cache_release(e4b.bd_bitmap_page);
- }
-+ ext4_unlock_group(sb, entry->efd_group);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
-+ ext4_mb_release_desc(&e4b);
-
- mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
- }
-@@ -2789,22 +2784,22 @@ int __init init_ext4_mballoc(void)
- kmem_cache_create("ext4_alloc_context",
- sizeof(struct ext4_allocation_context),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
-- if (ext4_ac_cachep == NULL) {
-- kmem_cache_destroy(ext4_pspace_cachep);
-- return -ENOMEM;
-- }
-+ if (ext4_ac_cachep == NULL)
-+ goto out_err;
-+
-+ ext4_free_data_cachep =
-+ KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT);
-+ if (ext4_free_data_cachep == NULL)
-+ goto out1_err;
-
-- ext4_free_ext_cachep =
-- kmem_cache_create("ext4_free_block_extents",
-- sizeof(struct ext4_free_data),
-- 0, SLAB_RECLAIM_ACCOUNT, NULL);
-- if (ext4_free_ext_cachep == NULL) {
-- kmem_cache_destroy(ext4_pspace_cachep);
-- kmem_cache_destroy(ext4_ac_cachep);
-- return -ENOMEM;
-- }
- ext4_create_debugfs_entry();
- return 0;
-+
-+out1_err:
-+ kmem_cache_destroy(ext4_ac_cachep);
-+out_err:
-+ kmem_cache_destroy(ext4_pspace_cachep);
-+ return -ENOMEM;
- }
-
- void exit_ext4_mballoc(void)
-@@ -2816,7 +2811,7 @@ void exit_ext4_mballoc(void)
- rcu_barrier();
- kmem_cache_destroy(ext4_pspace_cachep);
- kmem_cache_destroy(ext4_ac_cachep);
-- kmem_cache_destroy(ext4_free_ext_cachep);
-+ kmem_cache_destroy(ext4_free_data_cachep);
- ext4_remove_debugfs_entry();
- }
-
-@@ -3375,8 +3370,8 @@ static void ext4_mb_generate_from_freeli
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
-- entry = rb_entry(n, struct ext4_free_data, node);
-- mb_set_bits(bitmap, entry->start_blk, entry->count);
-+ entry = rb_entry(n, struct ext4_free_data, efd_node);
-+ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
- n = rb_next(n);
- }
- return;
-@@ -4631,11 +4626,11 @@ out:
- * AND the blocks are associated with the same group.
- */
- static int can_merge(struct ext4_free_data *entry1,
-- struct ext4_free_data *entry2)
-+ struct ext4_free_data *entry2)
- {
-- if ((entry1->t_tid == entry2->t_tid) &&
-- (entry1->group == entry2->group) &&
-- ((entry1->start_blk + entry1->count) == entry2->start_blk))
-+ if ((entry1->efd_tid == entry2->efd_tid) &&
-+ (entry1->efd_group == entry2->efd_group) &&
-+ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
- return 1;
- return 0;
- }
-@@ -4648,7 +4643,6 @@ ext4_mb_free_metadata(handle_t *handle,
- struct ext4_free_data *entry;
- struct ext4_group_info *db = e4b->bd_info;
- struct super_block *sb = e4b->bd_sb;
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct rb_node **n = &db->bb_free_root.rb_node, *node;
- struct rb_node *parent = NULL, *new_node;
-
-@@ -4656,8 +4650,8 @@ ext4_mb_free_metadata(handle_t *handle,
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
-
-- new_node = &new_entry->node;
-- block = new_entry->start_blk;
-+ new_node = &new_entry->efd_node;
-+ block = new_entry->efd_start_blk;
-
- if (!*n) {
- /* first free block exent. We need to
-@@ -4670,15 +4664,15 @@ ext4_mb_free_metadata(handle_t *handle,
- }
- while (*n) {
- parent = *n;
-- entry = rb_entry(parent, struct ext4_free_data, node);
-- if (block < entry->start_blk)
-+ entry = rb_entry(parent, struct ext4_free_data, efd_node);
-+ if (block < entry->efd_start_blk)
- n = &(*n)->rb_left;
-- else if (block >= (entry->start_blk + entry->count))
-+ else if (block >= (entry->efd_start_blk + entry->efd_count))
- n = &(*n)->rb_right;
- else {
- ext4_grp_locked_error(sb, e4b->bd_group, __func__,
- "Double free of blocks %d (%d %d)",
-- block, entry->start_blk, entry->count);
-+ block, entry->efd_start_blk, entry->efd_count);
- return 0;
- }
- }
-@@ -4689,34 +4683,29 @@ ext4_mb_free_metadata(handle_t *handle,
- /* Now try to see the extent can be merged to left and right */
- node = rb_prev(new_node);
- if (node) {
-- entry = rb_entry(node, struct ext4_free_data, node);
-+ entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(entry, new_entry)) {
-- new_entry->start_blk = entry->start_blk;
-- new_entry->count += entry->count;
-+ new_entry->efd_start_blk = entry->efd_start_blk;
-+ new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
-- spin_lock(&sbi->s_md_lock);
-- list_del(&entry->list);
-- spin_unlock(&sbi->s_md_lock);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-+ ext4_journal_callback_del(handle, &entry->efd_jce);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
-
- node = rb_next(new_node);
- if (node) {
-- entry = rb_entry(node, struct ext4_free_data, node);
-+ entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(new_entry, entry)) {
-- new_entry->count += entry->count;
-+ new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
-- spin_lock(&sbi->s_md_lock);
-- list_del(&entry->list);
-- spin_unlock(&sbi->s_md_lock);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-+ ext4_journal_callback_del(handle, &entry->efd_jce);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
- /* Add the extent to transaction's private list */
-- spin_lock(&sbi->s_md_lock);
-- list_add(&new_entry->list, &handle->h_transaction->t_private_list);
-- spin_unlock(&sbi->s_md_lock);
-+ ext4_journal_callback_add(handle, ext4_free_data_callback,
-+ &new_entry->efd_jce);
- return 0;
- }
-
-@@ -4851,7 +4840,7 @@ do_more:
- * be used until this transaction is committed
- */
- retry:
-- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
-+ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- /*
- * We use a retry loop because
-@@ -4861,10 +4850,10 @@ retry:
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
- }
-- new_entry->start_blk = bit;
-- new_entry->group = block_group;
-- new_entry->count = count;
-- new_entry->t_tid = handle->h_transaction->t_tid;
-+ new_entry->efd_start_blk = bit;
-+ new_entry->efd_group = block_group;
-+ new_entry->efd_count = count;
-+ new_entry->efd_tid = handle->h_transaction->t_tid;
-
- ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count);
-Index: linux-stage/fs/ext4/super.c
-===================================================================
---- linux-stage.orig/fs/ext4/super.c
-+++ linux-stage/fs/ext4/super.c
-@@ -336,6 +336,18 @@ void ext4_journal_abort_handle(const cha
- jbd2_journal_abort_handle(handle);
- }
-
-+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
-+{
-+ struct super_block *sb = journal->j_private;
-+ int error = is_journal_aborted(journal);
-+ struct ext4_journal_cb_entry *jce, *tmp;
-+
-+ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
-+ list_del_init(&jce->jce_list);
-+ jce->jce_func(sb, jce, error);
-+ }
-+}
-+
- /* Deal with the reporting of failure conditions on a filesystem such as
- * inconsistencies detected or read IO failures.
- *
-@@ -3492,6 +3504,8 @@ static int ext4_fill_super(struct super_
- ext4_count_dirs(sb));
- percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
-
-+ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-+
- no_journal:
- if (test_opt(sb, NOBH)) {
- if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {