+++ /dev/null
-From 18aadd47f88464928b5ce57791c2e8f9f2aaece0 Mon Sep 17 00:00:00 2001
-From: Bobi Jam <bobijam@whamcloud.com>
-Date: Mon, 20 Feb 2012 17:53:02 -0500
-Subject: ext4: expand commit callback and use it for mballoc
-Git-commit: 18aadd47
-Patch-mainline: v3.4-rc1
-
-The per-commit callback was used by mballoc code to manage free space
-bitmaps after deleted blocks have been released. This patch expands
-it to support multiple different callbacks, to allow other things to
-be done after the commit has been completed.
-
-Upstream-Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
-Upstream-Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
-Upstream-Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-Acked-by: Jeff Mahoney <jeffm@suse.com>
----
- fs/ext4/ext4_jbd2.h | 72 ++++++++++++++++++++++++
- fs/ext4/mballoc.c | 155 ++++++++++++++++++++++++----------------------------
- fs/ext4/mballoc.h | 18 +++---
- fs/ext4/super.c | 18 ++++++
- 4 files changed, 173 insertions(+), 90 deletions(-)
-
---- a/fs/ext4/ext4_jbd2.h
-+++ b/fs/ext4/ext4_jbd2.h
-@@ -104,6 +104,78 @@
- #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
- #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
-
-+/**
-+ * struct ext4_journal_cb_entry - Base structure for callback information.
-+ *
-+ * This struct is a 'seed' structure for use with your own callback
-+ * structs. If you are using callbacks you must allocate one of these,
-+ * or another struct of your own definition which has this struct
-+ * as its first element, and pass it to ext4_journal_callback_add().
-+ */
-+struct ext4_journal_cb_entry {
-+ /* linkage on the transaction's t_private_list of pending callbacks */
-+ struct list_head jce_list;
-+
-+ /* Function to call with this callback structure after the commit */
-+ void (*jce_func)(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce, int error);
-+
-+ /* user data follows here, in the struct that embeds this one */
-+};
-+
-+/**
-+ * ext4_journal_callback_add - add a function to call after transaction commit
-+ * @handle: active journal transaction handle to register callback on
-+ * @func: callback function to call after the transaction has committed:
-+ *        sb:  superblock of current filesystem for transaction
-+ *        jce: the callback entry that was registered (same pointer as @jce)
-+ *        rc:  journal state at commit (0 = transaction committed properly)
-+ * @jce: journal callback data (internal and function private data struct)
-+ *
-+ * The registered function will be called in the context of the journal thread
-+ * after the transaction for which the handle was created has completed.
-+ *
-+ * No locks are held when the callback function is called, so it is safe to
-+ * call blocking functions from within the callback, but the callback should
-+ * not block or run for too long, or the filesystem will be blocked waiting for
-+ * the next transaction to commit. No journaling functions can be used, or
-+ * there is a risk of deadlock.
-+ *
-+ * There is no guaranteed calling order of multiple registered callbacks on
-+ * the same transaction.
-+ */
-+static inline void ext4_journal_callback_add(handle_t *handle,
-+ void (*func)(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce,
-+ int rc),
-+ struct ext4_journal_cb_entry *jce)
-+{
-+ struct ext4_sb_info *sbi =
-+ EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+ /* Add the jce to transaction's private list, under s_md_lock */
-+ jce->jce_func = func;
-+ spin_lock(&sbi->s_md_lock);
-+ list_add(&jce->jce_list, &handle->h_transaction->t_private_list);
-+ spin_unlock(&sbi->s_md_lock);
-+}
-+
-+/**
-+ * ext4_journal_callback_del - delete a registered callback
-+ * @handle: active journal transaction handle on which callback was registered
-+ * @jce: registered journal callback entry to unregister
-+ */
-+static inline void ext4_journal_callback_del(handle_t *handle,
-+ struct ext4_journal_cb_entry *jce)
-+{
-+ struct ext4_sb_info *sbi =
-+ EXT4_SB(handle->h_transaction->t_journal->j_private);
-+
-+ spin_lock(&sbi->s_md_lock); /* same lock ext4_journal_callback_add() takes */
-+ list_del_init(&jce->jce_list);
-+ spin_unlock(&sbi->s_md_lock);
-+}
-+
- int
- ext4_mark_iloc_dirty(handle_t *handle,
- struct inode *inode,
---- a/fs/ext4/mballoc.c
-+++ b/fs/ext4/mballoc.c
-@@ -21,6 +21,7 @@
- * mballoc.c contains the multiblocks allocation routines
- */
-
-+#include "ext4_jbd2.h"
- #include "mballoc.h"
- #include <linux/debugfs.h>
- #include <linux/slab.h>
-@@ -337,7 +338,7 @@
- */
- static struct kmem_cache *ext4_pspace_cachep;
- static struct kmem_cache *ext4_ac_cachep;
--static struct kmem_cache *ext4_free_ext_cachep;
-+static struct kmem_cache *ext4_free_data_cachep;
-
- /* We create slab caches for groupinfo data structures based on the
- * superblock block size. There will be one per mounted filesystem for
-@@ -355,7 +356,8 @@ static void ext4_mb_generate_from_pa(str
- ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
-+static void ext4_free_data_callback(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce, int rc);
-
- static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
- {
-@@ -2492,8 +2494,6 @@ int ext4_mb_init(struct super_block *sb,
- proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
- &ext4_mb_seq_groups_fops, sb);
-
-- if (sbi->s_journal)
-- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
- out:
- if (ret) {
- kfree(sbi->s_mb_offsets);
-@@ -2598,58 +2598,55 @@ static inline int ext4_issue_discard(str
- * This function is called by the jbd2 layer once the commit has finished,
- * so we know we can free the blocks that were released with that commit.
- */
--static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
-+static void ext4_free_data_callback(struct super_block *sb,
-+ struct ext4_journal_cb_entry *jce,
-+ int rc)
- {
-- struct super_block *sb = journal->j_private;
-+ struct ext4_free_data *entry = (struct ext4_free_data *)jce;
- struct ext4_buddy e4b;
- struct ext4_group_info *db;
- int err, count = 0, count2 = 0;
-- struct ext4_free_data *entry;
-- struct list_head *l, *ltmp;
-
-- list_for_each_safe(l, ltmp, &txn->t_private_list) {
-- entry = list_entry(l, struct ext4_free_data, list);
-+ mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-+ entry->efd_count, entry->efd_group, entry);
-
-- mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
-- entry->count, entry->group, entry);
-+ if (test_opt(sb, DISCARD))
-+ ext4_issue_discard(sb, entry->efd_group,
-+ entry->efd_start_blk, entry->efd_count);
-+
-+ err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
-+ /* we expect to find existing buddy because it's pinned */
-+ BUG_ON(err != 0);
-+
-+
-+ db = e4b.bd_info;
-+ /* there are blocks to put in buddy to make them really free */
-+ count += entry->efd_count;
-+ count2++;
-+ ext4_lock_group(sb, entry->efd_group);
-+ /* Take it out of per group rb tree */
-+ rb_erase(&entry->efd_node, &(db->bb_free_root));
-+ mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
-
-- if (test_opt(sb, DISCARD))
-- ext4_issue_discard(sb, entry->group,
-- entry->start_blk, entry->count);
--
-- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
-- /* we expect to find existing buddy because it's pinned */
-- BUG_ON(err != 0);
--
-- db = e4b.bd_info;
-- /* there are blocks to put in buddy to make them really free */
-- count += entry->count;
-- count2++;
-- ext4_lock_group(sb, entry->group);
-- /* Take it out of per group rb tree */
-- rb_erase(&entry->node, &(db->bb_free_root));
-- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
-+ /*
-+ * Clear the trimmed flag for the group so that the next
-+ * ext4_trim_fs can trim it.
-+ * If the volume is mounted with -o discard, online discard
-+ * is supported and the free blocks will be trimmed online.
-+ */
-+ if (!test_opt(sb, DISCARD))
-+ EXT4_MB_GRP_CLEAR_TRIMMED(db);
-
-- /*
-- * Clear the trimmed flag for the group so that the next
-- * ext4_trim_fs can trim it.
-- * If the volume is mounted with -o discard, online discard
-- * is supported and the free blocks will be trimmed online.
-+ if (!db->bb_free_root.rb_node) {
-+ /* No more items in the per group rb tree
-+ * balance refcounts from ext4_mb_free_metadata()
- */
-- if (!test_opt(sb, DISCARD))
-- EXT4_MB_GRP_CLEAR_TRIMMED(db);
--
-- if (!db->bb_free_root.rb_node) {
-- /* No more items in the per group rb tree
-- * balance refcounts from ext4_mb_free_metadata()
-- */
-- page_cache_release(e4b.bd_buddy_page);
-- page_cache_release(e4b.bd_bitmap_page);
-- }
-- ext4_unlock_group(sb, entry->group);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-- ext4_mb_unload_buddy(&e4b);
-+ page_cache_release(e4b.bd_buddy_page);
-+ page_cache_release(e4b.bd_bitmap_page);
- }
-+ ext4_unlock_group(sb, entry->efd_group);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
-+ ext4_mb_unload_buddy(&e4b);
-
- mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
- }
-@@ -2702,9 +2699,9 @@ int __init ext4_init_mballoc(void)
- return -ENOMEM;
- }
-
-- ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
-- SLAB_RECLAIM_ACCOUNT);
-- if (ext4_free_ext_cachep == NULL) {
-+ ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
-+ SLAB_RECLAIM_ACCOUNT);
-+ if (ext4_free_data_cachep == NULL) {
- kmem_cache_destroy(ext4_pspace_cachep);
- kmem_cache_destroy(ext4_ac_cachep);
- return -ENOMEM;
-@@ -2722,7 +2719,7 @@ void ext4_exit_mballoc(void)
- rcu_barrier();
- kmem_cache_destroy(ext4_pspace_cachep);
- kmem_cache_destroy(ext4_ac_cachep);
-- kmem_cache_destroy(ext4_free_ext_cachep);
-+ kmem_cache_destroy(ext4_free_data_cachep);
- ext4_groupinfo_destroy_slabs();
- ext4_remove_debugfs_entry();
- }
-@@ -3273,8 +3270,8 @@ static void ext4_mb_generate_from_freeli
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
-- entry = rb_entry(n, struct ext4_free_data, node);
-- mb_set_bits(bitmap, entry->start_blk, entry->count);
-+ entry = rb_entry(n, struct ext4_free_data, efd_node);
-+ mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
- n = rb_next(n);
- }
- return;
-@@ -4369,9 +4366,9 @@ out:
- static int can_merge(struct ext4_free_data *entry1,
- struct ext4_free_data *entry2)
- {
-- if ((entry1->t_tid == entry2->t_tid) &&
-- (entry1->group == entry2->group) &&
-- ((entry1->start_blk + entry1->count) == entry2->start_blk))
-+ if ((entry1->efd_tid == entry2->efd_tid) &&
-+ (entry1->efd_group == entry2->efd_group) &&
-+ ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
- return 1;
- return 0;
- }
-@@ -4385,7 +4382,6 @@ ext4_mb_free_metadata(handle_t *handle,
- struct ext4_free_data *entry;
- struct ext4_group_info *db = e4b->bd_info;
- struct super_block *sb = e4b->bd_sb;
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct rb_node **n = &db->bb_free_root.rb_node, *node;
- struct rb_node *parent = NULL, *new_node;
-
-@@ -4393,8 +4389,8 @@ ext4_mb_free_metadata(handle_t *handle,
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
-
-- new_node = &new_entry->node;
-- block = new_entry->start_blk;
-+ new_node = &new_entry->efd_node;
-+ block = new_entry->efd_start_blk;
-
- if (!*n) {
- /* first free block exent. We need to
-@@ -4407,10 +4403,10 @@ ext4_mb_free_metadata(handle_t *handle,
- }
- while (*n) {
- parent = *n;
-- entry = rb_entry(parent, struct ext4_free_data, node);
-- if (block < entry->start_blk)
-+ entry = rb_entry(parent, struct ext4_free_data, efd_node);
-+ if (block < entry->efd_start_blk)
- n = &(*n)->rb_left;
-- else if (block >= (entry->start_blk + entry->count))
-+ else if (block >= (entry->efd_start_blk + entry->efd_count))
- n = &(*n)->rb_right;
- else {
- ext4_grp_locked_error(sb, group, 0,
-@@ -4426,34 +4422,29 @@ ext4_mb_free_metadata(handle_t *handle,
- /* Now try to see the extent can be merged to left and right */
- node = rb_prev(new_node);
- if (node) {
-- entry = rb_entry(node, struct ext4_free_data, node);
-+ entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(entry, new_entry)) {
-- new_entry->start_blk = entry->start_blk;
-- new_entry->count += entry->count;
-+ new_entry->efd_start_blk = entry->efd_start_blk;
-+ new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
-- spin_lock(&sbi->s_md_lock);
-- list_del(&entry->list);
-- spin_unlock(&sbi->s_md_lock);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-+ ext4_journal_callback_del(handle, &entry->efd_jce);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
-
- node = rb_next(new_node);
- if (node) {
-- entry = rb_entry(node, struct ext4_free_data, node);
-+ entry = rb_entry(node, struct ext4_free_data, efd_node);
- if (can_merge(new_entry, entry)) {
-- new_entry->count += entry->count;
-+ new_entry->efd_count += entry->efd_count;
- rb_erase(node, &(db->bb_free_root));
-- spin_lock(&sbi->s_md_lock);
-- list_del(&entry->list);
-- spin_unlock(&sbi->s_md_lock);
-- kmem_cache_free(ext4_free_ext_cachep, entry);
-+ ext4_journal_callback_del(handle, &entry->efd_jce);
-+ kmem_cache_free(ext4_free_data_cachep, entry);
- }
- }
- /* Add the extent to transaction's private list */
-- spin_lock(&sbi->s_md_lock);
-- list_add(&new_entry->list, &handle->h_transaction->t_private_list);
-- spin_unlock(&sbi->s_md_lock);
-+ ext4_journal_callback_add(handle, ext4_free_data_callback,
-+ &new_entry->efd_jce);
- return 0;
- }
-
-@@ -4596,16 +4587,16 @@ do_more:
- * blocks being freed are metadata. these blocks shouldn't
- * be used until this transaction is committed
- */
-- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
-+ new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
- if (!new_entry) {
- ext4_mb_unload_buddy(&e4b);
- err = -ENOMEM;
- goto error_return;
- }
-- new_entry->start_blk = bit;
-- new_entry->group = block_group;
-- new_entry->count = count;
-- new_entry->t_tid = handle->h_transaction->t_tid;
-+ new_entry->efd_start_blk = bit;
-+ new_entry->efd_group = block_group;
-+ new_entry->efd_count = count;
-+ new_entry->efd_tid = handle->h_transaction->t_tid;
-
- ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count);
---- a/fs/ext4/mballoc.h
-+++ b/fs/ext4/mballoc.h
-@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
-
-
- struct ext4_free_data {
-- /* this links the free block information from group_info */
-- struct rb_node node;
-+ /* MUST be the first member */
-+ struct ext4_journal_cb_entry efd_jce;
-+
-+ /* ext4_free_data private data starts from here */
-
-- /* this links the free block information from ext4_sb_info */
-- struct list_head list;
-+ /* this links the free block information from group_info */
-+ struct rb_node efd_node;
-
- /* group which free block extent belongs */
-- ext4_group_t group;
-+ ext4_group_t efd_group;
-
- /* free block extent */
-- ext4_grpblk_t start_blk;
-- ext4_grpblk_t count;
-+ ext4_grpblk_t efd_start_blk;
-+ ext4_grpblk_t efd_count;
-
- /* transaction which freed this extent */
-- tid_t t_tid;
-+ tid_t efd_tid;
- };
-
- struct ext4_prealloc_space {
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -413,6 +413,22 @@ static void save_error_info(struct super
- ext4_commit_super(sb, 1);
- }
-
-+static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
-+{
-+ struct super_block *sb = journal->j_private;
-+ struct ext4_sb_info *sbi = EXT4_SB(sb);
-+ int error = is_journal_aborted(journal); /* passed to every callback */
-+ struct ext4_journal_cb_entry *jce, *tmp;
-+
-+ spin_lock(&sbi->s_md_lock); /* guards txn->t_private_list */
-+ list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
-+ list_del_init(&jce->jce_list); /* unlink before dropping the lock */
-+ spin_unlock(&sbi->s_md_lock); /* callback runs without s_md_lock */
-+ jce->jce_func(sb, jce, error);
-+ spin_lock(&sbi->s_md_lock); /* NOTE(review): confirm tmp is still linked */
-+ }
-+ spin_unlock(&sbi->s_md_lock);
-+}
-
- /* Deal with the reporting of failure conditions on a filesystem such as
- * inconsistencies detected or read IO failures.
-@@ -3600,6 +3616,8 @@ static int ext4_fill_super(struct super_
- }
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-
-+ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-+
- /*
- * The journal may have updated the bg summary counts, so we
- * need to update the global counters.