1 Index: linux-stage/fs/ext4/ext4_jbd2.h
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/ext4_jbd2.h
4 +++ linux-stage/fs/ext4/ext4_jbd2.h
6 #define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
7 #define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
10 + * struct ext4_journal_cb_entry - Base structure for callback information.
12 + * This struct is a 'seed' structure for use with your own callback
13 + * structs. If you are using callbacks you must allocate one of these
14 + * or another struct of your own definition which has this struct
15 + * as its first element and pass it to ext4_journal_callback_add().
17 +struct ext4_journal_cb_entry {
18 + /* list information for other callbacks attached to the same handle */
19 + struct list_head jce_list;
21 + /* Function to call with this callback structure */
22 + void (*jce_func)(struct super_block *sb,
23 + struct ext4_journal_cb_entry *jce, int error);
25 + /* user data goes here */
29 + * ext4_journal_callback_add: add a function to call after transaction commit
30 + * @handle: active journal transaction handle to register callback on
31 + * @func: callback function to call after the transaction has committed:
32 + * @sb: superblock of current filesystem for transaction
33 + * @jce: returned journal callback data
34 + * @rc: journal state at commit (0 = transaction committed properly)
35 + * @jce: journal callback data (internal and function private data struct)
37 + * The registered function will be called in the context of the journal thread
38 + * after the transaction for which the handle was created has completed.
40 + * No locks are held when the callback function is called, so it is safe to
41 + * call blocking functions from within the callback, but the callback should
42 + * not block or run for too long, or the filesystem will be blocked waiting for
43 + * the next transaction to commit. No journaling functions can be used, or
44 + * there is a risk of deadlock.
46 + * There is no guaranteed calling order of multiple registered callbacks on
47 + * the same transaction.
49 +static inline void ext4_journal_callback_add(handle_t *handle,
50 + void (*func)(struct super_block *sb,
51 + struct ext4_journal_cb_entry *jce,
53 + struct ext4_journal_cb_entry *jce)
55 + struct ext4_sb_info *sbi =
56 + EXT4_SB(handle->h_transaction->t_journal->j_private);
58 + /* Add the jce to transaction's private list */
59 + jce->jce_func = func;
60 + spin_lock(&sbi->s_md_lock);
61 + list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
62 + spin_unlock(&sbi->s_md_lock);
66 + * ext4_journal_callback_del: delete a registered callback
67 + * @handle: active journal transaction handle on which callback was registered
68 + * @jce: registered journal callback entry to unregister
70 +static inline void ext4_journal_callback_del(handle_t *handle,
71 + struct ext4_journal_cb_entry *jce)
73 + struct ext4_sb_info *sbi =
74 + EXT4_SB(handle->h_transaction->t_journal->j_private);
76 + spin_lock(&sbi->s_md_lock);
77 + list_del_init(&jce->jce_list);
78 + spin_unlock(&sbi->s_md_lock);
81 +#define HAVE_EXT4_JOURNAL_CALLBACK_ADD
84 ext4_mark_iloc_dirty(handle_t *handle,
86 Index: linux-stage/fs/ext4/mballoc.h
87 ===================================================================
88 --- linux-stage.orig/fs/ext4/mballoc.h
89 +++ linux-stage/fs/ext4/mballoc.h
90 @@ -96,23 +96,24 @@ extern u8 mb_enable_debug;
92 #define MB_DEFAULT_GROUP_PREALLOC 512
95 struct ext4_free_data {
96 - /* this links the free block information from group_info */
97 - struct rb_node node;
98 + /* MUST be the first member */
99 + struct ext4_journal_cb_entry efd_jce;
101 - /* this links the free block information from ext4_sb_info */
102 - struct list_head list;
103 + /* ext4_free_data private data starts from here */
105 + /* this links the free block information from group_info */
106 + struct rb_node efd_node;
108 /* group which free block extent belongs */
109 - ext4_group_t group;
110 + ext4_group_t efd_group;
112 /* free block extent */
113 - ext4_grpblk_t start_blk;
114 - ext4_grpblk_t count;
115 + ext4_grpblk_t efd_start_blk;
116 + ext4_grpblk_t efd_count;
118 /* transaction which freed this extent */
123 struct ext4_prealloc_space {
124 Index: linux-stage/fs/ext4/mballoc.c
125 ===================================================================
126 --- linux-stage.orig/fs/ext4/mballoc.c
127 +++ linux-stage/fs/ext4/mballoc.c
129 * mballoc.c contains the multiblocks allocation routines
132 +#include "ext4_jbd2.h"
134 #include <linux/debugfs.h>
135 #include <trace/events/ext4.h>
136 @@ -336,12 +337,12 @@
138 static struct kmem_cache *ext4_pspace_cachep;
139 static struct kmem_cache *ext4_ac_cachep;
140 -static struct kmem_cache *ext4_free_ext_cachep;
141 +static struct kmem_cache *ext4_free_data_cachep;
142 static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
144 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
146 -static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
147 +static void ext4_free_data_callback(struct super_block *sb, struct ext4_journal_cb_entry *jce, int error);
149 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
151 @@ -2592,8 +2593,6 @@ int ext4_mb_init(struct super_block *sb,
155 - if (sbi->s_journal)
156 - sbi->s_journal->j_commit_callback = release_blocks_on_commit;
160 @@ -2693,58 +2692,54 @@ static inline int ext4_issue_discard(str
161 * This function is called by the jbd2 layer once the commit has finished,
162 * so we know we can free the blocks that were released with that commit.
164 -static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
165 +static void ext4_free_data_callback(struct super_block *sb,
166 + struct ext4_journal_cb_entry *jce,
169 - struct super_block *sb = journal->j_private;
170 + struct ext4_free_data *entry = (struct ext4_free_data *)jce;
171 struct ext4_buddy e4b;
172 struct ext4_group_info *db;
173 int err, count = 0, count2 = 0;
174 - struct ext4_free_data *entry;
175 - struct list_head *l, *ltmp;
177 - list_for_each_safe(l, ltmp, &txn->t_private_list) {
178 - entry = list_entry(l, struct ext4_free_data, list);
180 - mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
181 - entry->count, entry->group, entry);
182 + mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
183 + entry->efd_count, entry->efd_group, entry);
185 - if (test_opt(sb, DISCARD))
186 - ext4_issue_discard(sb, entry->group,
187 - entry->start_blk, entry->count);
188 + if (test_opt(sb, DISCARD))
189 + ext4_issue_discard(sb, entry->efd_group,
190 + entry->efd_start_blk, entry->efd_count);
192 - err = ext4_mb_load_buddy(sb, entry->group, &e4b);
193 - /* we expect to find existing buddy because it's pinned */
197 - /* there are blocks to put in buddy to make them really free */
198 - count += entry->count;
200 - ext4_lock_group(sb, entry->group);
201 - /* Take it out of per group rb tree */
202 - rb_erase(&entry->node, &(db->bb_free_root));
203 - mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
206 - * Clear the trimmed flag for the group so that the next
207 - * ext4_trim_fs can trim it.
208 - * If the volume is mounted with -o discard, online discard
209 - * is supported and the free blocks will be trimmed online.
211 - if (!test_opt(sb, DISCARD))
212 - EXT4_MB_GRP_CLEAR_TRIMMED(db);
214 - if (!db->bb_free_root.rb_node) {
215 - /* No more items in the per group rb tree
216 - * balance refcounts from ext4_mb_free_metadata()
218 - page_cache_release(e4b.bd_buddy_page);
219 - page_cache_release(e4b.bd_bitmap_page);
221 - ext4_unlock_group(sb, entry->group);
222 - kmem_cache_free(ext4_free_ext_cachep, entry);
223 - ext4_mb_release_desc(&e4b);
224 + err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
225 + /* we expect to find existing buddy because it's pinned */
229 + /* there are blocks to put in buddy to make them really free */
230 + count += entry->efd_count;
232 + ext4_lock_group(sb, entry->efd_group);
233 + /* Take it out of per group rb tree */
234 + rb_erase(&entry->efd_node, &(db->bb_free_root));
235 + mb_free_blocks(NULL, &e4b, entry->efd_start_blk, entry->efd_count);
238 + * Clear the trimmed flag for the group so that the next
239 + * ext4_trim_fs can trim it.
240 + * If the volume is mounted with -o discard, online discard
241 + * is supported and the free blocks will be trimmed online.
243 + if (!test_opt(sb, DISCARD))
244 + EXT4_MB_GRP_CLEAR_TRIMMED(db);
246 + if (!db->bb_free_root.rb_node) {
247 + /* No more items in the per group rb tree
248 + * balance refcounts from ext4_mb_free_metadata()
250 + page_cache_release(e4b.bd_buddy_page);
251 + page_cache_release(e4b.bd_bitmap_page);
253 + ext4_unlock_group(sb, entry->efd_group);
254 + kmem_cache_free(ext4_free_data_cachep, entry);
255 + ext4_mb_release_desc(&e4b);
257 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
259 @@ -2794,22 +2789,22 @@ int __init init_ext4_mballoc(void)
260 kmem_cache_create("ext4_alloc_context",
261 sizeof(struct ext4_allocation_context),
262 0, SLAB_RECLAIM_ACCOUNT, NULL);
263 - if (ext4_ac_cachep == NULL) {
264 - kmem_cache_destroy(ext4_pspace_cachep);
267 + if (ext4_ac_cachep == NULL)
270 + ext4_free_data_cachep =
271 + KMEM_CACHE(ext4_free_data, SLAB_RECLAIM_ACCOUNT);
272 + if (ext4_free_data_cachep == NULL)
275 - ext4_free_ext_cachep =
276 - kmem_cache_create("ext4_free_block_extents",
277 - sizeof(struct ext4_free_data),
278 - 0, SLAB_RECLAIM_ACCOUNT, NULL);
279 - if (ext4_free_ext_cachep == NULL) {
280 - kmem_cache_destroy(ext4_pspace_cachep);
281 - kmem_cache_destroy(ext4_ac_cachep);
284 ext4_create_debugfs_entry();
288 + kmem_cache_destroy(ext4_ac_cachep);
290 + kmem_cache_destroy(ext4_pspace_cachep);
294 void exit_ext4_mballoc(void)
295 @@ -2821,7 +2816,7 @@ void exit_ext4_mballoc(void)
297 kmem_cache_destroy(ext4_pspace_cachep);
298 kmem_cache_destroy(ext4_ac_cachep);
299 - kmem_cache_destroy(ext4_free_ext_cachep);
300 + kmem_cache_destroy(ext4_free_data_cachep);
301 ext4_remove_debugfs_entry();
304 @@ -3362,8 +3357,8 @@ static void ext4_mb_generate_from_freeli
305 n = rb_first(&(grp->bb_free_root));
308 - entry = rb_entry(n, struct ext4_free_data, node);
309 - mb_set_bits(bitmap, entry->start_blk, entry->count);
310 + entry = rb_entry(n, struct ext4_free_data, efd_node);
311 + mb_set_bits(bitmap, entry->efd_start_blk, entry->efd_count);
315 @@ -4623,11 +4618,11 @@ out3:
316 * AND the blocks are associated with the same group.
318 static int can_merge(struct ext4_free_data *entry1,
319 - struct ext4_free_data *entry2)
320 + struct ext4_free_data *entry2)
322 - if ((entry1->t_tid == entry2->t_tid) &&
323 - (entry1->group == entry2->group) &&
324 - ((entry1->start_blk + entry1->count) == entry2->start_blk))
325 + if ((entry1->efd_tid == entry2->efd_tid) &&
326 + (entry1->efd_group == entry2->efd_group) &&
327 + ((entry1->efd_start_blk + entry1->efd_count) == entry2->efd_start_blk))
331 @@ -4640,7 +4635,6 @@ ext4_mb_free_metadata(handle_t *handle,
332 struct ext4_free_data *entry;
333 struct ext4_group_info *db = e4b->bd_info;
334 struct super_block *sb = e4b->bd_sb;
335 - struct ext4_sb_info *sbi = EXT4_SB(sb);
336 struct rb_node **n = &db->bb_free_root.rb_node, *node;
337 struct rb_node *parent = NULL, *new_node;
339 @@ -4648,8 +4642,8 @@ ext4_mb_free_metadata(handle_t *handle,
340 BUG_ON(e4b->bd_bitmap_page == NULL);
341 BUG_ON(e4b->bd_buddy_page == NULL);
343 - new_node = &new_entry->node;
344 - block = new_entry->start_blk;
345 + new_node = &new_entry->efd_node;
346 + block = new_entry->efd_start_blk;
349 /* first free block exent. We need to
350 @@ -4662,15 +4656,15 @@ ext4_mb_free_metadata(handle_t *handle,
354 - entry = rb_entry(parent, struct ext4_free_data, node);
355 - if (block < entry->start_blk)
356 + entry = rb_entry(parent, struct ext4_free_data, efd_node);
357 + if (block < entry->efd_start_blk)
359 - else if (block >= (entry->start_blk + entry->count))
360 + else if (block >= (entry->efd_start_blk + entry->efd_count))
363 ext4_grp_locked_error(sb, e4b->bd_group, __func__,
364 "Double free of blocks %d (%d %d)",
365 - block, entry->start_blk, entry->count);
366 + block, entry->efd_start_blk, entry->efd_count);
370 @@ -4681,34 +4675,29 @@ ext4_mb_free_metadata(handle_t *handle,
371 /* Now try to see the extent can be merged to left and right */
372 node = rb_prev(new_node);
374 - entry = rb_entry(node, struct ext4_free_data, node);
375 + entry = rb_entry(node, struct ext4_free_data, efd_node);
376 if (can_merge(entry, new_entry)) {
377 - new_entry->start_blk = entry->start_blk;
378 - new_entry->count += entry->count;
379 + new_entry->efd_start_blk = entry->efd_start_blk;
380 + new_entry->efd_count += entry->efd_count;
381 rb_erase(node, &(db->bb_free_root));
382 - spin_lock(&sbi->s_md_lock);
383 - list_del(&entry->list);
384 - spin_unlock(&sbi->s_md_lock);
385 - kmem_cache_free(ext4_free_ext_cachep, entry);
386 + ext4_journal_callback_del(handle, &entry->efd_jce);
387 + kmem_cache_free(ext4_free_data_cachep, entry);
391 node = rb_next(new_node);
393 - entry = rb_entry(node, struct ext4_free_data, node);
394 + entry = rb_entry(node, struct ext4_free_data, efd_node);
395 if (can_merge(new_entry, entry)) {
396 - new_entry->count += entry->count;
397 + new_entry->efd_count += entry->efd_count;
398 rb_erase(node, &(db->bb_free_root));
399 - spin_lock(&sbi->s_md_lock);
400 - list_del(&entry->list);
401 - spin_unlock(&sbi->s_md_lock);
402 - kmem_cache_free(ext4_free_ext_cachep, entry);
403 + ext4_journal_callback_del(handle, &entry->efd_jce);
404 + kmem_cache_free(ext4_free_data_cachep, entry);
407 /* Add the extent to transaction's private list */
408 - spin_lock(&sbi->s_md_lock);
409 - list_add(&new_entry->list, &handle->h_transaction->t_private_list);
410 - spin_unlock(&sbi->s_md_lock);
411 + ext4_journal_callback_add(handle, ext4_free_data_callback,
412 + &new_entry->efd_jce);
416 @@ -4836,11 +4825,11 @@ do_more:
417 * blocks being freed are metadata. these blocks shouldn't
418 * be used until this transaction is committed
420 - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
421 - new_entry->start_blk = bit;
422 - new_entry->group = block_group;
423 - new_entry->count = count;
424 - new_entry->t_tid = handle->h_transaction->t_tid;
425 + new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
426 + new_entry->efd_start_blk = bit;
427 + new_entry->efd_group = block_group;
428 + new_entry->efd_count = count;
429 + new_entry->efd_tid = handle->h_transaction->t_tid;
431 ext4_lock_group(sb, block_group);
432 mb_clear_bits(bitmap_bh->b_data, bit, count);
433 Index: linux-stage/fs/ext4/super.c
434 ===================================================================
435 --- linux-stage.orig/fs/ext4/super.c
436 +++ linux-stage/fs/ext4/super.c
437 @@ -301,6 +301,23 @@ void ext4_journal_abort_handle(const cha
439 EXPORT_SYMBOL(ext4_journal_abort_handle);
441 +static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
443 + struct super_block *sb = journal->j_private;
444 + struct ext4_sb_info *sbi = EXT4_SB(sb);
445 + int error = is_journal_aborted(journal);
446 + struct ext4_journal_cb_entry *jce, *tmp;
448 + spin_lock(&sbi->s_md_lock);
449 + list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
450 + list_del_init(&jce->jce_list);
451 + spin_unlock(&sbi->s_md_lock);
452 + jce->jce_func(sb, jce, error);
453 + spin_lock(&sbi->s_md_lock);
455 + spin_unlock(&sbi->s_md_lock);
458 /* Deal with the reporting of failure conditions on a filesystem such as
459 * inconsistencies detected or read IO failures.
461 @@ -3040,6 +3057,8 @@ static int ext4_fill_super(struct super_
463 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
465 + sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
469 if (test_opt(sb, NOBH)) {