1 Index: linux-2.6.10/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.6.10.orig/fs/ext3/mballoc.c 2005-02-25 17:28:41.836311072 +0200
4 +++ linux-2.6.10/fs/ext3/mballoc.c 2005-02-25 17:28:41.859307576 +0200
7 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 + * GNU General Public License for more details.
19 + * You should have received a copy of the GNU General Public License
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 + * mballoc.c contains the multiblocks allocation routines
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/namei.h>
33 +#include <linux/jbd.h>
34 +#include <linux/ext3_fs.h>
35 +#include <linux/ext3_jbd.h>
36 +#include <linux/quotaops.h>
37 +#include <linux/buffer_head.h>
38 +#include <linux/module.h>
42 + * - bitmap/buddy read-ahead (proposed by Oleg Drokin aka green)
43 + * - track min/max extents in each group for better group selection
44 + * - is it worthwhile to use buddies directly if req is 2^N blocks?
45 + * - mb_mark_used() may allocate chunk right after splitting buddy
46 + * - special flag to advise allocator to look for requested + N blocks
47 + * this may improve interaction between extents and mballoc
48 + * - tree of groups sorted by number of free blocks
49 + * - percpu reservation code (hotpath)
54 + * with AGGRESSIVE_CHECK allocator runs consistency checks over
55 + * structures. these checks slow things down a lot
57 +#define AGGRESSIVE_CHECK__
60 + * with MBALLOC_STATS allocator will collect stats that will be
61 + * shown at umount. The collecting costs though!
63 +#define MBALLOC_STATS
69 +#define mb_debug(fmt,a...) printk(fmt, ##a)
71 +#define mb_debug(fmt,a...)
75 + * where to save buddies structures between umount/mount (clean case only)
77 +#define EXT3_BUDDY_FILE ".buddy"
80 + * How long mballoc can look for a best extent (in found extents)
82 +#define EXT3_MB_MAX_TO_SCAN 100
85 + * This structure is on-disk description of a group for mballoc
87 +struct ext3_mb_group_descr {
88 + __u16 mgd_first_free; /* first free block in the group */
89 + __u16 mgd_free; /* number of free blocks in the group */
90 + __u16 mgd_counters[16]; /* number of free blocks by order */
94 + * This structure is header of mballoc's file
96 +struct ext3_mb_grp_header {
100 +#define EXT3_MB_MAGIC_V1 0xbabd16fd
103 +struct ext3_free_extent {
109 +struct ext3_allocation_context {
110 + struct super_block *ac_sb;
113 +struct ext3_free_extent ac_g_ex;
115 + /* the best found extent */
116 + struct ext3_free_extent ac_b_ex;
118 + /* number of iterations done. we have to track to limit searching */
119 + unsigned long ac_ex_scanned;
120 + __u16 ac_groups_scanned;
123 + __u8 ac_flags; /* allocation hints */
127 +#define AC_STATUS_CONTINUE 1
128 +#define AC_STATUS_FOUND 2
129 +#define AC_STATUS_BREAK 3
132 + struct buffer_head *bd_bh;
133 + struct buffer_head *bd_bh2;
134 + struct ext3_buddy_group_blocks *bd_bd;
135 + struct super_block *bd_sb;
139 +#define EXT3_MB_BITMAP(e3b) ((e3b)->bd_bh->b_data)
140 +#define EXT3_MB_BUDDY(e3b) ((e3b)->bd_bh2->b_data)
142 +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
144 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
145 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
146 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
147 +int ext3_mb_reserve_blocks(struct super_block *, int);
148 +void ext3_mb_release_blocks(struct super_block *, int);
149 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
150 +void ext3_mb_free_committed_blocks(struct super_block *);
152 +#if BITS_PER_LONG == 64
153 +#define mb_correct_addr_and_bit(bit,addr) \
155 + bit += ((unsigned long) addr & 7UL) << 3; \
156 + addr = (void *) ((unsigned long) addr & ~7UL); \
158 +#elif BITS_PER_LONG == 32
159 +#define mb_correct_addr_and_bit(bit,addr) \
161 + bit += ((unsigned long) addr & 3UL) << 3; \
162 + addr = (void *) ((unsigned long) addr & ~3UL); \
165 +#error "how many bits you are?!"
168 +static inline int mb_test_bit(int bit, void *addr)
170 + mb_correct_addr_and_bit(bit,addr);
171 + return ext2_test_bit(bit, addr);
174 +static inline void mb_set_bit(int bit, void *addr)
176 + mb_correct_addr_and_bit(bit,addr);
177 + ext2_set_bit(bit, addr);
180 +static inline void mb_set_bit_atomic(int bit, void *addr)
182 + mb_correct_addr_and_bit(bit,addr);
183 + ext2_set_bit_atomic(NULL, bit, addr);
186 +static inline void mb_clear_bit(int bit, void *addr)
188 + mb_correct_addr_and_bit(bit,addr);
189 + ext2_clear_bit(bit, addr);
192 +static inline void mb_clear_bit_atomic(int bit, void *addr)
194 + mb_correct_addr_and_bit(bit,addr);
195 + ext2_clear_bit_atomic(NULL, bit, addr);
198 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
203 + J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b));
204 + J_ASSERT(max != NULL);
206 + if (order > e3b->bd_blkbits + 1) {
211 + /* at order 0 we see each particular block */
212 + *max = 1 << (e3b->bd_blkbits + 3);
214 + return EXT3_MB_BITMAP(e3b);
216 + bb = EXT3_MB_BUDDY(e3b);
218 + while (i < order) {
219 + bb += 1 << (e3b->bd_blkbits - i);
223 + J_ASSERT((unsigned) (bb - (char *) EXT3_MB_BUDDY(e3b)) <
224 + e3b->bd_sb->s_blocksize);
228 +static int ext3_mb_load_buddy(struct super_block *sb, int group,
229 + struct ext3_buddy *e3b)
231 + struct ext3_sb_info *sbi = EXT3_SB(sb);
233 + J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
234 + J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
237 + e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
238 + if (e3b->bd_bh == NULL) {
239 + ext3_error(sb, "ext3_mb_load_buddy",
240 + "can't get block for buddy bitmap\n");
244 + e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
245 + if (e3b->bd_bh2 == NULL) {
246 + ext3_error(sb, "ext3_mb_load_buddy",
247 + "can't get block for buddy bitmap\n");
251 + if (!buffer_uptodate(e3b->bd_bh))
252 + ll_rw_block(READ, 1, &e3b->bd_bh);
253 + if (!buffer_uptodate(e3b->bd_bh2))
254 + ll_rw_block(READ, 1, &e3b->bd_bh2);
256 + wait_on_buffer(e3b->bd_bh);
257 + J_ASSERT(buffer_uptodate(e3b->bd_bh));
258 + wait_on_buffer(e3b->bd_bh2);
259 + J_ASSERT(buffer_uptodate(e3b->bd_bh2));
261 + e3b->bd_blkbits = sb->s_blocksize_bits;
262 + e3b->bd_bd = sbi->s_buddy_blocks[group];
264 + e3b->bd_group = group;
268 + brelse(e3b->bd_bh);
269 + brelse(e3b->bd_bh2);
271 + e3b->bd_bh2 = NULL;
275 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
277 + mark_buffer_dirty(e3b->bd_bh);
278 + mark_buffer_dirty(e3b->bd_bh2);
281 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
283 + brelse(e3b->bd_bh);
284 + brelse(e3b->bd_bh2);
287 +#ifdef AGGRESSIVE_CHECK
288 +static void mb_check_buddy(struct ext3_buddy *e3b)
290 + int order = e3b->bd_blkbits + 1;
291 + int max, max2, i, j, k, count;
292 + void *buddy, *buddy2;
294 + if (!test_opt(e3b->bd_sb, MBALLOC))
297 + while (order > 1) {
298 + buddy = mb_find_buddy(e3b, order, &max);
300 + buddy2 = mb_find_buddy(e3b, order - 1, &max2);
302 + J_ASSERT(buddy != buddy2);
303 + J_ASSERT(max * 2 == max2);
306 + for (i = 0; i < max; i++) {
308 + if (mb_test_bit(i, buddy)) {
309 + /* only single bit in buddy2 may be 1 */
310 + if (!mb_test_bit(i << 1, buddy2))
311 + J_ASSERT(mb_test_bit((i<<1)+1, buddy2));
312 + else if (!mb_test_bit((i << 1) + 1, buddy2))
313 + J_ASSERT(mb_test_bit(i << 1, buddy2));
317 + /* both bits in buddy2 must be 0 */
318 + J_ASSERT(mb_test_bit(i << 1, buddy2));
319 + J_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
321 + for (j = 0; j < (1 << order); j++) {
322 + k = (i * (1 << order)) + j;
323 + J_ASSERT(!mb_test_bit(k, EXT3_MB_BITMAP(e3b)));
327 + J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
331 + buddy = mb_find_buddy(e3b, 0, &max);
332 + for (i = 0; i < max; i++) {
333 + if (!mb_test_bit(i, buddy))
335 + /* check used bits only */
336 + for (j = 0; j < e3b->bd_blkbits + 1; j++) {
337 + buddy2 = mb_find_buddy(e3b, j, &max2);
339 + J_ASSERT(k < max2);
340 + J_ASSERT(mb_test_bit(k, buddy2));
345 +#define mb_check_buddy(e3b)
349 +ext3_lock_group(struct super_block *sb, int group)
351 + spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
355 +ext3_unlock_group(struct super_block *sb, int group)
357 + spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
360 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
365 + J_ASSERT(EXT3_MB_BITMAP(e3b) != EXT3_MB_BUDDY(e3b));
366 + J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
368 + bb = EXT3_MB_BUDDY(e3b);
369 + while (order <= e3b->bd_blkbits + 1) {
370 + block = block >> 1;
371 + if (!mb_test_bit(block, bb)) {
372 + /* this block is part of buddy of order 'order' */
375 + bb += 1 << (e3b->bd_blkbits - order);
381 +static inline void mb_clear_bits(void *bm, int cur, int len)
386 + while (cur < len) {
387 + if ((cur & 31) == 0 && (len - cur) >= 32) {
388 + /* fast path: clear whole word at once */
389 + addr = bm + (cur >> 3);
394 + mb_clear_bit_atomic(cur, bm);
399 +static inline void mb_set_bits(void *bm, int cur, int len)
404 + while (cur < len) {
405 + if ((cur & 31) == 0 && (len - cur) >= 32) {
406 + /* fast path: set whole word at once */
407 + addr = bm + (cur >> 3);
408 + *addr = 0xffffffff;
412 + mb_set_bit_atomic(cur, bm);
417 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
419 + int block, max, order;
420 + void *buddy, *buddy2;
422 + mb_check_buddy(e3b);
424 + e3b->bd_bd->bb_free += count;
425 + if (first < e3b->bd_bd->bb_first_free)
426 + e3b->bd_bd->bb_first_free = first;
428 + while (count-- > 0) {
432 + J_ASSERT(mb_test_bit(block, EXT3_MB_BITMAP(e3b)));
433 + mb_clear_bit(block, EXT3_MB_BITMAP(e3b));
434 + e3b->bd_bd->bb_counters[order]++;
436 + /* start of the buddy */
437 + buddy = mb_find_buddy(e3b, order, &max);
441 + if (mb_test_bit(block, buddy) ||
442 + mb_test_bit(block + 1, buddy))
445 + /* both the buddies are free, try to coalesce them */
446 + buddy2 = mb_find_buddy(e3b, order + 1, &max);
452 + /* for special purposes, we don't set
453 + * free bits in bitmap */
454 + mb_set_bit(block, buddy);
455 + mb_set_bit(block + 1, buddy);
457 + e3b->bd_bd->bb_counters[order]--;
458 + e3b->bd_bd->bb_counters[order]--;
460 + block = block >> 1;
462 + e3b->bd_bd->bb_counters[order]++;
464 + mb_clear_bit(block, buddy2);
468 + mb_check_buddy(e3b);
473 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
474 + int needed, struct ext3_free_extent *ex)
476 + int next, max, ord;
479 + J_ASSERT(ex != NULL);
481 + buddy = mb_find_buddy(e3b, order, &max);
483 + J_ASSERT(block < max);
484 + if (mb_test_bit(block, buddy)) {
492 + /* find actual order */
493 + order = mb_find_order_for_block(e3b, block);
494 + block = block >> order;
497 + ex->fe_len = 1 << order;
498 + ex->fe_start = block << order;
499 + ex->fe_group = e3b->bd_group;
501 + while ((buddy = mb_find_buddy(e3b, order, &max))) {
503 + if (block + 1 >= max)
506 + next = (block + 1) * (1 << order);
507 + if (mb_test_bit(next, EXT3_MB_BITMAP(e3b)))
510 + ord = mb_find_order_for_block(e3b, next);
513 + block = next >> order;
514 + ex->fe_len += 1 << order;
517 + J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
521 +static int mb_mark_used(struct ext3_buddy *e3b, struct ext3_free_extent *ex)
523 + int start = ex->fe_start;
524 + int len = ex->fe_len;
525 + int ord, mlen, max, cur;
529 + e3b->bd_bd->bb_free -= len;
530 + if (e3b->bd_bd->bb_first_free == start)
531 + e3b->bd_bd->bb_first_free += len;
534 + ord = mb_find_order_for_block(e3b, start);
536 + if (((start >> ord) << ord) == start && len >= (1 << ord)) {
537 + /* the whole chunk may be allocated at once! */
539 + buddy = mb_find_buddy(e3b, ord, &max);
540 + J_ASSERT((start >> ord) < max);
541 + mb_set_bit(start >> ord, buddy);
542 + e3b->bd_bd->bb_counters[ord]--;
545 + J_ASSERT(len >= 0);
549 + /* we have to split large buddy */
551 + buddy = mb_find_buddy(e3b, ord, &max);
552 + mb_set_bit(start >> ord, buddy);
553 + e3b->bd_bd->bb_counters[ord]--;
556 + cur = (start >> ord) & ~1U;
557 + buddy = mb_find_buddy(e3b, ord, &max);
558 + mb_clear_bit(cur, buddy);
559 + mb_clear_bit(cur + 1, buddy);
560 + e3b->bd_bd->bb_counters[ord]++;
561 + e3b->bd_bd->bb_counters[ord]++;
564 + /* now drop all the bits in bitmap */
565 + mb_set_bits(EXT3_MB_BITMAP(e3b), ex->fe_start, len0);
567 + mb_check_buddy(e3b);
573 + * Must be called under group lock!
575 +static void ext3_mb_use_best_found(struct ext3_allocation_context *ac,
576 + struct ext3_buddy *e3b)
578 + ac->ac_b_ex.fe_len = min(ac->ac_b_ex.fe_len, ac->ac_g_ex.fe_len);
579 + mb_mark_used(e3b, &ac->ac_b_ex);
580 + ac->ac_status = AC_STATUS_FOUND;
584 + * The routine checks whether found extent is good enough. If it is,
585 + * then the extent gets marked used and flag is set to the context
586 + * to stop scanning. Otherwise, the extent is compared with the
587 + * previous found extent and if new one is better, then it's stored
588 + * in the context. Later, the best found extent will be used, if
589 + * mballoc can't find good enough extent.
591 + * FIXME: real allocation policy is to be designed yet!
593 +static void ext3_mb_measure_extent(struct ext3_allocation_context *ac,
594 + struct ext3_free_extent *ex,
595 + struct ext3_buddy *e3b)
597 + int factor = EXT3_SB(ac->ac_sb)->s_mb_factor;
598 + struct ext3_free_extent *bex = &ac->ac_b_ex;
599 + int diff = ac->ac_g_ex.fe_len - ex->fe_len;
601 + J_ASSERT(ex->fe_len > 0);
602 + J_ASSERT(ex->fe_len < (1 << ac->ac_sb->s_blocksize_bits) * 8);
603 + J_ASSERT(ex->fe_start < (1 << ac->ac_sb->s_blocksize_bits) * 8);
608 + * The special case - take what you catch first
610 + if (ac->ac_flags & EXT3_MB_HINT_FIRST) {
612 + ext3_mb_use_best_found(ac, e3b);
617 + * Let's check whether the chunk is good enough
619 + if (ex->fe_len >= ac->ac_g_ex.fe_len) {
621 + ext3_mb_use_best_found(ac, e3b);
626 + * If the request is very large, then it makes sense to use large
627 + * chunks for it. Even if they don't satisfy whole request.
629 + if (ex->fe_len > 1000) {
631 + ext3_mb_use_best_found(ac, e3b);
636 + * Sometimes it's worth taking a close chunk
638 + if (factor && (ac->ac_g_ex.fe_len * 100) / (diff * 100) >= factor) {
640 + ext3_mb_use_best_found(ac, e3b);
645 + * If this is first found extent, just store it in the context
647 + if (bex->fe_len == 0) {
653 + * If new found extent is better, store it in the context
654 + * FIXME: possible the policy should be more complex?
656 + if (ex->fe_len > bex->fe_len) {
661 + * We don't want to scan for a whole year
663 + if (ac->ac_found > EXT3_MB_MAX_TO_SCAN)
664 + ac->ac_status = AC_STATUS_BREAK;
667 +static int ext3_mb_try_best_found(struct ext3_allocation_context *ac,
668 + struct ext3_buddy *e3b)
670 + struct ext3_free_extent ex = ac->ac_b_ex;
671 + int group = ex.fe_group, max, err;
673 + J_ASSERT(ex.fe_len > 0);
674 + err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
678 + ext3_lock_group(ac->ac_sb, group);
679 + max = mb_find_extent(e3b, 0, ex.fe_start, ex.fe_len, &ex);
682 + ext3_mb_use_best_found(ac, e3b);
684 + ext3_unlock_group(ac->ac_sb, group);
686 + if (ac->ac_status == AC_STATUS_FOUND)
687 + ext3_mb_dirty_buddy(e3b);
688 + ext3_mb_release_desc(e3b);
693 +static int ext3_mb_find_by_goal(struct ext3_allocation_context *ac,
694 + struct ext3_buddy *e3b)
696 + int group = ac->ac_g_ex.fe_group, max, err;
697 + struct ext3_free_extent ex;
699 + err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
703 + ext3_lock_group(ac->ac_sb, group);
704 + max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
705 + ac->ac_g_ex.fe_len, &ex);
708 + J_ASSERT(ex.fe_len > 0);
709 + J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
710 + J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
712 + ext3_mb_use_best_found(ac, e3b);
714 + ext3_unlock_group(ac->ac_sb, group);
716 + if (ac->ac_status == AC_STATUS_FOUND)
717 + ext3_mb_dirty_buddy(e3b);
718 + ext3_mb_release_desc(e3b);
723 + * The routine scans the group and measures all found extents.
724 + * In order to optimize scanning, caller must pass number of
725 + * free blocks in the group, so the routine can upper limit.
727 +static void ext3_mb_scan_group(struct ext3_allocation_context *ac,
728 + struct ext3_buddy *e3b)
730 + struct super_block *sb = ac->ac_sb;
731 + void *bitmap = EXT3_MB_BITMAP(e3b);
732 + struct ext3_free_extent ex;
735 + free = e3b->bd_bd->bb_free;
736 + J_ASSERT(free > 0);
738 + i = e3b->bd_bd->bb_first_free;
740 + while (free && ac->ac_status != AC_STATUS_FOUND) {
741 + i = ext2_find_next_zero_bit(bitmap, sb->s_blocksize * 8, i);
742 + if (i >= sb->s_blocksize * 8) {
743 + J_ASSERT(free == 0);
747 + mb_find_extent(e3b, 0, i, ac->ac_g_ex.fe_len, &ex);
748 + J_ASSERT(ex.fe_len > 0);
749 + J_ASSERT(free >= ex.fe_len);
751 + ext3_mb_measure_extent(ac, &ex, e3b);
758 +static int ext3_mb_good_group(struct ext3_allocation_context *ac,
763 + J_ASSERT(cr >= 0 && cr < 3);
765 + free = EXT3_SB(ac->ac_sb)->s_buddy_blocks[group]->bb_free;
770 + if (free >= ac->ac_g_ex.fe_len >> 1)
772 + } else if (cr == 1) {
773 + if (free >= ac->ac_g_ex.fe_len >> 2)
775 + } else if (cr == 2) {
781 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
782 + unsigned long goal, int *len, int flags, int *errp)
784 + struct buffer_head *bitmap_bh = NULL;
785 + struct ext3_allocation_context ac;
786 + int i, group, block, cr, err = 0;
787 + struct ext3_group_desc *gdp;
788 + struct ext3_super_block *es;
789 + struct buffer_head *gdp_bh;
790 + struct ext3_sb_info *sbi;
791 + struct super_block *sb;
792 + struct ext3_buddy e3b;
794 + J_ASSERT(len != NULL);
795 + J_ASSERT(*len > 0);
799 + printk("ext3_mb_new_nblocks: nonexistent device");
803 + if (!test_opt(sb, MBALLOC)) {
804 + static int ext3_mballoc_warning = 0;
805 + if (ext3_mballoc_warning == 0) {
806 + printk(KERN_ERR "EXT3-fs: multiblock request with "
807 + "mballoc disabled!\n");
808 + ext3_mballoc_warning++;
811 + err = ext3_new_block_old(handle, inode, goal, errp);
815 + ext3_mb_poll_new_transaction(sb, handle);
818 + es = EXT3_SB(sb)->s_es;
821 + * We can't allocate > group size
823 + if (*len >= EXT3_BLOCKS_PER_GROUP(sb) - 10)
824 + *len = EXT3_BLOCKS_PER_GROUP(sb) - 10;
826 + if (!(flags & EXT3_MB_HINT_RESERVED)) {
827 + /* someone asks for non-reserved blocks */
829 + err = ext3_mb_reserve_blocks(sb, 1);
837 + * Check quota for allocation of this blocks.
839 + while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
847 + /* start searching from the goal */
848 + if (goal < le32_to_cpu(es->s_first_data_block) ||
849 + goal >= le32_to_cpu(es->s_blocks_count))
850 + goal = le32_to_cpu(es->s_first_data_block);
851 + group = (goal - le32_to_cpu(es->s_first_data_block)) /
852 + EXT3_BLOCKS_PER_GROUP(sb);
853 + block = ((goal - le32_to_cpu(es->s_first_data_block)) %
854 + EXT3_BLOCKS_PER_GROUP(sb));
856 + /* set up allocation goals */
857 + ac.ac_b_ex.fe_group = 0;
858 + ac.ac_b_ex.fe_start = 0;
859 + ac.ac_b_ex.fe_len = 0;
860 + ac.ac_status = AC_STATUS_CONTINUE;
861 + ac.ac_groups_scanned = 0;
862 + ac.ac_ex_scanned = 0;
864 + ac.ac_sb = inode->i_sb;
865 + ac.ac_g_ex.fe_group = group;
866 + ac.ac_g_ex.fe_start = block;
867 + ac.ac_g_ex.fe_len = *len;
868 + ac.ac_flags = flags;
871 + * Sometimes, caller may want to merge even small number
872 + * of blocks to an existing extent
874 + if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
875 + err = ext3_mb_find_by_goal(&ac, &e3b);
878 + if (ac.ac_status == AC_STATUS_FOUND)
884 + * If requested chunk is power of 2 length, we can try
885 + * to exploit buddy nature to speed allocation up
890 + * Let's just scan groups to find more-less suitable blocks
894 + for (; cr < 3 && ac.ac_status == AC_STATUS_CONTINUE; cr++) {
895 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
896 + if (group == EXT3_SB(sb)->s_groups_count)
899 + /* check whether the group is good for our criteria */
900 + if (!ext3_mb_good_group(&ac, group, cr))
903 + err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b);
907 + ext3_lock_group(sb, group);
908 + if (!ext3_mb_good_group(&ac, group, cr)) {
909 + /* someone did allocation from this group */
910 + ext3_unlock_group(sb, group);
911 + ext3_mb_release_desc(&e3b);
915 + ext3_mb_scan_group(&ac, &e3b);
916 + ext3_unlock_group(sb, group);
918 + if (ac.ac_status == AC_STATUS_FOUND)
919 + ext3_mb_dirty_buddy(&e3b);
920 + ext3_mb_release_desc(&e3b);
924 + if (ac.ac_status != AC_STATUS_CONTINUE)
929 + if (ac.ac_status == AC_STATUS_BREAK &&
930 + !(ac.ac_flags & EXT3_MB_HINT_FIRST)) {
932 + * We've been searching too long. Let's try to allocate
933 + * the best chunk we've found so far
935 + /*ext3_warning(inode->i_sb, __FUNCTION__,
936 + "too long searching: got %d want %d\n",
937 + ac.ac_b_ex.fe_len, ac.ac_g_ex.fe_len);*/
938 + ext3_mb_try_best_found(&ac, &e3b);
939 + if (ac.ac_status != AC_STATUS_FOUND) {
941 + * Someone more lucky has already allocated it.
942 + * The only thing we can do is just take first
945 + /*printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");*/
946 + ac.ac_b_ex.fe_group = 0;
947 + ac.ac_b_ex.fe_start = 0;
948 + ac.ac_b_ex.fe_len = 0;
949 + ac.ac_status = AC_STATUS_CONTINUE;
950 + ac.ac_flags |= EXT3_MB_HINT_FIRST;
956 + if (ac.ac_status != AC_STATUS_FOUND) {
958 + * We aren't lucky definitely
960 + DQUOT_FREE_BLOCK(inode, *len);
964 + printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
965 + ac.ac_status, ac.ac_flags);
966 + printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
967 + ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
968 + ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
969 + printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
970 + sbi->s_blocks_reserved, ac.ac_found);
971 + printk("EXT3-fs: groups: ");
972 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
973 + printk("%d: %d ", i,
974 + sbi->s_buddy_blocks[i]->bb_free);
981 + J_ASSERT(ac.ac_b_ex.fe_len > 0);
983 + /* good news - free block(s) have been found. now it's time
984 + * to mark block(s) in good old journaled bitmap */
985 + block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb)
986 + + ac.ac_b_ex.fe_start
987 + + le32_to_cpu(es->s_first_data_block);
989 + /* we made a decision, now mark found blocks in good old
990 + * bitmap to be journaled */
992 + ext3_debug("using block group %d(%d)\n",
993 + ac.ac_b_group.group, gdp->bg_free_blocks_count);
995 + bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group);
1001 + err = ext3_journal_get_write_access(handle, bitmap_bh);
1007 + gdp = ext3_get_group_desc(sb, ac.ac_b_ex.fe_group, &gdp_bh);
1013 + err = ext3_journal_get_write_access(handle, gdp_bh);
1017 + block = ac.ac_b_ex.fe_group * EXT3_BLOCKS_PER_GROUP(sb)
1018 + + ac.ac_b_ex.fe_start
1019 + + le32_to_cpu(es->s_first_data_block);
1021 + if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
1022 + block == le32_to_cpu(gdp->bg_inode_bitmap) ||
1023 + in_range(block, le32_to_cpu(gdp->bg_inode_table),
1024 + EXT3_SB(sb)->s_itb_per_group))
1025 + ext3_error(sb, "ext3_new_block",
1026 + "Allocating block in system zone - "
1027 + "block = %u", block);
1028 +#ifdef AGGRESSIVE_CHECK
1029 + for (i = 0; i < ac.ac_b_ex.fe_len; i++)
1030 + J_ASSERT(!mb_test_bit(ac.ac_b_ex.fe_start + i, bitmap_bh->b_data));
1032 + mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len);
1034 + spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
1035 + gdp->bg_free_blocks_count =
1036 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
1037 + - ac.ac_b_ex.fe_len);
1038 + spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
1039 + percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len);
1041 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1044 + err = ext3_journal_dirty_metadata(handle, gdp_bh);
1050 + brelse(bitmap_bh);
1052 + /* release quota charged for blocks that were not allocated */
1053 + J_ASSERT(*len >= ac.ac_b_ex.fe_len);
1054 + DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_ex.fe_len);
1056 + *len = ac.ac_b_ex.fe_len;
1057 + J_ASSERT(*len > 0);
1058 + J_ASSERT(block != 0);
1062 + /* if we've already allocated something, roll it back */
1063 + if (ac.ac_status == AC_STATUS_FOUND) {
1064 + /* FIXME: free blocks here */
1067 + DQUOT_FREE_BLOCK(inode, *len);
1068 + brelse(bitmap_bh);
1072 + if (!(flags & EXT3_MB_HINT_RESERVED)) {
1073 + /* block wasn't reserved before and we reserved it
1074 + * at the beginning of allocation. it doesn't matter
1075 + * whether we allocated anything or we failed: time
1076 + * to release reservation. NOTE: because I expect
1077 + * any multiblock request from delayed allocation
1078 + * path only, here is single block always */
1079 + ext3_mb_release_blocks(sb, 1);
1081 +#ifdef MBALLOC_STATS
1082 + if (ac.ac_g_ex.fe_len > 1) {
1083 + spin_lock(&sbi->s_bal_lock);
1084 + sbi->s_bal_reqs++;
1085 + sbi->s_bal_allocated += *len;
1086 + if (*len >= ac.ac_g_ex.fe_len)
1087 + sbi->s_bal_success++;
1088 + sbi->s_bal_ex_scanned += ac.ac_found;
1089 + if (ac.ac_g_ex.fe_start == ac.ac_b_ex.fe_start &&
1090 + ac.ac_g_ex.fe_group == ac.ac_b_ex.fe_group)
1091 + sbi->s_bal_goals++;
1092 + if (ac.ac_found > EXT3_MB_MAX_TO_SCAN)
1093 + sbi->s_bal_breaks++;
1094 + spin_unlock(&sbi->s_bal_lock);
1100 +int ext3_mb_get_descr_loc(struct ext3_buddy *e3b, struct buffer_head **bh,
1101 + struct ext3_mb_group_descr **grp)
1103 + struct super_block *sb = e3b->bd_sb;
1104 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1105 + int descr_per_block, err, offset;
1106 + struct ext3_mb_grp_header *hdr;
1107 + unsigned long block;
1109 + descr_per_block = (sb->s_blocksize - sizeof(struct ext3_mb_grp_header))
1110 + / sizeof(struct ext3_mb_group_descr);
1111 + block = e3b->bd_group / descr_per_block;
1112 + *bh = ext3_bread(NULL, sbi->s_buddy, block, 0, &err);
1113 + if (*bh == NULL) {
1114 + printk(KERN_ERR "EXT3-fs: cant getblk descr for group %d: %d\n",
1115 + e3b->bd_group, err);
1119 + hdr = (struct ext3_mb_grp_header *) (*bh)->b_data;
1120 + if (hdr->mh_magic != EXT3_MB_MAGIC_V1) {
1121 + printk(KERN_ERR "EXT3-fs: invalid magic in group %d!\n",
1128 + offset = e3b->bd_group % descr_per_block
1129 + * sizeof(struct ext3_mb_group_descr)
1130 + + sizeof(struct ext3_mb_grp_header);
1131 + *grp = (struct ext3_mb_group_descr *) ((*bh)->b_data + offset);
1136 +int ext3_mb_load_descr(struct ext3_buddy *e3b)
1138 + struct ext3_mb_group_descr *grp;
1139 + struct ext3_group_desc *gdp;
1140 + struct buffer_head *bh;
1143 + err = ext3_mb_get_descr_loc(e3b, &bh, &grp);
1147 + e3b->bd_bd->bb_first_free = grp->mgd_first_free;
1148 + e3b->bd_bd->bb_free = grp->mgd_free;
1149 + for (i = 0; i <= e3b->bd_blkbits + 1; i++) {
1151 + e3b->bd_bd->bb_counters[i] = grp->mgd_counters[i];
1155 + /* additional checks against old group descriptor */
1156 + gdp = ext3_get_group_desc(e3b->bd_sb, e3b->bd_group, NULL);
1159 + if (e3b->bd_bd->bb_free != le16_to_cpu(gdp->bg_free_blocks_count)) {
1160 + printk(KERN_ERR "EXT3-fs: mbgroup %d corrupted (%d != %d)\n",
1161 + e3b->bd_group, e3b->bd_bd->bb_free,
1162 + le16_to_cpu(gdp->bg_free_blocks_count));
1170 +int ext3_mb_update_descr(struct ext3_buddy *e3b)
1172 + struct ext3_mb_group_descr *grp;
1173 + struct ext3_group_desc *gdp;
1174 + struct buffer_head *bh;
1178 + /* additional checks against old group descriptor */
1179 + gdp = ext3_get_group_desc(e3b->bd_sb, e3b->bd_group, NULL);
1182 + if (e3b->bd_bd->bb_free != le16_to_cpu(gdp->bg_free_blocks_count)) {
1183 + printk(KERN_ERR "EXT3-fs: mbgroup %d corrupted (%d != %d)\n",
1184 + e3b->bd_group, e3b->bd_bd->bb_free,
1185 + le16_to_cpu(gdp->bg_free_blocks_count));
1189 + err = ext3_mb_get_descr_loc(e3b, &bh, &grp);
1193 + handle = ext3_journal_start_sb(e3b->bd_sb, 1);
1194 + if (IS_ERR(handle)) {
1195 + err = PTR_ERR(handle);
1200 + err = ext3_journal_get_write_access(handle, bh);
1203 + grp->mgd_first_free = e3b->bd_bd->bb_first_free;
1204 + grp->mgd_free = e3b->bd_bd->bb_free;
1205 + for (i = 0; i <= e3b->bd_blkbits + 1; i++) {
1207 + grp->mgd_counters[i] = e3b->bd_bd->bb_counters[i];
1209 + err = ext3_journal_dirty_metadata(handle, bh);
1216 + ext3_journal_stop(handle);
1220 +int ext3_mb_generate_buddy(struct ext3_buddy *e3b)
1222 + struct super_block *sb = e3b->bd_sb;
1223 + struct buffer_head *bh;
1226 + mb_debug("generate buddy for group %d\n", e3b->bd_group);
1227 + memset(e3b->bd_bh->b_data, 0xff, sb->s_blocksize);
1228 + memset(e3b->bd_bh2->b_data, 0xff, sb->s_blocksize);
1230 + bh = read_block_bitmap(sb, e3b->bd_group);
1234 + /* mb_free_blocks will set real free */
1235 + e3b->bd_bd->bb_free = 0;
1236 + e3b->bd_bd->bb_first_free = 1 << 15;
1238 + * if change bb_counters size, don't forget about
1239 + * ext3_mb_init_backend() -bzzz
1241 + memset(e3b->bd_bd->bb_counters, 0,
1242 + sizeof(unsigned) * (sb->s_blocksize_bits + 2));
1244 + /* loop over the blocks, and create buddies for free ones */
1245 + for (i = 0; i < sb->s_blocksize * 8; i++) {
1246 + if (!mb_test_bit(i, (void *) bh->b_data)) {
1247 + mb_free_blocks(e3b, i, 1);
1252 + mb_check_buddy(e3b);
1253 + ext3_mb_dirty_buddy(e3b);
1258 +EXPORT_SYMBOL(ext3_mb_new_blocks);
1260 +#define MB_CREDITS \
1261 + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
1262 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
1264 +int ext3_mb_init_backend(struct super_block *sb, int *created)
1266 + int err, i, len, descr_per_block, buddy_offset, size;
1267 + struct inode *root = sb->s_root->d_inode;
1268 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1269 + struct ext3_mb_grp_header *hdr;
1270 + struct buffer_head *bh = NULL;
1271 + unsigned long block;
1272 + struct dentry *db;
1277 + len = sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count;
1278 + sbi->s_buddy_blocks = kmalloc(len, GFP_KERNEL);
1279 + if (sbi->s_buddy_blocks == NULL) {
1280 + printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
1283 + memset(sbi->s_buddy_blocks, 0, len);
1284 + sbi->s_buddy = NULL;
1286 + down(&root->i_sem);
1287 + len = strlen(EXT3_BUDDY_FILE);
1288 + db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root, len);
1290 + err = PTR_ERR(db);
1291 + printk(KERN_ERR "EXT3-fs: cant lookup buddy: %d\n", err);
1296 + if (db->d_inode == NULL) {
1297 + err = ext3_create(root, db, S_IFREG, NULL);
1299 + printk(KERN_ERR "EXT3-fs: cant create buddy: %d\n", err);
1303 + db->d_inode->i_flags |= S_IMMUTABLE | S_NOATIME;
1305 + mb_debug("no buddy file, regenerate\n");
1308 + sbi->s_buddy = igrab(db->d_inode);
1310 + /* calculate needed size */
1311 + descr_per_block = (sb->s_blocksize - sizeof(struct ext3_mb_grp_header))
1312 + / sizeof(struct ext3_mb_group_descr);
1313 + buddy_offset = (sbi->s_groups_count + descr_per_block - 1)
1314 + / descr_per_block;
1315 + len = sbi->s_groups_count * sb->s_blocksize * 2 +
1316 + buddy_offset * sb->s_blocksize;
1317 + if (len != i_size_read(sbi->s_buddy)) {
1318 + if (*created == 0)
1319 + printk("EXT3-fs: wrong i_size (%u != %u), regenerate\n",
1321 + (unsigned) i_size_read(sbi->s_buddy));
1325 + /* read/create mb group descriptors */
1326 + for (i = 0; i < buddy_offset; i++) {
1327 + handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1328 + if (IS_ERR(handle)) {
1329 + printk(KERN_ERR "EXT3-fs: cant start transaction\n");
1330 + err = PTR_ERR(handle);
1334 + bh = ext3_bread(handle, sbi->s_buddy, i, 1, &err);
1336 + printk(KERN_ERR "EXT3-fs: cant getblk grp: %d\n", err);
1339 + hdr = (struct ext3_mb_grp_header *) bh->b_data;
1340 + if (hdr->mh_magic != EXT3_MB_MAGIC_V1) {
1341 + err = ext3_journal_get_write_access(handle, bh);
1344 + if (*created == 0)
1346 + "EXT3-fs: invalid header 0x%x in %d,"
1347 + "regenerate\n", hdr->mh_magic, i);
1349 + hdr->mh_magic = EXT3_MB_MAGIC_V1;
1350 + err = ext3_journal_dirty_metadata(handle, bh);
1355 + ext3_journal_stop(handle);
1359 + * if you change the bb_counters size, don't forget about ext3_mb_generate_buddy()
1361 + len = sizeof(struct ext3_buddy_group_blocks);
1362 + len += sizeof(unsigned) * (sb->s_blocksize_bits + 2);
1363 + for (i = 0; i < sbi->s_groups_count; i++) {
1365 + sbi->s_buddy_blocks[i] = kmalloc(len, GFP_KERNEL);
1366 + if (sbi->s_buddy_blocks[i] == NULL) {
1367 + printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
1371 + memset(sbi->s_buddy_blocks[i], 0, len);
1373 + handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1374 + if (IS_ERR(handle)) {
1375 + printk(KERN_ERR "EXT3-fs: cant start transaction\n");
1376 + err = PTR_ERR(handle);
1380 + /* allocate block for bitmap */
1381 + block = buddy_offset + i * 2;
1382 + bh = ext3_getblk(handle, sbi->s_buddy, block, 1, &err);
1384 + printk(KERN_ERR "EXT3-fs: cant getblk bitmap: %d\n", err);
1387 + sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
1390 + /* allocate block for buddy */
1391 + block = buddy_offset + i * 2 + 1;
1392 + bh = ext3_getblk(handle, sbi->s_buddy, block, 1, &err);
1394 + printk(KERN_ERR "EXT3-fs: cant getblk for buddy: %d\n", err);
1397 + sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
1400 + size = (block + 1) << sbi->s_buddy->i_blkbits;
1401 + if (size > sbi->s_buddy->i_size) {
1403 + EXT3_I(sbi->s_buddy)->i_disksize = size;
1404 + i_size_write(sbi->s_buddy, size);
1405 + mark_inode_dirty(sbi->s_buddy);
1407 + ext3_journal_stop(handle);
1409 + spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
1410 + sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
1411 + sbi->s_buddy_blocks[i]->bb_tid = 0;
1414 + if (journal_start_commit(sbi->s_journal, &target))
1415 + log_wait_commit(sbi->s_journal, target);
1426 +int ext3_mb_write_descriptors(struct super_block *sb)
1428 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1429 + struct ext3_buddy e3b;
1430 + int ret = 0, i, err;
1432 + for (i = 0; i < sbi->s_groups_count; i++) {
1433 + if (sbi->s_buddy_blocks[i] == NULL)
1436 + err = ext3_mb_load_buddy(sb, i, &e3b);
1438 + ext3_mb_update_descr(&e3b);
1439 + ext3_mb_release_desc(&e3b);
1446 +int ext3_mb_release(struct super_block *sb)
1448 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1451 + if (!test_opt(sb, MBALLOC))
1454 + /* release freed, non-committed blocks */
1455 + spin_lock(&sbi->s_md_lock);
1456 + list_splice_init(&sbi->s_closed_transaction,
1457 + &sbi->s_committed_transaction);
1458 + list_splice_init(&sbi->s_active_transaction,
1459 + &sbi->s_committed_transaction);
1460 + spin_unlock(&sbi->s_md_lock);
1461 + ext3_mb_free_committed_blocks(sb);
1463 + if (sbi->s_buddy_blocks) {
1464 + ext3_mb_write_descriptors(sb);
1465 + for (i = 0; i < sbi->s_groups_count; i++) {
1466 + if (sbi->s_buddy_blocks[i] == NULL)
1468 + kfree(sbi->s_buddy_blocks[i]);
1470 + kfree(sbi->s_buddy_blocks);
1473 + iput(sbi->s_buddy);
1474 + if (sbi->s_blocks_reserved)
1475 + printk("ext3-fs: %ld blocks being reserved at umount!\n",
1476 + sbi->s_blocks_reserved);
1477 +#ifdef MBALLOC_STATS
1478 + printk("EXT3-fs: mballoc: %lu blocks %lu reqs (%lu success)\n",
1479 + sbi->s_bal_allocated, sbi->s_bal_reqs, sbi->s_bal_success);
1480 + printk("EXT3-fs: mballoc: %lu extents scanned, %lu goal hits, %lu breaks\n",
1481 + sbi->s_bal_ex_scanned, sbi->s_bal_goals, sbi->s_bal_breaks);
1486 +int ext3_mb_init(struct super_block *sb, int needs_recovery)
1488 + struct ext3_buddy e3b;
1489 + int i, err, created;
1491 + if (!test_opt(sb, MBALLOC))
1494 + /* init file for buddy data */
1495 + clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1496 + if ((err = ext3_mb_init_backend(sb, &created)))
1500 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
1501 + err = ext3_mb_load_buddy(sb, i, &e3b);
1503 + /* FIXME: release backend */
1506 + if (created || needs_recovery)
1507 + ext3_mb_generate_buddy(&e3b);
1509 + err = ext3_mb_load_descr(&e3b);
1510 + ext3_mb_release_desc(&e3b);
1511 + if (err == -ENODATA) {
1516 + if (created || needs_recovery)
1517 + printk(KERN_ERR "EXT3-fs: generated buddies for %lu groups\n",
1518 + EXT3_SB(sb)->s_groups_count);
1519 + spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1520 + spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1521 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1522 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1523 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1524 + set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1526 +#ifdef MBALLOC_STATS
1527 + spin_lock_init(&EXT3_SB(sb)->s_bal_lock);
1528 +#define MBALLOC_INFO " (stats)"
1530 +#define MBALLOC_INFO ""
1532 + printk("EXT3-fs: mballoc enabled%s\n", MBALLOC_INFO);
1536 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1538 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1539 + int err, i, count = 0, count2 = 0;
1540 + struct ext3_free_metadata *md;
1541 + struct ext3_buddy e3b;
1543 + if (list_empty(&sbi->s_committed_transaction))
1546 + /* there are committed blocks still to be freed */
1548 + /* get next array of blocks */
1550 + spin_lock(&sbi->s_md_lock);
1551 + if (!list_empty(&sbi->s_committed_transaction)) {
1552 + md = list_entry(sbi->s_committed_transaction.next,
1553 + struct ext3_free_metadata, list);
1554 + list_del(&md->list);
1556 + spin_unlock(&sbi->s_md_lock);
1561 + mb_debug("gonna free %u blocks in group %u (0x%p):",
1562 + md->num, md->group, md);
1564 + err = ext3_mb_load_buddy(sb, md->group, &e3b);
1567 + /* there are blocks to put in buddy to make them really free */
1570 + ext3_lock_group(sb, md->group);
1571 + for (i = 0; i < md->num; i++) {
1572 + mb_debug(" %u", md->blocks[i]);
1573 + mb_free_blocks(&e3b, md->blocks[i], 1);
1576 + ext3_unlock_group(sb, md->group);
1579 + ext3_mb_dirty_buddy(&e3b);
1580 + ext3_mb_release_desc(&e3b);
1583 + mb_debug("freed %u blocks in %u structures\n", count, count2);
1586 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1588 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1590 + if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1593 + /* new transaction! time to close the last one and free blocks for
1594 + * committed transactions. we know that only one transaction can be
1595 + * active, so the previous transaction may still be being logged, and
1596 + * we know that the transaction before the previous one is already
1597 + * logged. this means that now we may free blocks freed in all
1598 + * transactions before the previous one. hope I'm clear enough ... */
1600 + spin_lock(&sbi->s_md_lock);
1601 + if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1602 + mb_debug("new transaction %lu, old %lu\n",
1603 + (unsigned long) handle->h_transaction->t_tid,
1604 + (unsigned long) sbi->s_last_transaction);
1605 + list_splice_init(&sbi->s_closed_transaction,
1606 + &sbi->s_committed_transaction);
1607 + list_splice_init(&sbi->s_active_transaction,
1608 + &sbi->s_closed_transaction);
1609 + sbi->s_last_transaction = handle->h_transaction->t_tid;
1611 + spin_unlock(&sbi->s_md_lock);
1613 + ext3_mb_free_committed_blocks(sb);
1616 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1617 + int group, int block, int count)
1619 + struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1620 + struct super_block *sb = e3b->bd_sb;
1621 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1622 + struct ext3_free_metadata *md;
1625 + ext3_lock_group(sb, group);
1626 + for (i = 0; i < count; i++) {
1627 + md = db->bb_md_cur;
1628 + if (md && db->bb_tid != handle->h_transaction->t_tid) {
1629 + db->bb_md_cur = NULL;
1634 + ext3_unlock_group(sb, group);
1635 + md = kmalloc(sizeof(*md), GFP_NOFS); /* journal handle held: must not recurse into fs reclaim */
1639 + md->group = group;
1641 + ext3_lock_group(sb, group);
1642 + if (db->bb_md_cur == NULL) {
1643 + spin_lock(&sbi->s_md_lock);
1644 + list_add(&md->list, &sbi->s_active_transaction);
1645 + spin_unlock(&sbi->s_md_lock);
1646 + db->bb_md_cur = md;
1647 + db->bb_tid = handle->h_transaction->t_tid;
1648 + mb_debug("new md 0x%p for group %u\n",
1652 + md = db->bb_md_cur;
1656 + BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1657 + md->blocks[md->num] = block + i;
1659 + if (md->num == EXT3_BB_MAX_BLOCKS) {
1660 + /* no more space, put full container on a sb's list */
1661 + db->bb_md_cur = NULL;
1664 + ext3_unlock_group(sb, group);
1668 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1669 + unsigned long block, unsigned long count,
1670 + int metadata, int *freed)
1672 + struct buffer_head *bitmap_bh = NULL;
1673 + struct ext3_group_desc *gdp;
1674 + struct ext3_super_block *es;
1675 + unsigned long bit, overflow;
1676 + struct buffer_head *gd_bh;
1677 + unsigned long block_group;
1678 + struct ext3_sb_info *sbi;
1679 + struct super_block *sb;
1680 + struct ext3_buddy e3b;
1686 + printk ("ext3_free_blocks: nonexistent device");
1690 + ext3_mb_poll_new_transaction(sb, handle);
1692 + sbi = EXT3_SB(sb);
1693 + es = EXT3_SB(sb)->s_es;
1694 + if (block < le32_to_cpu(es->s_first_data_block) ||
1695 + block + count < block ||
1696 + block + count > le32_to_cpu(es->s_blocks_count)) {
1697 + ext3_error (sb, "ext3_free_blocks",
1698 + "Freeing blocks not in datazone - "
1699 + "block = %lu, count = %lu", block, count);
1700 + goto error_return;
1703 + ext3_debug("freeing block %lu\n", block);
1707 + block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1708 + EXT3_BLOCKS_PER_GROUP(sb);
1709 + bit = (block - le32_to_cpu(es->s_first_data_block)) %
1710 + EXT3_BLOCKS_PER_GROUP(sb);
1712 + * Check to see if we are freeing blocks across a group
1715 + if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1716 + overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1717 + count -= overflow;
1719 + brelse(bitmap_bh);
1720 + bitmap_bh = read_block_bitmap(sb, block_group);
1722 + goto error_return;
1723 + gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1725 + goto error_return;
1727 + if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1728 + in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1729 + in_range (block, le32_to_cpu(gdp->bg_inode_table),
1730 + EXT3_SB(sb)->s_itb_per_group) ||
1731 + in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1732 + EXT3_SB(sb)->s_itb_per_group))
1733 + ext3_error (sb, "ext3_free_blocks",
1734 + "Freeing blocks in system zones - "
1735 + "Block = %lu, count = %lu",
1738 + BUFFER_TRACE(bitmap_bh, "getting write access");
1739 + err = ext3_journal_get_write_access(handle, bitmap_bh);
1741 + goto error_return;
1744 + * We are about to modify some metadata. Call the journal APIs
1745 + * to unshare ->b_data if a currently-committing transaction is
1748 + BUFFER_TRACE(gd_bh, "get_write_access");
1749 + err = ext3_journal_get_write_access(handle, gd_bh);
1751 + goto error_return;
1753 + err = ext3_mb_load_buddy(sb, block_group, &e3b);
1755 + goto error_return;
1757 +#ifdef AGGRESSIVE_CHECK
1760 + for (i = 0; i < count; i++)
1761 + J_ASSERT(mb_test_bit(bit + i, bitmap_bh->b_data));
1764 + mb_clear_bits(bitmap_bh->b_data, bit, count);
1766 + /* We dirtied the bitmap block */
1767 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1768 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1771 + /* blocks being freed are metadata. these blocks shouldn't
1772 + * be used until this transaction is committed */
1773 + ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1775 + ext3_lock_group(sb, block_group);
1776 + mb_free_blocks(&e3b, bit, count);
1777 + ext3_unlock_group(sb, block_group);
1780 + spin_lock(sb_bgl_lock(sbi, block_group));
1781 + gdp->bg_free_blocks_count =
1782 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1783 + spin_unlock(sb_bgl_lock(sbi, block_group));
1784 + percpu_counter_mod(&sbi->s_freeblocks_counter, count);
1786 + ext3_mb_dirty_buddy(&e3b);
1787 + ext3_mb_release_desc(&e3b);
1791 + /* And the group descriptor block */
1792 + BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1793 + ret = ext3_journal_dirty_metadata(handle, gd_bh);
1794 + if (!err) err = ret;
1796 + if (overflow && !err) {
1803 + brelse(bitmap_bh);
1804 + ext3_std_error(sb, err);
1808 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1810 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1811 + int free, ret = -ENOSPC;
1813 + BUG_ON(blocks < 0);
1814 + spin_lock(&sbi->s_reserve_lock);
1815 + free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1816 + if (blocks <= free - sbi->s_blocks_reserved) {
1817 + sbi->s_blocks_reserved += blocks;
1820 + spin_unlock(&sbi->s_reserve_lock);
1824 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1826 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1828 + BUG_ON(blocks < 0);
1829 + spin_lock(&sbi->s_reserve_lock);
1830 + sbi->s_blocks_reserved -= blocks;
1831 + WARN_ON(sbi->s_blocks_reserved < 0);
1832 + if (sbi->s_blocks_reserved < 0)
1833 + sbi->s_blocks_reserved = 0;
1834 + spin_unlock(&sbi->s_reserve_lock);
1837 +int ext3_new_block(handle_t *handle, struct inode *inode,
1838 + unsigned long goal, int *errp)
1842 + if (!test_opt(inode->i_sb, MBALLOC)) {
1843 + ret = ext3_new_block_old(handle, inode, goal, errp);
1847 + ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1852 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1853 + unsigned long block, unsigned long count, int metadata)
1855 + struct super_block *sb;
1859 + if (!test_opt(sb, MBALLOC))
1860 + ext3_free_blocks_sb(handle, sb, block, count, &freed);
1862 + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
1864 + DQUOT_FREE_BLOCK(inode, freed);
1867 Index: linux-2.6.10/fs/ext3/super.c
1868 ===================================================================
1869 --- linux-2.6.10.orig/fs/ext3/super.c 2005-02-25 17:27:00.231757312 +0200
1870 +++ linux-2.6.10/fs/ext3/super.c 2005-02-25 17:28:41.862307120 +0200
1872 struct ext3_super_block *es = sbi->s_es;
1875 + ext3_mb_release(sb);
1876 ext3_ext_release(sb);
1877 ext3_xattr_put_super(sb);
1878 journal_destroy(sbi->s_journal);
1880 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1881 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_pdirops,
1882 Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
1883 - Opt_extents, Opt_extdebug,
1884 + Opt_extents, Opt_extdebug, Opt_mballoc, Opt_mbfactor,
1885 Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
1889 {Opt_iopen_nopriv, "iopen_nopriv"},
1890 {Opt_extents, "extents"},
1891 {Opt_extdebug, "extdebug"},
1892 + {Opt_mballoc, "mballoc"},
1893 + {Opt_mbfactor, "mbfactor=%u"},
1895 {Opt_resize, "resize"},
1897 @@ -956,6 +959,16 @@
1899 set_opt (sbi->s_mount_opt, EXTDEBUG);
1902 + set_opt (sbi->s_mount_opt, MBALLOC);
1904 + case Opt_mbfactor:
1905 + if (match_int(&args[0], &option))
1909 + sbi->s_mb_factor = option;
1913 "EXT3-fs: Unrecognized mount option \"%s\" "
1914 @@ -1639,8 +1652,9 @@
1915 percpu_counter_mod(&sbi->s_dirs_counter,
1916 ext3_count_dirs(sb));
1919 + ext3_mb_init(sb, needs_recovery);
1924 Index: linux-2.6.10/fs/ext3/Makefile
1925 ===================================================================
1926 --- linux-2.6.10.orig/fs/ext3/Makefile 2005-02-25 17:27:00.228757768 +0200
1927 +++ linux-2.6.10/fs/ext3/Makefile 2005-02-25 17:28:41.863306968 +0200
1930 ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
1931 ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o \
1933 + extents.o mballoc.o
1934 ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o extents-in-ea.o
1935 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
1936 ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o
1937 Index: linux-2.6.10/fs/ext3/balloc.c
1938 ===================================================================
1939 --- linux-2.6.10.orig/fs/ext3/balloc.c 2005-02-25 17:26:58.965949744 +0200
1940 +++ linux-2.6.10/fs/ext3/balloc.c 2005-02-25 17:28:41.865306664 +0200
1943 * Return buffer_head on success or NULL in case of failure.
1945 -static struct buffer_head *
1946 +struct buffer_head *
1947 read_block_bitmap(struct super_block *sb, unsigned int block_group)
1949 struct ext3_group_desc * desc;
1950 @@ -450,24 +450,6 @@
1954 -/* Free given blocks, update quota and i_blocks field */
1955 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
1956 - unsigned long block, unsigned long count)
1958 - struct super_block * sb;
1959 - int dquot_freed_blocks;
1963 - printk ("ext3_free_blocks: nonexistent device");
1966 - ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
1967 - if (dquot_freed_blocks)
1968 - DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
1973 * For ext3 allocations, we must not reuse any blocks which are
1974 * allocated in the bitmap buffer's "last committed data" copy. This
1975 @@ -1140,7 +1122,7 @@
1976 * bitmap, and then for any free bit if that fails.
1977 * This function also updates quota and i_blocks field.
1979 -int ext3_new_block(handle_t *handle, struct inode *inode,
1980 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
1981 unsigned long goal, int *errp)
1983 struct buffer_head *bitmap_bh = NULL;
1984 Index: linux-2.6.10/fs/ext3/namei.c
1985 ===================================================================
1986 --- linux-2.6.10.orig/fs/ext3/namei.c 2005-02-25 17:26:59.527864320 +0200
1987 +++ linux-2.6.10/fs/ext3/namei.c 2005-02-25 17:28:41.867306360 +0200
1988 @@ -1639,7 +1639,7 @@
1989 * If the create succeeds, we fill in the inode information
1990 * with d_instantiate().
1992 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1993 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1994 struct nameidata *nd)
1997 Index: linux-2.6.10/fs/ext3/inode.c
1998 ===================================================================
1999 --- linux-2.6.10.orig/fs/ext3/inode.c 2005-02-25 17:27:00.227757920 +0200
2000 +++ linux-2.6.10/fs/ext3/inode.c 2005-02-25 17:28:41.872305600 +0200
2002 ext3_journal_forget(handle, branch[i].bh);
2004 for (i = 0; i < keys; i++)
2005 - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
2006 + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
2012 for (i = 0; i < num; i++)
2013 ext3_free_blocks(handle, inode,
2014 - le32_to_cpu(where[i].key), 1);
2015 + le32_to_cpu(where[i].key), 1, 1);
2019 @@ -1831,7 +1831,7 @@
2023 - ext3_free_blocks(handle, inode, block_to_free, count);
2024 + ext3_free_blocks(handle, inode, block_to_free, count, 1);
2028 @@ -2004,7 +2004,7 @@
2029 ext3_journal_test_restart(handle, inode);
2032 - ext3_free_blocks(handle, inode, nr, 1);
2033 + ext3_free_blocks(handle, inode, nr, 1, 1);
2037 Index: linux-2.6.10/fs/ext3/extents.c
2038 ===================================================================
2039 --- linux-2.6.10.orig/fs/ext3/extents.c 2005-02-25 17:27:00.222758680 +0200
2040 +++ linux-2.6.10/fs/ext3/extents.c 2005-02-25 17:29:29.364085752 +0200
2042 for (i = 0; i < depth; i++) {
2045 - ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
2046 + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
2050 @@ -1391,7 +1391,7 @@
2051 path->p_idx->ei_leaf);
2052 bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
2053 ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
2054 - ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
2055 + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
2059 @@ -1879,10 +1879,12 @@
2060 int needed = ext3_remove_blocks_credits(tree, ex, from, to);
2061 handle_t *handle = ext3_journal_start(tree->inode, needed);
2062 struct buffer_head *bh;
2064 + int i, metadata = 0;
2067 return PTR_ERR(handle);
2068 + if (S_ISDIR(tree->inode->i_mode))
2070 if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
2072 unsigned long num, start;
2073 @@ -1894,7 +1896,7 @@
2074 bh = sb_find_get_block(tree->inode->i_sb, start + i);
2075 ext3_forget(handle, 0, tree->inode, bh, start + i);
2077 - ext3_free_blocks(handle, tree->inode, start, num);
2078 + ext3_free_blocks(handle, tree->inode, start, num, metadata);
2079 } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
2080 printk("strange request: removal %lu-%lu from %u:%u\n",
2081 from, to, ex->ee_block, ex->ee_len);
2082 Index: linux-2.6.10/fs/ext3/xattr.c
2083 ===================================================================
2084 --- linux-2.6.10.orig/fs/ext3/xattr.c 2005-02-25 17:26:59.876811272 +0200
2085 +++ linux-2.6.10/fs/ext3/xattr.c 2005-02-25 17:28:41.878304688 +0200
2086 @@ -1271,7 +1271,7 @@
2087 new_bh = sb_getblk(sb, block);
2090 - ext3_free_blocks(handle, inode, block, 1);
2091 + ext3_free_blocks(handle, inode, block, 1, 1);
2095 @@ -1318,7 +1318,7 @@
2097 mb_cache_entry_free(ce);
2098 ea_bdebug(old_bh, "freeing");
2099 - ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
2100 + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
2102 /* ext3_forget() calls bforget() for us, but we
2103 let our caller release old_bh, so we need to
2104 @@ -1417,7 +1417,7 @@
2105 if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
2107 mb_cache_entry_free(ce);
2108 - ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
2109 + ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
2111 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
2113 Index: linux-2.6.10/include/linux/ext3_fs.h
2114 ===================================================================
2115 --- linux-2.6.10.orig/include/linux/ext3_fs.h 2005-02-25 17:27:00.234756856 +0200
2116 +++ linux-2.6.10/include/linux/ext3_fs.h 2005-02-25 17:28:41.881304232 +0200
2118 #define ext3_debug(f, a...) do {} while (0)
2121 +#define EXT3_MULTIBLOCK_ALLOCATOR 1
2123 +#define EXT3_MB_HINT_MERGE 1
2124 +#define EXT3_MB_HINT_RESERVED 2
2125 +#define EXT3_MB_HINT_METADATA 4
2126 +#define EXT3_MB_HINT_FIRST 8
2127 +#define EXT3_MB_HINT_BEST 16
2130 * Special inodes numbers
2133 #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
2134 #define EXT3_MOUNT_EXTENTS 0x100000 /* Extents support */
2135 #define EXT3_MOUNT_EXTDEBUG 0x200000 /* Extents debug */
2136 +#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
2138 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
2139 #ifndef _LINUX_EXT2_FS_H
2141 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
2142 extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
2143 extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
2145 + unsigned long, int);
2146 extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
2147 unsigned long, unsigned long, int *);
2148 extern unsigned long ext3_count_free_blocks (struct super_block *);
2149 @@ -856,6 +865,37 @@
2150 extern struct inode_operations ext3_symlink_inode_operations;
2151 extern struct inode_operations ext3_fast_symlink_inode_operations;
2154 +extern int ext3_mb_init(struct super_block *, int);
2155 +extern int ext3_mb_release(struct super_block *);
2156 +extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
2157 +extern int ext3_mb_reserve_blocks(struct super_block *, int);
2158 +extern void ext3_mb_release_blocks(struct super_block *, int);
2161 +extern int ext3_wb_writepages(struct address_space *, struct writeback_control *);
2162 +extern int ext3_wb_prepare_write(struct file *file, struct page *page,
2163 + unsigned from, unsigned to);
2164 +extern int ext3_wb_commit_write(struct file *, struct page *, unsigned, unsigned);
2165 +extern int ext3_wb_writepage(struct page *, struct writeback_control *);
2166 +extern int ext3_wb_invalidatepage(struct page *, unsigned long);
2167 +extern int ext3_wb_releasepage(struct page *, int);
2168 +extern int ext3_wb_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t);
2169 +extern void ext3_wb_init(struct super_block *);
2170 +extern void ext3_wb_release(struct super_block *);
2173 +extern int ext3_wb_writepages(struct address_space *, struct writeback_control *);
2174 +extern int ext3_wb_prepare_write(struct file *file, struct page *page,
2175 + unsigned from, unsigned to);
2176 +extern int ext3_wb_commit_write(struct file *, struct page *, unsigned, unsigned);
2177 +extern int ext3_wb_writepage(struct page *, struct writeback_control *);
2178 +extern int ext3_wb_invalidatepage(struct page *, unsigned long);
2179 +extern int ext3_wb_releasepage(struct page *, int);
2180 +extern int ext3_wb_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t);
2181 +extern void ext3_wb_init(struct super_block *);
2182 +extern void ext3_wb_release(struct super_block *);
2185 extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
2186 extern int ext3_ext_get_block(handle_t *, struct inode *, long,
2187 Index: linux-2.6.10/include/linux/ext3_fs_sb.h
2188 ===================================================================
2189 --- linux-2.6.10.orig/include/linux/ext3_fs_sb.h 2005-02-25 17:26:59.641846992 +0200
2190 +++ linux-2.6.10/include/linux/ext3_fs_sb.h 2005-02-25 17:28:41.882304080 +0200
2193 #include <linux/blockgroup_lock.h>
2194 #include <linux/percpu_counter.h>
2195 +#include <linux/list.h>
2198 #include <linux/rbtree.h>
2200 +#define EXT3_BB_MAX_BLOCKS 30
2201 +struct ext3_free_metadata {
2202 + unsigned short group;
2203 + unsigned short num;
2204 + unsigned short blocks[EXT3_BB_MAX_BLOCKS];
2205 + struct list_head list;
2208 +struct ext3_buddy_group_blocks {
2211 + spinlock_t bb_lock;
2212 + unsigned long bb_tid;
2213 + struct ext3_free_metadata *bb_md_cur;
2214 + unsigned short bb_first_free;
2215 + unsigned short bb_free;
2216 + unsigned bb_counters[];
2220 * third extended-fs super-block data in memory
2223 int s_jquota_fmt; /* Format of quota to use */
2227 + /* for buddy allocator */
2228 + struct ext3_buddy_group_blocks **s_buddy_blocks;
2229 + struct inode *s_buddy;
2230 + long s_blocks_reserved;
2231 + spinlock_t s_reserve_lock;
2232 + struct list_head s_active_transaction;
2233 + struct list_head s_closed_transaction;
2234 + struct list_head s_committed_transaction;
2235 + spinlock_t s_md_lock;
2236 + tid_t s_last_transaction;
2239 + /* stats for buddy allocator */
2240 + spinlock_t s_bal_lock;
2241 + unsigned long s_bal_reqs; /* number of reqs with len > 1 */
2242 + unsigned long s_bal_success; /* we found long enough chunks */
2243 + unsigned long s_bal_allocated; /* in blocks */
2244 + unsigned long s_bal_ex_scanned; /* total extents scanned */
2245 + unsigned long s_bal_goals; /* goal hits */
2246 + unsigned long s_bal_breaks; /* too long searches */
2249 #endif /* _LINUX_EXT3_FS_SB */