1 Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
7 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 + * GNU General Public License for more details.
19 + * You should have received a copy of the GNU General Public License
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 + * mballoc.c contains the multiblocks allocation routines
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/namei.h>
33 +#include <linux/jbd.h>
34 +#include <linux/ext3_fs.h>
35 +#include <linux/ext3_jbd.h>
36 +#include <linux/quotaops.h>
37 +#include <linux/buffer_head.h>
38 +#include <linux/module.h>
42 + * - do not scan from the beginning, try to remember first free block
43 + * - mb_mark_used_* may allocate chunk right after splitting buddy
44 + * - special flag to advise allocator to look for requested + N blocks
45 + * this may improve interaction between extents and mballoc
49 + * with AGGRESSIVE_CHECK the allocator runs consistency checks over
50 + * structures. these checks slow things down a lot
52 +#define AGGRESSIVE_CHECK__
58 +#define mb_debug(fmt,a...) printk(fmt, ##a)
60 +#define mb_debug(fmt,a...)
64 + * where to save buddies structures between umount/mount (clean case only)
66 +#define EXT3_BUDDY_FILE ".buddy"
69 + * max. number of chunks to be tracked in ext3_free_extent struct
71 +#define MB_ARR_SIZE 32
73 +struct ext3_allocation_context {
74 + struct super_block *ac_sb;
82 + /* the best found extent */
87 + /* number of iterations done. we have to track to limit searching */
89 + int ac_groups_scanned;
93 +#define AC_STATUS_CONTINUE 1
94 +#define AC_STATUS_FOUND 2
101 + struct buffer_head *bd_bh;
102 + struct buffer_head *bd_bh2;
103 + struct ext3_buddy_group_blocks *bd_bd;
104 + struct super_block *bd_sb;
107 +struct ext3_free_extent {
110 + unsigned char fe_orders[MB_ARR_SIZE];
111 + unsigned char fe_nums;
112 + unsigned char fe_back;
115 +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
118 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
119 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
120 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
121 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
122 +int ext3_mb_reserve_blocks(struct super_block *, int);
123 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
124 +void ext3_mb_free_committed_blocks(struct super_block *);
126 +#define mb_correct_addr_and_bit(bit,addr) \
128 + if ((unsigned long)addr & 1) { \
132 + if ((unsigned long)addr & 2) { \
139 +static inline int mb_test_bit(int bit, void *addr)
141 + mb_correct_addr_and_bit(bit,addr);
142 + return test_bit(bit, addr);
145 +static inline void mb_set_bit(int bit, void *addr)
147 + mb_correct_addr_and_bit(bit,addr);
148 + set_bit(bit, addr);
151 +static inline void mb_clear_bit(int bit, void *addr)
153 + mb_correct_addr_and_bit(bit,addr);
154 + clear_bit(bit, addr);
157 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
162 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
163 + J_ASSERT(max != NULL);
165 + if (order > e3b->bd_blkbits + 1)
168 + /* at order 0 we see each particular block */
169 + *max = 1 << (e3b->bd_blkbits + 3);
171 + return e3b->bd_bitmap;
173 + bb = e3b->bd_buddy;
175 + while (i < order) {
176 + bb += 1 << (e3b->bd_blkbits - i);
183 +static int ext3_mb_load_desc(struct super_block *sb, int group,
184 + struct ext3_buddy *e3b)
186 + struct ext3_sb_info *sbi = EXT3_SB(sb);
188 + J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
189 + J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
192 + e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
193 + if (e3b->bd_bh == NULL) {
194 + ext3_error(sb, "ext3_mb_load_desc",
195 + "can't get block for buddy bitmap\n");
198 + if (!buffer_uptodate(e3b->bd_bh)) {
199 + ll_rw_block(READ, 1, &e3b->bd_bh);
200 + wait_on_buffer(e3b->bd_bh);
202 + J_ASSERT(buffer_uptodate(e3b->bd_bh));
205 + e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
206 + if (e3b->bd_bh2 == NULL) {
207 + ext3_error(sb, "ext3_mb_load_desc",
208 + "can't get block for buddy bitmap\n");
211 + if (!buffer_uptodate(e3b->bd_bh2)) {
212 + ll_rw_block(READ, 1, &e3b->bd_bh2);
213 + wait_on_buffer(e3b->bd_bh2);
215 + J_ASSERT(buffer_uptodate(e3b->bd_bh2));
217 + e3b->bd_bitmap = e3b->bd_bh->b_data;
218 + e3b->bd_buddy = e3b->bd_bh2->b_data;
219 + e3b->bd_blkbits = sb->s_blocksize_bits;
220 + e3b->bd_bd = sbi->s_buddy_blocks[group];
225 + brelse(e3b->bd_bh);
226 + brelse(e3b->bd_bh2);
228 + e3b->bd_bh2 = NULL;
232 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
234 + mark_buffer_dirty(e3b->bd_bh);
235 + mark_buffer_dirty(e3b->bd_bh2);
238 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
240 + brelse(e3b->bd_bh);
241 + brelse(e3b->bd_bh2);
244 +#ifdef AGGRESSIVE_CHECK
245 +static void mb_check_buddy(struct ext3_buddy *e3b)
247 + int order = e3b->bd_blkbits + 1;
248 + int max, max2, i, j, k, count;
249 + void *buddy, *buddy2;
251 + if (!test_opt(e3b->bd_sb, MBALLOC))
254 + while (order > 1) {
255 + buddy = mb_find_buddy(e3b, order, &max);
257 + buddy2 = mb_find_buddy(e3b, order - 1, &max2);
259 + J_ASSERT(buddy != buddy2);
260 + J_ASSERT(max * 2 == max2);
263 + for (i = 0; i < max; i++) {
265 + if (!mb_test_bit(i, buddy)) {
266 + /* only single bit in buddy2 may be 1 */
267 + if (mb_test_bit(i << 1, buddy2))
268 + J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
269 + else if (mb_test_bit((i << 1) + 1, buddy2))
270 + J_ASSERT(!mb_test_bit(i << 1, buddy2));
274 + /* both bits in buddy2 must be 0 */
275 + J_ASSERT(!mb_test_bit(i << 1, buddy2));
276 + J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
278 + for (j = 0; j < (1 << order); j++) {
279 + k = (i * (1 << order)) + j;
280 + J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
284 + J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
288 + buddy = mb_find_buddy(e3b, 0, &max);
289 + for (i = 0; i < max; i++) {
290 + if (mb_test_bit(i, buddy))
292 + /* check used bits only */
293 + for (j = 0; j < e3b->bd_blkbits + 1; j++) {
294 + buddy2 = mb_find_buddy(e3b, j, &max2);
296 + J_ASSERT(k < max2);
297 + J_ASSERT(!mb_test_bit(k, buddy2));
302 +#define mb_check_buddy(e3b)
306 +ext3_lock_group(struct super_block *sb, int group)
308 + spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
312 +ext3_unlock_group(struct super_block *sb, int group)
314 + spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
317 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
322 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
323 + J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
325 + bb = e3b->bd_buddy;
326 + while (order <= e3b->bd_blkbits + 1) {
327 + block = block >> 1;
328 + if (mb_test_bit(block, bb)) {
329 + /* this block is part of buddy of order 'order' */
332 + bb += 1 << (e3b->bd_blkbits - order);
338 +static inline void mb_clear_bits(void *bm, int cur, int len)
343 + while (cur < len) {
344 + if ((cur & 31) == 0 && (len - cur) >= 32) {
345 + /* fast path: clear whole word at once */
346 + addr = bm + (cur >> 3);
351 + mb_clear_bit(cur, bm);
356 +static inline void mb_set_bits(void *bm, int cur, int len)
361 + while (cur < len) {
362 + if ((cur & 31) == 0 && (len - cur) >= 32) {
363 + /* fast path: set whole word at once */
364 + addr = bm + (cur >> 3);
365 + *addr = 0xffffffff;
369 + mb_set_bit(cur, bm);
374 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
376 + int block, max, order;
377 + void *buddy, *buddy2;
379 + mb_check_buddy(e3b);
380 + while (count-- > 0) {
384 + J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
385 + mb_set_bit(block, e3b->bd_bitmap);
386 + e3b->bd_bd->bb_counters[order]++;
388 + /* start of the buddy */
389 + buddy = mb_find_buddy(e3b, order, &max);
393 + if (!mb_test_bit(block, buddy) ||
394 + !mb_test_bit(block + 1, buddy))
397 + /* both the buddies are free, try to coalesce them */
398 + buddy2 = mb_find_buddy(e3b, order + 1, &max);
404 + /* for special purposes, we don't clear
405 + * free bits in bitmap */
406 + mb_clear_bit(block, buddy);
407 + mb_clear_bit(block + 1, buddy);
409 + e3b->bd_bd->bb_counters[order]--;
410 + e3b->bd_bd->bb_counters[order]--;
412 + block = block >> 1;
414 + e3b->bd_bd->bb_counters[order]++;
416 + mb_set_bit(block, buddy2);
420 + mb_check_buddy(e3b);
426 + * returns 1 if out extent is enough to fill needed space
428 +int mb_make_backward_extent(struct ext3_free_extent *in,
429 + struct ext3_free_extent *out, int needed)
435 + J_ASSERT(in->fe_nums < MB_ARR_SIZE);
438 + out->fe_start = in->fe_start + in->fe_len;
441 + /* for single-chunk extent we need not back order
442 + * also, if an extent doesn't fill needed space
443 + * then it makes no sense to try back order because
444 + * if we select this extent then it'll be used as is */
445 + if (in->fe_nums < 2 || in->fe_len < needed)
448 + i = in->fe_nums - 1;
449 + while (i >= 0 && out->fe_len < needed) {
450 + out->fe_len += (1 << in->fe_orders[i]);
451 + out->fe_start -= (1 << in->fe_orders[i]);
454 + /* FIXME: in some situation fe_orders may be too small to hold
455 + * all the buddies */
456 + J_ASSERT(out->fe_len >= needed);
458 + for (i++; i < in->fe_nums; i++)
459 + out->fe_orders[out->fe_nums++] = in->fe_orders[i];
460 + J_ASSERT(out->fe_nums < MB_ARR_SIZE);
466 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
467 + int needed, struct ext3_free_extent *ex)
469 + int space = needed;
470 + int next, max, ord;
473 + J_ASSERT(ex != NULL);
478 + buddy = mb_find_buddy(e3b, order, &max);
480 + J_ASSERT(block < max);
481 + if (!mb_test_bit(block, buddy))
485 + /* find actual order */
486 + order = mb_find_order_for_block(e3b, block);
487 + block = block >> order;
490 + ex->fe_orders[ex->fe_nums++] = order;
491 + ex->fe_len = 1 << order;
492 + ex->fe_start = block << order;
495 + while ((space = space - (1 << order)) > 0) {
497 + buddy = mb_find_buddy(e3b, order, &max);
500 + if (block + 1 >= max)
503 + next = (block + 1) * (1 << order);
504 + if (!mb_test_bit(next, e3b->bd_bitmap))
507 + ord = mb_find_order_for_block(e3b, next);
509 + if ((1 << ord) >= needed) {
510 + /* we don't want to coalesce with buddies that are big enough on their own */
514 + block = next >> order;
515 + ex->fe_len += 1 << order;
517 + if (ex->fe_nums < MB_ARR_SIZE)
518 + ex->fe_orders[ex->fe_nums++] = order;
522 + J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
526 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
527 + struct ext3_free_extent *ex, int len)
529 + int start = ex->fe_start, len0 = len;
530 + int ord, mlen, max, cur;
533 + start = ex->fe_start + ex->fe_len - 1;
535 + ord = mb_find_order_for_block(e3b, start);
536 + if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
537 + len >= (1 << ord)) {
538 + /* the whole chunk may be allocated at once! */
540 + buddy = mb_find_buddy(e3b, ord, &max);
541 + J_ASSERT((start >> ord) < max);
542 + mb_clear_bit(start >> ord, buddy);
543 + e3b->bd_bd->bb_counters[ord]--;
546 + J_ASSERT(len >= 0);
547 + J_ASSERT(start >= 0);
551 + /* we have to split large buddy */
553 + buddy = mb_find_buddy(e3b, ord, &max);
554 + mb_clear_bit(start >> ord, buddy);
555 + e3b->bd_bd->bb_counters[ord]--;
558 + cur = (start >> ord) & ~1U;
559 + buddy = mb_find_buddy(e3b, ord, &max);
560 + mb_set_bit(cur, buddy);
561 + mb_set_bit(cur + 1, buddy);
562 + e3b->bd_bd->bb_counters[ord]++;
563 + e3b->bd_bd->bb_counters[ord]++;
566 + /* now drop all the bits in bitmap */
567 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
569 + mb_check_buddy(e3b);
574 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
575 + struct ext3_free_extent *ex, int len)
577 + int start = ex->fe_start, len0 = len;
578 + int ord, mlen, max, cur;
582 + ord = mb_find_order_for_block(e3b, start);
584 + if (((start >> ord) << ord) == start && len >= (1 << ord)) {
585 + /* the whole chunk may be allocated at once! */
587 + buddy = mb_find_buddy(e3b, ord, &max);
588 + J_ASSERT((start >> ord) < max);
589 + mb_clear_bit(start >> ord, buddy);
590 + e3b->bd_bd->bb_counters[ord]--;
593 + J_ASSERT(len >= 0);
597 + /* we have to split large buddy */
599 + buddy = mb_find_buddy(e3b, ord, &max);
600 + mb_clear_bit(start >> ord, buddy);
601 + e3b->bd_bd->bb_counters[ord]--;
604 + cur = (start >> ord) & ~1U;
605 + buddy = mb_find_buddy(e3b, ord, &max);
606 + mb_set_bit(cur, buddy);
607 + mb_set_bit(cur + 1, buddy);
608 + e3b->bd_bd->bb_counters[ord]++;
609 + e3b->bd_bd->bb_counters[ord]++;
612 + /* now drop all the bits in bitmap */
613 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
615 + mb_check_buddy(e3b);
620 +int inline mb_mark_used(struct ext3_buddy *e3b,
621 + struct ext3_free_extent *ex, int len)
626 + if (ex->fe_back == 0)
627 + err = mb_mark_used_forward(e3b, ex, len);
629 + err = mb_mark_used_backward(e3b, ex, len);
633 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
634 + struct ext3_buddy *e3b, int group)
636 + struct super_block *sb = ac->ac_sb;
637 + int err, gorder, max, i;
638 + struct ext3_free_extent curex;
640 + /* let's know order of allocation */
642 + while (ac->ac_g_len > (1 << gorder))
645 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
646 + /* someone asks for space at this specified block
647 + * probably he wants to merge it into existing extent */
648 + if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
649 + /* good. at least one block is free */
650 + max = mb_find_extent(e3b, 0, ac->ac_g_start,
651 + ac->ac_g_len, &curex);
652 + max = min(curex.fe_len, ac->ac_g_len);
653 + mb_mark_used(e3b, &curex, max);
655 + ac->ac_b_group = group;
656 + ac->ac_b_start = curex.fe_start;
657 + ac->ac_b_len = max;
658 + ac->ac_status = AC_STATUS_FOUND;
662 + /* don't try to find goal anymore */
663 + ac->ac_g_flags &= ~1;
668 + i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
669 + if (i >= sb->s_blocksize * 8)
672 + max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
673 + if (max >= ac->ac_g_len) {
674 + max = min(curex.fe_len, ac->ac_g_len);
675 + mb_mark_used(e3b, &curex, max);
677 + ac->ac_b_group = group;
678 + ac->ac_b_start = curex.fe_start;
679 + ac->ac_b_len = max;
680 + ac->ac_status = AC_STATUS_FOUND;
692 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
694 + struct ext3_group_desc *gdp;
697 + gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
700 + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
701 + if (free_blocks == 0)
704 + /* someone wants this block very much */
705 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
708 + /* FIXME: I'd like to take fragmentation into account here */
710 + if (free_blocks >= ac->ac_g_len >> 1)
712 + } else if (cr == 1) {
713 + if (free_blocks >= ac->ac_g_len >> 2)
715 + } else if (cr == 2) {
723 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
724 + unsigned long goal, int *len, int flags, int *errp)
726 + struct buffer_head *bitmap_bh = NULL;
727 + struct ext3_allocation_context ac;
728 + int i, group, block, cr, err = 0;
729 + struct ext3_group_desc *gdp;
730 + struct ext3_super_block *es;
731 + struct buffer_head *gdp_bh;
732 + struct ext3_sb_info *sbi;
733 + struct super_block *sb;
734 + struct ext3_buddy e3b;
736 + J_ASSERT(len != NULL);
737 + J_ASSERT(*len > 0);
741 + printk("ext3_mb_new_nblocks: nonexistent device");
745 + if (!test_opt(sb, MBALLOC)) {
746 + static int ext3_mballoc_warning = 0;
747 + if (ext3_mballoc_warning == 0) {
748 + printk(KERN_ERR "EXT3-fs: multiblock request with "
749 + "mballoc disabled!\n");
750 + ext3_mballoc_warning++;
753 + err = ext3_new_block_old(handle, inode, goal, errp);
757 + ext3_mb_poll_new_transaction(sb, handle);
760 + es = EXT3_SB(sb)->s_es;
762 + if (!(flags & 2)) {
763 + /* someone asks for non-reserved blocks */
765 + err = ext3_mb_reserve_blocks(sb, 1);
773 + * Check quota for allocation of these blocks.
775 + while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
783 + /* start searching from the goal */
784 + if (goal < le32_to_cpu(es->s_first_data_block) ||
785 + goal >= le32_to_cpu(es->s_blocks_count))
786 + goal = le32_to_cpu(es->s_first_data_block);
787 + group = (goal - le32_to_cpu(es->s_first_data_block)) /
788 + EXT3_BLOCKS_PER_GROUP(sb);
789 + block = ((goal - le32_to_cpu(es->s_first_data_block)) %
790 + EXT3_BLOCKS_PER_GROUP(sb));
792 + /* set up allocation goals */
793 + ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
795 + ac.ac_groups_scanned = 0;
796 + ac.ac_sb = inode->i_sb;
797 + ac.ac_g_group = group;
798 + ac.ac_g_start = block;
799 + ac.ac_g_len = *len;
800 + ac.ac_g_flags = flags;
802 + /* loop over the groups */
803 + for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
804 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
805 + if (group == EXT3_SB(sb)->s_groups_count)
808 + /* check whether the group is good for our criteria */
809 + if (!mb_good_group(&ac, group, cr))
812 + err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
816 + ext3_lock_group(sb, group);
817 + if (!mb_good_group(&ac, group, cr)) {
818 + /* someone did allocation from this group */
819 + ext3_unlock_group(sb, group);
820 + ext3_mb_release_desc(&e3b);
824 + err = ext3_mb_new_in_group(&ac, &e3b, group);
825 + ext3_unlock_group(sb, group);
826 + if (ac.ac_status == AC_STATUS_FOUND)
827 + ext3_mb_dirty_buddy(&e3b);
828 + ext3_mb_release_desc(&e3b);
831 + if (ac.ac_status == AC_STATUS_FOUND)
836 + if (ac.ac_status != AC_STATUS_FOUND) {
837 + /* unfortunately, we can't satisfy this request */
838 + J_ASSERT(ac.ac_b_len == 0);
839 + DQUOT_FREE_BLOCK(inode, *len);
845 + /* good news - free block(s) have been found. now it's time
846 + * to mark block(s) in good old journaled bitmap */
847 + block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
848 + + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
850 + /* we made a decision, now mark found blocks in good old
851 + * bitmap to be journaled */
853 + ext3_debug("using block group %d(%d)\n",
854 + ac.ac_b_group.group, gdp->bg_free_blocks_count);
856 + bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
862 + err = ext3_journal_get_write_access(handle, bitmap_bh);
868 + gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
874 + err = ext3_journal_get_write_access(handle, gdp_bh);
878 + block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
879 + + le32_to_cpu(es->s_first_data_block);
881 + if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
882 + block == le32_to_cpu(gdp->bg_inode_bitmap) ||
883 + in_range(block, le32_to_cpu(gdp->bg_inode_table),
884 + EXT3_SB(sb)->s_itb_per_group))
885 + ext3_error(sb, "ext3_new_block",
886 + "Allocating block in system zone - "
887 + "block = %u", block);
889 + for (i = 0; i < ac.ac_b_len; i++)
890 + J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
892 + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
894 + ext3_lock_group(sb, ac.ac_b_group);
895 + gdp->bg_free_blocks_count =
896 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
898 + ext3_unlock_group(sb, ac.ac_b_group);
899 + percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
901 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
904 + err = ext3_journal_dirty_metadata(handle, gdp_bh);
912 + /* drop non-allocated, but dquote'd blocks */
913 + J_ASSERT(*len >= ac.ac_b_len);
914 + DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
916 + *len = ac.ac_b_len;
917 + J_ASSERT(block != 0);
921 + /* if we've already allocated something, roll it back */
922 + if (ac.ac_status == AC_STATUS_FOUND) {
923 + /* FIXME: free blocks here */
926 + DQUOT_FREE_BLOCK(inode, *len);
931 + if (!(flags & 2)) {
932 + /* block wasn't reserved before and we reserved it
933 + * at the beginning of allocation. it doesn't matter
934 + * whether we allocated anything or we failed: time
935 + * to release reservation. NOTE: because I expect
936 + * any multiblock request from delayed allocation
937 + * path only, here is single block always */
938 + ext3_mb_release_blocks(sb, 1);
943 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
945 + struct buffer_head *bh;
946 + int i, err, count = 0;
947 + struct ext3_buddy e3b;
949 + err = ext3_mb_load_desc(sb, group, &e3b);
952 + memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
953 + memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
955 + bh = read_block_bitmap(sb, group);
961 + /* loop over the blocks, and create buddies for free ones */
962 + for (i = 0; i < sb->s_blocksize * 8; i++) {
963 + if (!mb_test_bit(i, (void *) bh->b_data)) {
964 + mb_free_blocks(&e3b, i, 1);
969 + mb_check_buddy(&e3b);
970 + ext3_mb_dirty_buddy(&e3b);
973 + ext3_mb_release_desc(&e3b);
978 +EXPORT_SYMBOL(ext3_mb_new_blocks);
980 +#define MB_CREDITS \
981 + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
982 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
984 +int ext3_mb_init_backend(struct super_block *sb)
986 + struct inode *root = sb->s_root->d_inode;
987 + struct ext3_sb_info *sbi = EXT3_SB(sb);
992 + sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks *) *
993 + sbi->s_groups_count, GFP_KERNEL);
994 + if (sbi->s_buddy_blocks == NULL) {
995 + printk("EXT3-fs: can't allocate mem for buddy maps\n");
998 + memset(sbi->s_buddy_blocks, 0,
999 + sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count);
1000 + sbi->s_buddy = NULL;
1002 + down(&root->i_sem);
1003 + db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
1004 + strlen(EXT3_BUDDY_FILE));
1006 + err = PTR_ERR(db);
1007 + printk("EXT3-fs: can't lookup buddy file: %d\n", err);
1011 + if (db->d_inode != NULL) {
1012 + sbi->s_buddy = igrab(db->d_inode);
1016 + err = ext3_create(root, db, S_IFREG, NULL);
1018 + printk("error while creation buddy file: %d\n", err);
1020 + sbi->s_buddy = igrab(db->d_inode);
1024 + for (i = 0; i < sbi->s_groups_count; i++) {
1025 + struct buffer_head *bh = NULL;
1028 + sbi->s_buddy_blocks[i] =
1029 + kmalloc(sizeof(struct ext3_buddy_group_blocks),
1031 + if (sbi->s_buddy_blocks[i] == NULL) {
1032 + printk("EXT3-fs: can't allocate mem for buddy\n");
1037 + handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1038 + if (IS_ERR(handle)) {
1039 + err = PTR_ERR(handle);
1043 + /* allocate block for bitmap */
1044 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1046 + printk("can't get block for buddy bitmap: %d\n", err);
1049 + sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
1052 + /* allocate block for buddy */
1053 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1055 + printk("can't get block for buddy: %d\n", err);
1058 + sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
1060 + ext3_journal_stop(handle);
1061 + spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
1062 + sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
1063 + sbi->s_buddy_blocks[i]->bb_tid = 0;
1066 + if (journal_start_commit(sbi->s_journal, &target))
1067 + log_wait_commit(sbi->s_journal, target);
1076 +int ext3_mb_release(struct super_block *sb)
1078 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1081 + if (!test_opt(sb, MBALLOC))
1084 + /* release freed, non-committed blocks */
1085 + spin_lock(&sbi->s_md_lock);
1086 + list_splice_init(&sbi->s_closed_transaction,
1087 + &sbi->s_committed_transaction);
1088 + list_splice_init(&sbi->s_active_transaction,
1089 + &sbi->s_committed_transaction);
1090 + spin_unlock(&sbi->s_md_lock);
1091 + ext3_mb_free_committed_blocks(sb);
1093 + if (sbi->s_buddy_blocks) {
1094 + for (i = 0; i < sbi->s_groups_count; i++)
1095 + if (sbi->s_buddy_blocks[i])
1096 + kfree(sbi->s_buddy_blocks[i]);
1097 + kfree(sbi->s_buddy_blocks);
1100 + iput(sbi->s_buddy);
1101 + if (sbi->s_blocks_reserved)
1102 + printk("ext3-fs: %ld blocks being reserved at umount!\n",
1103 + sbi->s_blocks_reserved);
1107 +int ext3_mb_init(struct super_block *sb)
1109 + struct ext3_super_block *es;
1112 + if (!test_opt(sb, MBALLOC))
1115 + /* init file for buddy data */
1116 + clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1117 + if (ext3_mb_init_backend(sb))
1120 + es = EXT3_SB(sb)->s_es;
1121 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
1122 + ext3_mb_generate_buddy(sb, i);
1123 + spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1124 + spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1125 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1126 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1127 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1128 + set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1129 + printk("EXT3-fs: mballoc enabled\n");
1133 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1135 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1136 + int err, i, count = 0, count2 = 0;
1137 + struct ext3_free_metadata *md;
1138 + struct ext3_buddy e3b;
1140 + if (list_empty(&sbi->s_committed_transaction))
1143 + /* there are committed blocks still to be freed */
1145 + /* get next array of blocks */
1147 + spin_lock(&sbi->s_md_lock);
1148 + if (!list_empty(&sbi->s_committed_transaction)) {
1149 + md = list_entry(sbi->s_committed_transaction.next,
1150 + struct ext3_free_metadata, list);
1151 + list_del(&md->list);
1153 + spin_unlock(&sbi->s_md_lock);
1158 + mb_debug("gonna free %u blocks in group %u (0x%p):",
1159 + md->num, md->group, md);
1161 + err = ext3_mb_load_desc(sb, md->group, &e3b);
1164 + /* there are blocks to put in buddy to make them really free */
1167 + ext3_lock_group(sb, md->group);
1168 + for (i = 0; i < md->num; i++) {
1169 + mb_debug(" %u", md->blocks[i]);
1170 + mb_free_blocks(&e3b, md->blocks[i], 1);
1173 + ext3_unlock_group(sb, md->group);
1176 + ext3_mb_dirty_buddy(&e3b);
1177 + ext3_mb_release_desc(&e3b);
1180 + mb_debug("freed %u blocks in %u structures\n", count, count2);
1183 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1185 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1187 + if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1190 + /* new transaction! time to close last one and free blocks for
1191 + * committed transaction. we know that only one transaction can be
1192 + * active, so the previous transaction may still be being logged, and we
1193 + * know that the transaction before the previous one is already
1194 + * logged. this means that now we may free blocks freed in all
1195 + * transactions before the previous one. hope I'm clear enough ... */
1197 + spin_lock(&sbi->s_md_lock);
1198 + if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1199 + mb_debug("new transaction %lu, old %lu\n",
1200 + (unsigned long) handle->h_transaction->t_tid,
1201 + (unsigned long) sbi->s_last_transaction);
1202 + list_splice_init(&sbi->s_closed_transaction,
1203 + &sbi->s_committed_transaction);
1204 + list_splice_init(&sbi->s_active_transaction,
1205 + &sbi->s_closed_transaction);
1206 + sbi->s_last_transaction = handle->h_transaction->t_tid;
1208 + spin_unlock(&sbi->s_md_lock);
1210 + ext3_mb_free_committed_blocks(sb);
1213 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1214 + int group, int block, int count)
1216 + struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1217 + struct super_block *sb = e3b->bd_sb;
1218 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1219 + struct ext3_free_metadata *md;
1222 + ext3_lock_group(sb, group);
1223 + for (i = 0; i < count; i++) {
1224 + md = db->bb_md_cur;
1225 + if (md && db->bb_tid != handle->h_transaction->t_tid) {
1226 + db->bb_md_cur = NULL;
1231 + ext3_unlock_group(sb, group);
1232 + md = kmalloc(sizeof(*md), GFP_KERNEL);
1236 + md->group = group;
1238 + ext3_lock_group(sb, group);
1239 + if (db->bb_md_cur == NULL) {
1240 + spin_lock(&sbi->s_md_lock);
1241 + list_add(&md->list, &sbi->s_active_transaction);
1242 + spin_unlock(&sbi->s_md_lock);
1243 + db->bb_md_cur = md;
1244 + db->bb_tid = handle->h_transaction->t_tid;
1245 + mb_debug("new md 0x%p for group %u\n",
1249 + md = db->bb_md_cur;
1253 + BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1254 + md->blocks[md->num] = block + i;
1256 + if (md->num == EXT3_BB_MAX_BLOCKS) {
1257 + /* no more space, put full container on a sb's list */
1258 + db->bb_md_cur = NULL;
1261 + ext3_unlock_group(sb, group);
1265 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1266 + unsigned long block, unsigned long count, int metadata)
1268 + struct buffer_head *bitmap_bh = NULL;
1269 + struct ext3_group_desc *gdp;
1270 + struct ext3_super_block *es;
1271 + unsigned long bit, overflow;
1272 + struct buffer_head *gd_bh;
1273 + unsigned long block_group;
1274 + struct ext3_sb_info *sbi;
1275 + struct super_block *sb;
1276 + struct ext3_buddy e3b;
1281 + printk ("ext3_free_blocks: nonexistent device");
1285 + ext3_mb_poll_new_transaction(sb, handle);
1287 + sbi = EXT3_SB(sb);
1288 + es = EXT3_SB(sb)->s_es;
1289 + if (block < le32_to_cpu(es->s_first_data_block) ||
1290 + block + count < block ||
1291 + block + count > le32_to_cpu(es->s_blocks_count)) {
1292 + ext3_error (sb, "ext3_free_blocks",
1293 + "Freeing blocks not in datazone - "
1294 + "block = %lu, count = %lu", block, count);
1295 + goto error_return;
1298 + ext3_debug("freeing block %lu\n", block);
1302 + block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1303 + EXT3_BLOCKS_PER_GROUP(sb);
1304 + bit = (block - le32_to_cpu(es->s_first_data_block)) %
1305 + EXT3_BLOCKS_PER_GROUP(sb);
1307 + * Check to see if we are freeing blocks across a group
1310 + if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1311 + overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1312 + count -= overflow;
1314 + brelse(bitmap_bh);
1315 + bitmap_bh = read_block_bitmap(sb, block_group);
1317 + goto error_return;
1318 + gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1320 + goto error_return;
1322 + if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1323 + in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1324 + in_range (block, le32_to_cpu(gdp->bg_inode_table),
1325 + EXT3_SB(sb)->s_itb_per_group) ||
1326 + in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1327 + EXT3_SB(sb)->s_itb_per_group))
1328 + ext3_error (sb, "ext3_free_blocks",
1329 + "Freeing blocks in system zones - "
1330 + "Block = %lu, count = %lu",
1333 + BUFFER_TRACE(bitmap_bh, "getting write access");
1334 + err = ext3_journal_get_write_access(handle, bitmap_bh);
1336 + goto error_return;
1339 + * We are about to modify some metadata. Call the journal APIs
1340 + * to unshare ->b_data if a currently-committing transaction is
1343 + BUFFER_TRACE(gd_bh, "get_write_access");
1344 + err = ext3_journal_get_write_access(handle, gd_bh);
1346 + goto error_return;
1348 + err = ext3_mb_load_desc(sb, block_group, &e3b);
1350 + goto error_return;
1353 + /* blocks being freed are metadata. these blocks shouldn't
1354 + * be used until this transaction is committed */
1355 + ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1357 + ext3_lock_group(sb, block_group);
1358 + mb_free_blocks(&e3b, bit, count);
1359 + gdp->bg_free_blocks_count =
1360 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1361 + ext3_unlock_group(sb, block_group);
1362 + percpu_counter_mod(&sbi->s_freeblocks_counter, count);
1365 + ext3_mb_dirty_buddy(&e3b);
1366 + ext3_mb_release_desc(&e3b);
1368 + /* FIXME: undo logic will be implemented later, in a different way */
1369 + mb_clear_bits(bitmap_bh->b_data, bit, count);
1370 + DQUOT_FREE_BLOCK(inode, count);
1372 + /* We dirtied the bitmap block */
1373 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1374 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1376 + /* And the group descriptor block */
1377 + BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1378 + ret = ext3_journal_dirty_metadata(handle, gd_bh);
1379 + if (!err) err = ret;
1381 + if (overflow && !err) {
1388 + brelse(bitmap_bh);
1389 + ext3_std_error(sb, err);
1393 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1395 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1396 + int free, ret = -ENOSPC;
1398 + BUG_ON(blocks < 0);
1399 + spin_lock(&sbi->s_reserve_lock);
1400 + free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1401 + if (blocks <= free - sbi->s_blocks_reserved) {
1402 + sbi->s_blocks_reserved += blocks;
1405 + spin_unlock(&sbi->s_reserve_lock);
1409 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1411 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1413 + BUG_ON(blocks < 0);
1414 + spin_lock(&sbi->s_reserve_lock);
1415 + sbi->s_blocks_reserved -= blocks;
1416 + WARN_ON(sbi->s_blocks_reserved < 0);
1417 + if (sbi->s_blocks_reserved < 0)
1418 + sbi->s_blocks_reserved = 0;
1419 + spin_unlock(&sbi->s_reserve_lock);
1422 +int ext3_new_block(handle_t *handle, struct inode *inode,
1423 + unsigned long goal, int *errp)
1427 + if (!test_opt(inode->i_sb, MBALLOC)) {
1428 + ret = ext3_new_block_old(handle, inode, goal, errp);
1432 + ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1438 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1439 + unsigned long block, unsigned long count, int metadata)
1441 + if (!test_opt(inode->i_sb, MBALLOC))
1442 + ext3_free_blocks_old(handle, inode, block, count);
1444 + ext3_mb_free_blocks(handle, inode, block, count, metadata);
1447 Index: linux-2.6.5-sles9/fs/ext3/super.c
1448 ===================================================================
1449 --- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
1450 +++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300
1452 struct ext3_super_block *es = sbi->s_es;
1455 + ext3_mb_release(sb);
1456 ext3_ext_release(sb);
1457 ext3_xattr_put_super(sb);
1458 journal_destroy(sbi->s_journal);
1460 Opt_commit, Opt_journal_update, Opt_journal_inum,
1461 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1462 Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
1463 - Opt_err, Opt_extents, Opt_extdebug
1464 + Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
1467 static match_table_t tokens = {
1469 {Opt_iopen_nopriv, "iopen_nopriv"},
1470 {Opt_extents, "extents"},
1471 {Opt_extdebug, "extdebug"},
1472 + {Opt_mballoc, "mballoc"},
1478 set_opt (sbi->s_mount_opt, EXTDEBUG);
1481 + set_opt (sbi->s_mount_opt, MBALLOC);
1485 "EXT3-fs: Unrecognized mount option \"%s\" "
1486 @@ -1463,7 +1468,8 @@
1487 ext3_count_dirs(sb));
1496 Index: linux-2.6.5-sles9/fs/ext3/Makefile
1497 ===================================================================
1498 --- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
1499 +++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300
1501 obj-$(CONFIG_EXT3_FS) += ext3.o
1503 ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1504 - ioctl.o namei.o super.o symlink.o hash.o extents.o
1505 + ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
1507 ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
1508 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
1509 Index: linux-2.6.5-sles9/fs/ext3/balloc.c
1510 ===================================================================
1511 --- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300
1512 +++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300
1515 * Return buffer_head on success or NULL in case of failure.
1517 -static struct buffer_head *
1518 +struct buffer_head *
1519 read_block_bitmap(struct super_block *sb, unsigned int block_group)
1521 struct ext3_group_desc * desc;
1525 /* Free given blocks, update quota and i_blocks field */
1526 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
1527 +void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
1528 unsigned long block, unsigned long count)
1530 struct buffer_head *bitmap_bh = NULL;
1531 @@ -1142,7 +1142,7 @@
1532 * bitmap, and then for any free bit if that fails.
1533 * This function also updates quota and i_blocks field.
1535 -int ext3_new_block(handle_t *handle, struct inode *inode,
1536 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
1537 unsigned long goal, int *errp)
1539 struct buffer_head *bitmap_bh = NULL;
1540 Index: linux-2.6.5-sles9/fs/ext3/namei.c
1541 ===================================================================
1542 --- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300
1543 +++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300
1544 @@ -1640,7 +1640,7 @@
1545 * If the create succeeds, we fill in the inode information
1546 * with d_instantiate().
1548 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1549 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1550 struct nameidata *nd)
1553 Index: linux-2.6.5-sles9/fs/ext3/inode.c
1554 ===================================================================
1555 --- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
1556 +++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300
1558 ext3_journal_forget(handle, branch[i].bh);
1560 for (i = 0; i < keys; i++)
1561 - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1562 + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1568 for (i = 0; i < num; i++)
1569 ext3_free_blocks(handle, inode,
1570 - le32_to_cpu(where[i].key), 1);
1571 + le32_to_cpu(where[i].key), 1, 1);
1575 @@ -1829,7 +1829,7 @@
1579 - ext3_free_blocks(handle, inode, block_to_free, count);
1580 + ext3_free_blocks(handle, inode, block_to_free, count, 1);
1584 @@ -2000,7 +2000,7 @@
1585 ext3_journal_test_restart(handle, inode);
1588 - ext3_free_blocks(handle, inode, nr, 1);
1589 + ext3_free_blocks(handle, inode, nr, 1, 1);
1593 Index: linux-2.6.5-sles9/fs/ext3/extents.c
1594 ===================================================================
1595 --- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
1596 +++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
1598 for (i = 0; i < depth; i++) {
1601 - ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1602 + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1606 @@ -1391,7 +1391,7 @@
1607 path->p_idx->ei_leaf);
1608 bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
1609 ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
1610 - ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
1611 + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
1615 @@ -1879,10 +1879,12 @@
1616 int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1617 handle_t *handle = ext3_journal_start(tree->inode, needed);
1618 struct buffer_head *bh;
1620 + int i, metadata = 0;
1623 return PTR_ERR(handle);
1624 + if (S_ISDIR(tree->inode->i_mode))
1626 if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
1628 unsigned long num, start;
1629 @@ -1894,7 +1896,7 @@
1630 bh = sb_find_get_block(tree->inode->i_sb, start + i);
1631 ext3_forget(handle, 0, tree->inode, bh, start + i);
1633 - ext3_free_blocks(handle, tree->inode, start, num);
1634 + ext3_free_blocks(handle, tree->inode, start, num, metadata);
1635 } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
1636 printk("strange request: removal %lu-%lu from %u:%u\n",
1637 from, to, ex->ee_block, ex->ee_len);
1638 Index: linux-2.6.5-sles9/fs/ext3/xattr.c
1639 ===================================================================
1640 --- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300
1641 +++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300
1642 @@ -1366,7 +1366,7 @@
1643 new_bh = sb_getblk(sb, block);
1646 - ext3_free_blocks(handle, inode, block, 1);
1647 + ext3_free_blocks(handle, inode, block, 1, 1);
1651 @@ -1408,7 +1408,7 @@
1652 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
1653 /* Free the old block. */
1654 ea_bdebug(old_bh, "freeing");
1655 - ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
1656 + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
1658 /* ext3_forget() calls bforget() for us, but we
1659 let our caller release old_bh, so we need to
1660 @@ -1504,7 +1504,7 @@
1662 if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
1663 ext3_xattr_cache_remove(bh);
1664 - ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
1665 + ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
1667 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
1669 Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
1670 ===================================================================
1671 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
1672 +++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300
1674 #define ext3_debug(f, a...) do {} while (0)
1677 +#define EXT3_MULTIBLOCK_ALLOCATOR 1
1680 * Special inodes numbers
1683 #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
1684 #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
1685 #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
1686 +#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
1688 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1691 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
1692 extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
1693 extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1695 + unsigned long, int);
1696 extern unsigned long ext3_count_free_blocks (struct super_block *);
1697 extern void ext3_check_blocks_bitmap (struct super_block *);
1698 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1699 @@ -743,6 +746,13 @@
1700 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
1704 +extern int ext3_mb_init(struct super_block *sb);
1705 +extern int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
1706 + unsigned long goal,int *len, int flags,int *errp);
1707 +extern int ext3_mb_release(struct super_block *sb);
1708 +extern void ext3_mb_release_blocks(struct super_block *, int);
1711 extern int ext3_orphan_add(handle_t *, struct inode *);
1712 extern int ext3_orphan_del(handle_t *, struct inode *);
1713 Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
1714 ===================================================================
1715 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
1716 +++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300
1719 #include <linux/blockgroup_lock.h>
1720 #include <linux/percpu_counter.h>
1721 +#include <linux/list.h>
1724 #include <linux/rbtree.h>
1726 +#define EXT3_BB_MAX_BLOCKS 30
1727 +struct ext3_free_metadata {
1728 + unsigned short group;
1729 + unsigned short num;
1730 + unsigned short blocks[EXT3_BB_MAX_BLOCKS];
1731 + struct list_head list;
1734 +#define EXT3_BB_MAX_ORDER 14
1736 +struct ext3_buddy_group_blocks {
1737 + sector_t bb_bitmap;
1738 + sector_t bb_buddy;
1739 + spinlock_t bb_lock;
1740 + unsigned bb_counters[EXT3_BB_MAX_ORDER];
1741 + struct ext3_free_metadata *bb_md_cur;
1742 + unsigned long bb_tid;
1746 * third extended-fs super-block data in memory
1749 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
1750 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
1753 + /* for buddy allocator */
1754 + struct ext3_buddy_group_blocks **s_buddy_blocks;
1755 + struct inode *s_buddy;
1756 + long s_blocks_reserved;
1757 + spinlock_t s_reserve_lock;
1758 + struct list_head s_active_transaction;
1759 + struct list_head s_closed_transaction;
1760 + struct list_head s_committed_transaction;
1761 + spinlock_t s_md_lock;
1762 + tid_t s_last_transaction;
1765 #endif /* _LINUX_EXT3_FS_SB */