1 Index: linux-2.4.24/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.4.24.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.4.24/fs/ext3/mballoc.c 2004-08-06 04:50:53.000000000 +0400
7 + * Copyright (c) 2004, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 + * GNU General Public License for more details.
19 + * You should have received a copy of the GNU General Public License
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
26 + * mballoc.c contains the multiblocks allocation routines
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/locks.h>
33 +#include <linux/jbd.h>
34 +#include <linux/slab.h>
35 +#include <linux/ext3_fs.h>
36 +#include <linux/ext3_jbd.h>
37 +#include <linux/quotaops.h>
41 + * - do not scan from the beginning, try to remember first free block
42 + * - mb_mark_used_* may allocate chunk right after splitting buddy
43 + * - special flag to advise allocator to look for requested + N blocks
44 + * this may improve interaction between extents and mballoc
48 + * with AGGRESSIVE_CHECK allocator runs consistency checks over
49 + * structures. these checks slow things down a lot
51 +#define AGGRESSIVE_CHECK__
57 +#define mb_debug(fmt,a...) printk(fmt, ##a)
59 +#define mb_debug(fmt,a...)
63 + * where to save buddies structures between umount/mount (clean case only)
65 +#define EXT3_BUDDY_FILE ".buddy"
68 + * max. number of chunks to be tracked in ext3_free_extent struct
70 +#define MB_ARR_SIZE 32
72 +struct ext3_allocation_context {
73 + struct super_block *ac_sb;
81 + /* the best found extent */
86 + /* number of iterations done. we have to track to limit searching */
88 + int ac_groups_scanned;
92 +#define AC_STATUS_CONTINUE 1
93 +#define AC_STATUS_FOUND 2
100 + struct buffer_head *bd_bh;
101 + struct buffer_head *bd_bh2;
102 + struct ext3_buddy_group_blocks *bd_bd;
103 + struct super_block *bd_sb;
106 +struct ext3_free_extent {
109 + unsigned char fe_orders[MB_ARR_SIZE];
110 + unsigned char fe_nums;
111 + unsigned char fe_back;
114 +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
117 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
118 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
119 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
120 +int ext3_mb_reserve_blocks(struct super_block *, int);
121 +void ext3_mb_release_blocks(struct super_block *, int);
122 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
123 +void ext3_mb_free_committed_blocks(struct super_block *);
124 +int load_block_bitmap (struct super_block *, unsigned int);
126 +struct buffer_head *
127 +read_block_bitmap_bh(struct super_block *sb, unsigned int block_group)
129 + struct buffer_head *bh;
132 + bitmap_nr = load_block_bitmap(sb, block_group);
136 + bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
140 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
145 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
146 + J_ASSERT(max != NULL);
148 + if (order > e3b->bd_blkbits + 1)
151 + /* at order 0 we see each particular block */
152 + *max = 1 << (e3b->bd_blkbits + 3);
154 + return e3b->bd_bitmap;
156 + bb = e3b->bd_buddy;
158 + while (i < order) {
159 + bb += 1 << (e3b->bd_blkbits - i);
166 +static int ext3_mb_load_desc(struct super_block *sb, int group,
167 + struct ext3_buddy *e3b)
169 + struct ext3_sb_info *sbi = EXT3_SB(sb);
171 + J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
172 + J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
175 + e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
176 + if (e3b->bd_bh == NULL) {
177 + ext3_error(sb, "ext3_mb_load_desc",
178 + "can't get block for buddy bitmap\n");
181 + if (!buffer_uptodate(e3b->bd_bh)) {
182 + ll_rw_block(READ, 1, &e3b->bd_bh);
183 + wait_on_buffer(e3b->bd_bh);
185 + J_ASSERT(buffer_uptodate(e3b->bd_bh));
188 + e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
189 + if (e3b->bd_bh2 == NULL) {
190 + ext3_error(sb, "ext3_mb_load_desc",
191 + "can't get block for buddy bitmap\n");
194 + if (!buffer_uptodate(e3b->bd_bh2)) {
195 + ll_rw_block(READ, 1, &e3b->bd_bh2);
196 + wait_on_buffer(e3b->bd_bh2);
198 + J_ASSERT(buffer_uptodate(e3b->bd_bh2));
200 + e3b->bd_bitmap = e3b->bd_bh->b_data;
201 + e3b->bd_buddy = e3b->bd_bh2->b_data;
202 + e3b->bd_blkbits = sb->s_blocksize_bits;
203 + e3b->bd_bd = sbi->s_buddy_blocks + group;
208 + brelse(e3b->bd_bh);
209 + brelse(e3b->bd_bh2);
211 + e3b->bd_bh2 = NULL;
215 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
217 + mark_buffer_dirty(e3b->bd_bh);
218 + mark_buffer_dirty(e3b->bd_bh2);
221 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
223 + brelse(e3b->bd_bh);
224 + brelse(e3b->bd_bh2);
227 +#ifdef AGGRESSIVE_CHECK
228 +static void mb_check_buddy(struct ext3_buddy *e3b)
230 + int order = e3b->bd_blkbits + 1;
231 + int max, max2, i, j, k, count;
232 + void *buddy, *buddy2;
234 + if (!test_opt(e3b->bd_sb, MBALLOC))
237 + while (order > 1) {
238 + buddy = mb_find_buddy(e3b, order, &max);
240 + buddy2 = mb_find_buddy(e3b, order - 1, &max2);
242 + J_ASSERT(buddy != buddy2);
243 + J_ASSERT(max * 2 == max2);
246 + for (i = 0; i < max; i++) {
248 + if (!test_bit(i, buddy)) {
249 + /* only single bit in buddy2 may be 1 */
250 + if (test_bit(i << 1, buddy2))
251 + J_ASSERT(!test_bit((i<<1)+1, buddy2));
252 + else if (test_bit((i << 1) + 1, buddy2))
253 + J_ASSERT(!test_bit(i << 1, buddy2));
257 + /* both bits in buddy2 must be 0 */
258 + J_ASSERT(!test_bit(i << 1, buddy2));
259 + J_ASSERT(!test_bit((i << 1) + 1, buddy2));
261 + for (j = 0; j < (1 << order); j++) {
262 + k = (i * (1 << order)) + j;
263 + J_ASSERT(test_bit(k, e3b->bd_bitmap));
267 + J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
271 + buddy = mb_find_buddy(e3b, 0, &max);
272 + for (i = 0; i < max; i++) {
273 + if (test_bit(i, buddy))
275 + /* check used bits only */
276 + for (j = 0; j < e3b->bd_blkbits + 1; j++) {
277 + buddy2 = mb_find_buddy(e3b, j, &max2);
279 + J_ASSERT(k < max2);
280 + J_ASSERT(!test_bit(k, buddy2));
285 +#define mb_check_buddy(e3b)
289 +ext3_lock_group(struct super_block *sb, int group)
291 + spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
295 +ext3_unlock_group(struct super_block *sb, int group)
297 + spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
300 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
305 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
306 + J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
308 + bb = e3b->bd_buddy;
309 + while (order <= e3b->bd_blkbits + 1) {
310 + block = block >> 1;
311 + if (test_bit(block, bb)) {
312 + /* this block is part of buddy of order 'order' */
315 + bb += 1 << (e3b->bd_blkbits - order);
321 +static inline void mb_clear_bits(void *bm, int cur, int len)
326 + while (cur < len) {
327 + if ((cur & 31) == 0 && (len - cur) >= 32) {
328 + /* fast path: clear whole word at once */
329 + addr = bm + (cur >> 3);
334 + clear_bit(cur, bm);
339 +static inline void mb_set_bits(void *bm, int cur, int len)
344 + while (cur < len) {
345 + if ((cur & 31) == 0 && (len - cur) >= 32) {
346 + /* fast path: set whole word at once */
347 + addr = bm + (cur >> 3);
348 + *addr = 0xffffffff;
357 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
359 + int block, max, order;
360 + void *buddy, *buddy2;
362 + mb_check_buddy(e3b);
363 + while (count-- > 0) {
367 + J_ASSERT(!test_bit(block, e3b->bd_bitmap));
368 + set_bit(block, e3b->bd_bitmap);
369 + e3b->bd_bd->bb_counters[order]++;
371 + /* start of the buddy */
372 + buddy = mb_find_buddy(e3b, order, &max);
376 + if (!test_bit(block, buddy) ||
377 + !test_bit(block + 1, buddy))
380 + /* both the buddies are free, try to coalesce them */
381 + buddy2 = mb_find_buddy(e3b, order + 1, &max);
387 + /* for special purposes, we don't clear
388 + * free bits in bitmap */
389 + clear_bit(block, buddy);
390 + clear_bit(block + 1, buddy);
392 + e3b->bd_bd->bb_counters[order]--;
393 + e3b->bd_bd->bb_counters[order]--;
395 + block = block >> 1;
397 + e3b->bd_bd->bb_counters[order]++;
399 + set_bit(block, buddy2);
403 + mb_check_buddy(e3b);
409 + * returns 1 if out extent is enough to fill needed space
411 +int mb_make_backward_extent(struct ext3_free_extent *in,
412 + struct ext3_free_extent *out, int needed)
418 + J_ASSERT(in->fe_nums < MB_ARR_SIZE);
421 + out->fe_start = in->fe_start + in->fe_len;
424 + /* for single-chunk extent we need not back order
425 + * also, if an extent doesn't fill needed space
426 + * then it makes no sense to try back order because
427 + * if we select this extent then it'll be used as is */
428 + if (in->fe_nums < 2 || in->fe_len < needed)
431 + i = in->fe_nums - 1;
432 + while (i >= 0 && out->fe_len < needed) {
433 + out->fe_len += (1 << in->fe_orders[i]);
434 + out->fe_start -= (1 << in->fe_orders[i]);
437 + /* FIXME: in some situation fe_orders may be too small to hold
438 + * all the buddies */
439 + J_ASSERT(out->fe_len >= needed);
441 + for (i++; i < in->fe_nums; i++)
442 + out->fe_orders[out->fe_nums++] = in->fe_orders[i];
443 + J_ASSERT(out->fe_nums < MB_ARR_SIZE);
449 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
450 + int needed, struct ext3_free_extent *ex)
452 + int space = needed;
453 + int next, max, ord;
456 + J_ASSERT(ex != NULL);
461 + buddy = mb_find_buddy(e3b, order, &max);
463 + J_ASSERT(block < max);
464 + if (!test_bit(block, buddy))
468 + /* find actual order */
469 + order = mb_find_order_for_block(e3b, block);
470 + block = block >> order;
473 + ex->fe_orders[ex->fe_nums++] = order;
474 + ex->fe_len = 1 << order;
475 + ex->fe_start = block << order;
478 + while ((space = space - (1 << order)) > 0) {
480 + buddy = mb_find_buddy(e3b, order, &max);
483 + if (block + 1 >= max)
486 + next = (block + 1) * (1 << order);
487 + if (!test_bit(next, e3b->bd_bitmap))
490 + ord = mb_find_order_for_block(e3b, next);
492 + if ((1 << ord) >= needed) {
493 + /* we don't want to coalesce with self-enough buddies */
497 + block = next >> order;
498 + ex->fe_len += 1 << order;
500 + if (ex->fe_nums < MB_ARR_SIZE)
501 + ex->fe_orders[ex->fe_nums++] = order;
505 + J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
509 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
510 + struct ext3_free_extent *ex, int len)
512 + int start = ex->fe_start, len0 = len;
513 + int ord, mlen, max, cur;
516 + start = ex->fe_start + ex->fe_len - 1;
518 + ord = mb_find_order_for_block(e3b, start);
519 + if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
520 + len >= (1 << ord)) {
521 + /* the whole chunk may be allocated at once! */
523 + buddy = mb_find_buddy(e3b, ord, &max);
524 + J_ASSERT((start >> ord) < max);
525 + clear_bit(start >> ord, buddy);
526 + e3b->bd_bd->bb_counters[ord]--;
529 + J_ASSERT(len >= 0);
530 + J_ASSERT(start >= 0);
534 + /* we have to split large buddy */
536 + buddy = mb_find_buddy(e3b, ord, &max);
537 + clear_bit(start >> ord, buddy);
538 + e3b->bd_bd->bb_counters[ord]--;
541 + cur = (start >> ord) & ~1U;
542 + buddy = mb_find_buddy(e3b, ord, &max);
543 + set_bit(cur, buddy);
544 + set_bit(cur + 1, buddy);
545 + e3b->bd_bd->bb_counters[ord]++;
546 + e3b->bd_bd->bb_counters[ord]++;
549 + /* now drop all the bits in bitmap */
550 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
552 + mb_check_buddy(e3b);
557 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
558 + struct ext3_free_extent *ex, int len)
560 + int start = ex->fe_start, len0 = len;
561 + int ord, mlen, max, cur;
565 + ord = mb_find_order_for_block(e3b, start);
567 + if (((start >> ord) << ord) == start && len >= (1 << ord)) {
568 + /* the whole chunk may be allocated at once! */
570 + buddy = mb_find_buddy(e3b, ord, &max);
571 + J_ASSERT((start >> ord) < max);
572 + clear_bit(start >> ord, buddy);
573 + e3b->bd_bd->bb_counters[ord]--;
576 + J_ASSERT(len >= 0);
580 + /* we have to split large buddy */
582 + buddy = mb_find_buddy(e3b, ord, &max);
583 + clear_bit(start >> ord, buddy);
584 + e3b->bd_bd->bb_counters[ord]--;
587 + cur = (start >> ord) & ~1U;
588 + buddy = mb_find_buddy(e3b, ord, &max);
589 + set_bit(cur, buddy);
590 + set_bit(cur + 1, buddy);
591 + e3b->bd_bd->bb_counters[ord]++;
592 + e3b->bd_bd->bb_counters[ord]++;
595 + /* now drop all the bits in bitmap */
596 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
598 + mb_check_buddy(e3b);
603 +int inline mb_mark_used(struct ext3_buddy *e3b,
604 + struct ext3_free_extent *ex, int len)
609 + if (ex->fe_back == 0)
610 + err = mb_mark_used_forward(e3b, ex, len);
612 + err = mb_mark_used_backward(e3b, ex, len);
616 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
617 + struct ext3_buddy *e3b, int group)
619 + struct super_block *sb = ac->ac_sb;
620 + int err, gorder, max, i;
621 + struct ext3_free_extent curex;
623 + /* let's know order of allocation */
625 + while (ac->ac_g_len > (1 << gorder))
628 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
629 + /* someone asks for space at this specified block
630 + * probably he wants to merge it into existing extent */
631 + if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
632 + /* good. at least one block is free */
633 + max = mb_find_extent(e3b, 0, ac->ac_g_start,
634 + ac->ac_g_len, &curex);
635 + max = min(curex.fe_len, ac->ac_g_len);
636 + mb_mark_used(e3b, &curex, max);
638 + ac->ac_b_group = group;
639 + ac->ac_b_start = curex.fe_start;
640 + ac->ac_b_len = max;
641 + ac->ac_status = AC_STATUS_FOUND;
645 + /* don't try to find goal anymore */
646 + ac->ac_g_flags &= ~1;
651 + i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
652 + if (i >= sb->s_blocksize * 8)
655 + max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
656 + if (max >= ac->ac_g_len) {
657 + max = min(curex.fe_len, ac->ac_g_len);
658 + mb_mark_used(e3b, &curex, max);
660 + ac->ac_b_group = group;
661 + ac->ac_b_start = curex.fe_start;
662 + ac->ac_b_len = max;
663 + ac->ac_status = AC_STATUS_FOUND;
675 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
677 + struct ext3_group_desc *gdp;
680 + gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
683 + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
684 + if (free_blocks == 0)
687 + /* someone wants this block very much */
688 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
691 + /* FIXME: I'd like to take fragmentation into account here */
693 + if (free_blocks >= ac->ac_g_len >> 1)
695 + } else if (cr == 1) {
696 + if (free_blocks >= ac->ac_g_len >> 2)
698 + } else if (cr == 2) {
706 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
707 + unsigned long goal, int *len, int flags, int *errp)
709 + struct buffer_head *bitmap_bh = NULL;
710 + struct ext3_allocation_context ac;
711 + int i, group, block, cr, err = 0;
712 + struct ext3_group_desc *gdp;
713 + struct ext3_super_block *es;
714 + struct buffer_head *gdp_bh;
715 + struct ext3_sb_info *sbi;
716 + struct super_block *sb;
717 + struct ext3_buddy e3b;
719 + J_ASSERT(len != NULL);
720 + J_ASSERT(*len > 0);
724 + printk("ext3_mb_new_nblocks: nonexistent device");
728 + ext3_mb_poll_new_transaction(sb, handle);
731 + es = EXT3_SB(sb)->s_es;
733 + if (!(flags & 2)) {
734 + /* someone asks for non-reserved blocks */
736 + err = ext3_mb_reserve_blocks(sb, 1);
744 + * Check quota for allocation of these blocks.
746 + while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
754 + /* start searching from the goal */
755 + if (goal < le32_to_cpu(es->s_first_data_block) ||
756 + goal >= le32_to_cpu(es->s_blocks_count))
757 + goal = le32_to_cpu(es->s_first_data_block);
758 + group = (goal - le32_to_cpu(es->s_first_data_block)) /
759 + EXT3_BLOCKS_PER_GROUP(sb);
760 + block = ((goal - le32_to_cpu(es->s_first_data_block)) %
761 + EXT3_BLOCKS_PER_GROUP(sb));
763 + /* set up allocation goals */
764 + ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
766 + ac.ac_groups_scanned = 0;
767 + ac.ac_sb = inode->i_sb;
768 + ac.ac_g_group = group;
769 + ac.ac_g_start = block;
770 + ac.ac_g_len = *len;
771 + ac.ac_g_flags = flags;
773 + /* loop over the groups */
774 + for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
775 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
776 + if (group == EXT3_SB(sb)->s_groups_count)
779 + /* check whether this group is good for our criteria */
780 + if (!mb_good_group(&ac, group, cr))
783 + err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
787 + ext3_lock_group(sb, group);
788 + if (!mb_good_group(&ac, group, cr)) {
789 + /* someone did allocation from this group */
790 + ext3_unlock_group(sb, group);
791 + ext3_mb_release_desc(&e3b);
795 + err = ext3_mb_new_in_group(&ac, &e3b, group);
796 + ext3_unlock_group(sb, group);
797 + if (ac.ac_status == AC_STATUS_FOUND)
798 + ext3_mb_dirty_buddy(&e3b);
799 + ext3_mb_release_desc(&e3b);
802 + if (ac.ac_status == AC_STATUS_FOUND)
807 + if (ac.ac_status != AC_STATUS_FOUND) {
808 + /* unfortunately, we can't satisfy this request */
809 + J_ASSERT(ac.ac_b_len == 0);
810 + DQUOT_FREE_BLOCK(inode, *len);
816 + /* good news - free block(s) have been found. now it's time
817 + * to mark block(s) in good old journaled bitmap */
818 + block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
819 + + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
821 + /* we made a decision, now mark found blocks in good old
822 + * bitmap to be journaled */
824 + ext3_debug("using block group %d(%d)\n",
825 + ac.ac_b_group.group, gdp->bg_free_blocks_count);
827 + bitmap_bh = read_block_bitmap_bh(sb, ac.ac_b_group);
833 + err = ext3_journal_get_write_access(handle, bitmap_bh);
839 + gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
845 + err = ext3_journal_get_write_access(handle, gdp_bh);
849 + block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
850 + + le32_to_cpu(es->s_first_data_block);
852 + if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
853 + block == le32_to_cpu(gdp->bg_inode_bitmap) ||
854 + in_range(block, le32_to_cpu(gdp->bg_inode_table),
855 + EXT3_SB(sb)->s_itb_per_group))
856 + ext3_error(sb, "ext3_new_block",
857 + "Allocating block in system zone - "
858 + "block = %u", block);
860 + for (i = 0; i < ac.ac_b_len; i++)
861 + J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
863 + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
865 + ext3_lock_group(sb, ac.ac_b_group);
866 + gdp->bg_free_blocks_count =
867 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
869 + ext3_unlock_group(sb, ac.ac_b_group);
870 + spin_lock(&sbi->s_md_lock);
871 + es->s_free_blocks_count =
872 + cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - ac.ac_b_len);
873 + spin_unlock(&sbi->s_md_lock);
875 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
878 + err = ext3_journal_dirty_metadata(handle, gdp_bh);
885 + /* drop non-allocated, but dquote'd blocks */
886 + J_ASSERT(*len >= ac.ac_b_len);
887 + DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
889 + *len = ac.ac_b_len;
890 + J_ASSERT(block != 0);
894 + /* if we've already allocated something, roll it back */
895 + if (ac.ac_status == AC_STATUS_FOUND) {
896 + /* FIXME: free blocks here */
899 + DQUOT_FREE_BLOCK(inode, *len);
903 + if (!(flags & 2)) {
904 + /* block wasn't reserved before and we reserved it
905 + * at the beginning of allocation. it doesn't matter
906 + * whether we allocated anything or we failed: time
907 + * to release reservation. NOTE: because I expect
908 + * any multiblock request from delayed allocation
909 + * path only, here is single block always */
910 + ext3_mb_release_blocks(sb, 1);
915 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
917 + struct buffer_head *bh;
918 + int i, err, count = 0;
919 + struct ext3_buddy e3b;
921 + err = ext3_mb_load_desc(sb, group, &e3b);
924 + memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
925 + memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
927 + bh = read_block_bitmap_bh(sb, group);
933 + /* loop over the blocks, and create buddies for free ones */
934 + for (i = 0; i < sb->s_blocksize * 8; i++) {
935 + if (!test_bit(i, (void *) bh->b_data)) {
936 + mb_free_blocks(&e3b, i, 1);
940 + mb_check_buddy(&e3b);
941 + ext3_mb_dirty_buddy(&e3b);
944 + ext3_mb_release_desc(&e3b);
949 +#define MB_CREDITS \
950 + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS)
952 +int ext3_mb_init_backend(struct super_block *sb)
954 + struct inode *root = sb->s_root->d_inode;
955 + struct ext3_sb_info *sbi = EXT3_SB(sb);
960 + sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
961 + sbi->s_groups_count, GFP_KERNEL);
962 + if (sbi->s_buddy_blocks == NULL) {
963 + printk("can't allocate mem for buddy maps\n");
966 + memset(sbi->s_buddy_blocks, 0,
967 + sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
968 + sbi->s_buddy = NULL;
970 + down(&root->i_sem);
971 + db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
972 + strlen(EXT3_BUDDY_FILE));
975 + printk("can't lookup buddy file: %d\n", err);
979 + if (db->d_inode != NULL) {
980 + sbi->s_buddy = igrab(db->d_inode);
984 + err = ext3_create(root, db, S_IFREG, NULL);
986 + printk("error while creation buddy file: %d\n", err);
988 + sbi->s_buddy = igrab(db->d_inode);
992 + for (i = 0; i < sbi->s_groups_count; i++) {
993 + struct buffer_head *bh = NULL;
996 + handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
997 + if (IS_ERR(handle)) {
998 + err = PTR_ERR(handle);
1002 + /* allocate block for bitmap */
1003 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1005 + printk("can't get block for buddy bitmap: %d\n", err);
1008 + sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
1011 + /* allocate block for buddy */
1012 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1014 + printk("can't get block for buddy: %d\n", err);
1017 + sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
1019 + ext3_journal_stop(handle, sbi->s_buddy);
1020 + spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
1021 + sbi->s_buddy_blocks[i].bb_md_cur = NULL;
1022 + sbi->s_buddy_blocks[i].bb_tid = 0;
1025 + if ((target = log_start_commit(sbi->s_journal, NULL)))
1026 + log_wait_commit(sbi->s_journal, target);
1035 +int ext3_mb_release(struct super_block *sb)
1037 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1039 + if (!test_opt(sb, MBALLOC))
1042 + /* release freed, non-committed blocks */
1043 + spin_lock(&sbi->s_md_lock);
1044 + list_splice_init(&sbi->s_closed_transaction,
1045 + &sbi->s_committed_transaction);
1046 + list_splice_init(&sbi->s_active_transaction,
1047 + &sbi->s_committed_transaction);
1048 + spin_unlock(&sbi->s_md_lock);
1049 + ext3_mb_free_committed_blocks(sb);
1051 + if (sbi->s_buddy_blocks)
1052 + kfree(sbi->s_buddy_blocks);
1054 + iput(sbi->s_buddy);
1055 + if (sbi->s_blocks_reserved)
1056 + printk("ext3-fs: %ld blocks being reserved at umount!\n",
1057 + sbi->s_blocks_reserved);
1061 +int ext3_mb_init(struct super_block *sb)
1063 + struct ext3_super_block *es;
1066 + if (!test_opt(sb, MBALLOC))
1069 + /* init file for buddy data */
1070 + clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1071 + ext3_mb_init_backend(sb);
1073 + es = EXT3_SB(sb)->s_es;
1074 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
1075 + ext3_mb_generate_buddy(sb, i);
1076 + spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1077 + spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1078 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1079 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1080 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1081 + set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1082 + printk("EXT3-fs: mballoc enabled\n");
1086 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1088 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1089 + int err, i, count = 0, count2 = 0;
1090 + struct ext3_free_metadata *md;
1091 + struct ext3_buddy e3b;
1093 + if (list_empty(&sbi->s_committed_transaction))
1096 + /* there are committed blocks to be freed yet */
1098 + /* get next array of blocks */
1100 + spin_lock(&sbi->s_md_lock);
1101 + if (!list_empty(&sbi->s_committed_transaction)) {
1102 + md = list_entry(sbi->s_committed_transaction.next,
1103 + struct ext3_free_metadata, list);
1104 + list_del(&md->list);
1106 + spin_unlock(&sbi->s_md_lock);
1111 + mb_debug("gonna free %u blocks in group %u (0x%p):",
1112 + md->num, md->group, md);
1114 + err = ext3_mb_load_desc(sb, md->group, &e3b);
1117 + /* there are blocks to put in buddy to make them really free */
1120 + ext3_lock_group(sb, md->group);
1121 + for (i = 0; i < md->num; i++) {
1122 + mb_debug(" %u", md->blocks[i]);
1123 + mb_free_blocks(&e3b, md->blocks[i], 1);
1126 + ext3_unlock_group(sb, md->group);
1129 + ext3_mb_dirty_buddy(&e3b);
1130 + ext3_mb_release_desc(&e3b);
1133 + mb_debug("freed %u blocks in %u structures\n", count, count2);
1136 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1138 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1140 + if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1143 + /* new transaction! time to close last one and free blocks for
1144 + * committed transaction. we know that only one transaction can be
1145 + * active, so the previous transaction may still be being logged and we
1146 + * know that the transaction before previous is known to be already
1147 + * logged. this means that now we may free blocks freed in all
1148 + * transactions before previous one. hope I'm clear enough ... */
1150 + spin_lock(&sbi->s_md_lock);
1151 + if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1152 + mb_debug("new transaction %lu, old %lu\n",
1153 + (unsigned long) handle->h_transaction->t_tid,
1154 + (unsigned long) sbi->s_last_transaction);
1155 + list_splice_init(&sbi->s_closed_transaction,
1156 + &sbi->s_committed_transaction);
1157 + list_splice_init(&sbi->s_active_transaction,
1158 + &sbi->s_closed_transaction);
1159 + sbi->s_last_transaction = handle->h_transaction->t_tid;
1161 + spin_unlock(&sbi->s_md_lock);
1163 + ext3_mb_free_committed_blocks(sb);
1166 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1167 + int group, int block, int count)
1169 + struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1170 + struct super_block *sb = e3b->bd_sb;
1171 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1172 + struct ext3_free_metadata *md;
1175 + ext3_lock_group(sb, group);
1176 + for (i = 0; i < count; i++) {
1177 + md = db->bb_md_cur;
1178 + if (md && db->bb_tid != handle->h_transaction->t_tid) {
1179 + db->bb_md_cur = NULL;
1184 + ext3_unlock_group(sb, group);
1185 + md = kmalloc(sizeof(*md), GFP_KERNEL);
1189 + md->group = group;
1191 + ext3_lock_group(sb, group);
1192 + if (db->bb_md_cur == NULL) {
1193 + spin_lock(&sbi->s_md_lock);
1194 + list_add(&md->list, &sbi->s_active_transaction);
1195 + spin_unlock(&sbi->s_md_lock);
1196 + db->bb_md_cur = md;
1197 + db->bb_tid = handle->h_transaction->t_tid;
1198 + mb_debug("new md 0x%p for group %u\n",
1202 + md = db->bb_md_cur;
1206 + BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1207 + md->blocks[md->num] = block + i;
1209 + if (md->num == EXT3_BB_MAX_BLOCKS) {
1210 + /* no more space, put full container on a sb's list */
1211 + db->bb_md_cur = NULL;
1214 + ext3_unlock_group(sb, group);
1218 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1219 + unsigned long block, unsigned long count, int metadata)
1221 + struct buffer_head *bitmap_bh = NULL;
1222 + struct ext3_group_desc *gdp;
1223 + struct ext3_super_block *es;
1224 + unsigned long bit, overflow;
1225 + struct buffer_head *gd_bh;
1226 + unsigned long block_group;
1227 + struct ext3_sb_info *sbi;
1228 + struct super_block *sb;
1229 + struct ext3_buddy e3b;
1234 + printk ("ext3_free_blocks: nonexistent device");
1238 + ext3_mb_poll_new_transaction(sb, handle);
1240 + sbi = EXT3_SB(sb);
1241 + es = EXT3_SB(sb)->s_es;
1242 + if (block < le32_to_cpu(es->s_first_data_block) ||
1243 + block + count < block ||
1244 + block + count > le32_to_cpu(es->s_blocks_count)) {
1245 + ext3_error (sb, "ext3_free_blocks",
1246 + "Freeing blocks not in datazone - "
1247 + "block = %lu, count = %lu", block, count);
1248 + goto error_return;
1251 + ext3_debug("freeing block %lu\n", block);
1255 + block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1256 + EXT3_BLOCKS_PER_GROUP(sb);
1257 + bit = (block - le32_to_cpu(es->s_first_data_block)) %
1258 + EXT3_BLOCKS_PER_GROUP(sb);
1260 + * Check to see if we are freeing blocks across a group
1263 + if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1264 + overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1265 + count -= overflow;
1267 + bitmap_bh = read_block_bitmap_bh(sb, block_group);
1269 + goto error_return;
1270 + gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1272 + goto error_return;
1274 + if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1275 + in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1276 + in_range (block, le32_to_cpu(gdp->bg_inode_table),
1277 + EXT3_SB(sb)->s_itb_per_group) ||
1278 + in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1279 + EXT3_SB(sb)->s_itb_per_group))
1280 + ext3_error (sb, "ext3_free_blocks",
1281 + "Freeing blocks in system zones - "
1282 + "Block = %lu, count = %lu",
1285 + BUFFER_TRACE(bitmap_bh, "getting write access");
1286 + err = ext3_journal_get_write_access(handle, bitmap_bh);
1288 + goto error_return;
1291 + * We are about to modify some metadata. Call the journal APIs
1292 + * to unshare ->b_data if a currently-committing transaction is
1295 + BUFFER_TRACE(gd_bh, "get_write_access");
1296 + err = ext3_journal_get_write_access(handle, gd_bh);
1298 + goto error_return;
1300 + err = ext3_mb_load_desc(sb, block_group, &e3b);
1302 + goto error_return;
1305 + /* blocks being freed are metadata. these blocks shouldn't
1306 + * be used until this transaction is committed */
1307 + ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1309 + ext3_lock_group(sb, block_group);
1310 + mb_free_blocks(&e3b, bit, count);
1311 + gdp->bg_free_blocks_count =
1312 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1313 + ext3_unlock_group(sb, block_group);
1314 + spin_lock(&sbi->s_md_lock);
1315 + es->s_free_blocks_count =
1316 + cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) + count);
1317 + spin_unlock(&sbi->s_md_lock);
1320 + ext3_mb_dirty_buddy(&e3b);
1321 + ext3_mb_release_desc(&e3b);
1323 + /* FIXME: undo logic will be implemented later and another way */
1324 + mb_clear_bits(bitmap_bh->b_data, bit, count);
1325 + DQUOT_FREE_BLOCK(inode, count);
1327 + /* We dirtied the bitmap block */
1328 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1329 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1331 + /* And the group descriptor block */
1332 + BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1333 + ret = ext3_journal_dirty_metadata(handle, gd_bh);
1334 + if (!err) err = ret;
1336 + if (overflow && !err) {
1343 + ext3_std_error(sb, err);
1347 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1349 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1350 + struct ext3_super_block *es;
1351 + int free, ret = -ENOSPC;
1353 + BUG_ON(blocks < 0);
1354 + es = EXT3_SB(sb)->s_es;
1355 + spin_lock(&sbi->s_reserve_lock);
1356 + free = le32_to_cpu(es->s_free_blocks_count);
1357 + if (blocks <= free - sbi->s_blocks_reserved) {
1358 + sbi->s_blocks_reserved += blocks;
1361 + spin_unlock(&sbi->s_reserve_lock);
1365 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1367 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1369 + BUG_ON(blocks < 0);
1370 + spin_lock(&sbi->s_reserve_lock);
1371 + sbi->s_blocks_reserved -= blocks;
1372 + if (sbi->s_blocks_reserved < 0)
1373 + printk("EXT3-fs: reserve leak %ld\n", sbi->s_blocks_reserved);
1374 + if (sbi->s_blocks_reserved < 0)
1375 + sbi->s_blocks_reserved = 0;
1376 + spin_unlock(&sbi->s_reserve_lock);
1379 +int ext3_new_block(handle_t *handle, struct inode *inode,
1380 + unsigned long goal, u32 *pc, u32 *pb, int *errp)
1384 + if (!test_opt(inode->i_sb, MBALLOC)) {
1385 + ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp);
1389 + ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1395 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1396 + unsigned long block, unsigned long count, int metadata)
1398 + if (!test_opt(inode->i_sb, MBALLOC))
1399 + ext3_free_blocks_old(handle, inode, block, count);
1401 + ext3_mb_free_blocks(handle, inode, block, count, metadata);
1405 Index: linux-2.4.24/fs/ext3/super.c
1406 ===================================================================
1407 --- linux-2.4.24.orig/fs/ext3/super.c 2004-08-06 03:59:09.000000000 +0400
1408 +++ linux-2.4.24/fs/ext3/super.c 2004-08-06 03:59:09.000000000 +0400
1410 kdev_t j_dev = sbi->s_journal->j_dev;
1413 + ext3_mb_release(sb);
1414 J_ASSERT(sbi->s_delete_inodes == 0);
1415 ext3_ext_release(sb);
1416 ext3_xattr_put_super(sb);
1418 else if (want_numeric(value, "journal", inum))
1421 + else if (!strcmp (this_char, "mballoc"))
1422 + set_opt (*mount_options, MBALLOC);
1423 else if (!strcmp (this_char, "noload"))
1424 set_opt (*mount_options, NOLOAD);
1425 else if (!strcmp (this_char, "data")) {
1426 @@ -1406,6 +1409,7 @@
1432 if (test_opt(sb, PDIROPS)) {
1433 printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
1434 Index: linux-2.4.24/fs/ext3/Makefile
1435 ===================================================================
1436 --- linux-2.4.24.orig/fs/ext3/Makefile 2004-08-06 03:59:07.000000000 +0400
1437 +++ linux-2.4.24/fs/ext3/Makefile 2004-08-06 03:59:09.000000000 +0400
1440 obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1441 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
1442 - xattr_trusted.o extents.o extents-in-ea.o
1443 + xattr_trusted.o extents.o extents-in-ea.o mballoc.o
1444 export-objs += extents.o
1446 obj-m := $(O_TARGET)
1447 Index: linux-2.4.24/fs/ext3/balloc.c
1448 ===================================================================
1449 --- linux-2.4.24.orig/fs/ext3/balloc.c 2004-08-06 00:42:23.000000000 +0400
1450 +++ linux-2.4.24/fs/ext3/balloc.c 2004-08-06 03:59:09.000000000 +0400
1452 * differentiating between a group for which we have never performed a bitmap
1453 * IO request, and a group for which the last bitmap read request failed.
1455 -static inline int load_block_bitmap (struct super_block * sb,
1456 - unsigned int block_group)
1457 +int load_block_bitmap (struct super_block * sb, unsigned int block_group)
1464 /* Free given blocks, update quota and i_blocks field */
1465 -void ext3_free_blocks (handle_t *handle, struct inode * inode,
1466 - unsigned long block, unsigned long count)
1467 +void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
1468 + unsigned long block, unsigned long count)
1470 struct buffer_head *bitmap_bh;
1471 struct buffer_head *gd_bh;
1473 * bitmap, and then for any free bit if that fails.
1474 * This function also updates quota and i_blocks field.
1476 -int ext3_new_block (handle_t *handle, struct inode * inode,
1477 - unsigned long goal, u32 * prealloc_count,
1478 - u32 * prealloc_block, int * errp)
1479 +int ext3_new_block_old (handle_t *handle, struct inode * inode,
1480 + unsigned long goal, u32 * prealloc_count,
1481 + u32 * prealloc_block, int * errp)
1483 struct buffer_head * bh, *bhtmp;
1484 struct buffer_head * bh2;
1485 Index: linux-2.4.24/fs/ext3/namei.c
1486 ===================================================================
1487 --- linux-2.4.24.orig/fs/ext3/namei.c 2004-08-06 03:59:09.000000000 +0400
1488 +++ linux-2.4.24/fs/ext3/namei.c 2004-08-06 03:59:09.000000000 +0400
1489 @@ -1944,7 +1944,7 @@
1490 * If the create succeeds, we fill in the inode information
1491 * with d_instantiate().
1493 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
1494 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
1497 struct inode * inode;
1498 Index: linux-2.4.24/fs/ext3/inode.c
1499 ===================================================================
1500 --- linux-2.4.24.orig/fs/ext3/inode.c 2004-08-06 03:59:09.000000000 +0400
1501 +++ linux-2.4.24/fs/ext3/inode.c 2004-08-06 03:59:09.000000000 +0400
1503 inode->u.ext3_i.i_prealloc_count = 0;
1504 inode->u.ext3_i.i_prealloc_block = 0;
1506 - ext3_free_blocks (inode, block, total);
1507 + ext3_free_blocks (inode, block, total, 1);
1512 ext3_journal_forget(handle, branch[i].bh);
1514 for (i = 0; i < keys; i++)
1515 - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1516 + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1522 for (i = 0; i < num; i++)
1523 ext3_free_blocks(handle, inode,
1524 - le32_to_cpu(where[i].key), 1);
1525 + le32_to_cpu(where[i].key), 1, 1);
1529 @@ -1650,7 +1650,7 @@
1533 - ext3_free_blocks(handle, inode, block_to_free, count);
1534 + ext3_free_blocks(handle, inode, block_to_free, count, 1);
1538 @@ -1821,7 +1821,7 @@
1539 ext3_journal_test_restart(handle, inode);
1542 - ext3_free_blocks(handle, inode, nr, 1);
1543 + ext3_free_blocks(handle, inode, nr, 1, 1);
1547 Index: linux-2.4.24/fs/ext3/extents.c
1548 ===================================================================
1549 --- linux-2.4.24.orig/fs/ext3/extents.c 2004-08-06 03:59:01.000000000 +0400
1550 +++ linux-2.4.24/fs/ext3/extents.c 2004-08-06 03:59:09.000000000 +0400
1552 for (i = 0; i < depth; i++) {
1555 - ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1556 + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1560 @@ -1385,7 +1385,7 @@
1561 path->p_idx->e_leaf);
1562 bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->e_leaf);
1563 ext3_forget(handle, 1, tree->inode, bh, path->p_idx->e_leaf);
1564 - ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1);
1565 + ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1, 1);
1569 @@ -1842,10 +1842,12 @@
1570 int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1571 handle_t *handle = ext3_journal_start(tree->inode, needed);
1572 struct buffer_head *bh;
1574 + int i, metadata = 0;
1577 return PTR_ERR(handle);
1578 + if (S_ISDIR(tree->inode->i_mode))
1580 if (from >= ex->e_block && to == ex->e_block + ex->e_num - 1) {
1582 unsigned long num, start;
1583 @@ -1857,7 +1859,7 @@
1584 bh = sb_get_hash_table(tree->inode->i_sb, start + i);
1585 ext3_forget(handle, 0, tree->inode, bh, start + i);
1587 - ext3_free_blocks(handle, tree->inode, start, num);
1588 + ext3_free_blocks(handle, tree->inode, start, num, metadata);
1589 } else if (from == ex->e_block && to <= ex->e_block + ex->e_num - 1) {
1590 printk("strange request: removal %lu-%lu from %u:%u\n",
1591 from, to, ex->e_block, ex->e_num);
1592 Index: linux-2.4.24/fs/ext3/xattr.c
1593 ===================================================================
1594 --- linux-2.4.24.orig/fs/ext3/xattr.c 2004-08-06 03:59:08.000000000 +0400
1595 +++ linux-2.4.24/fs/ext3/xattr.c 2004-08-06 03:59:09.000000000 +0400
1597 ext3_xattr_free_block(handle_t *handle, struct inode * inode,
1598 unsigned long block)
1600 - ext3_free_blocks(handle, inode, block, 1);
1601 + ext3_free_blocks(handle, inode, block, 1, 1);
1602 inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
1606 # define ext3_xattr_quota_free(inode) \
1607 DQUOT_FREE_BLOCK(inode, 1)
1608 # define ext3_xattr_free_block(handle, inode, block) \
1609 - ext3_free_blocks(handle, inode, block, 1)
1610 + ext3_free_blocks(handle, inode, block, 1, 1)
1613 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
1614 Index: linux-2.4.24/include/linux/ext3_fs.h
1615 ===================================================================
1616 --- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-08-06 03:59:09.000000000 +0400
1617 +++ linux-2.4.24/include/linux/ext3_fs.h 2004-08-06 03:59:09.000000000 +0400
1619 #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
1620 #define EXT3_MOUNT_EXTENTS 0x40000 /* Extents support */
1621 #define EXT3_MOUNT_EXTDEBUG 0x80000 /* Extents debug */
1622 +#define EXT3_MOUNT_MBALLOC 0x100000/* buddy allocation support */
1624 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1625 #ifndef _LINUX_EXT2_FS_H
1627 extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
1628 __u32 *, __u32 *, int *);
1629 extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1631 + unsigned long, int);
1632 extern unsigned long ext3_count_free_blocks (struct super_block *);
1633 extern void ext3_check_blocks_bitmap (struct super_block *);
1634 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1635 Index: linux-2.4.24/include/linux/ext3_fs_sb.h
1636 ===================================================================
1637 --- linux-2.4.24.orig/include/linux/ext3_fs_sb.h 2004-08-06 03:59:09.000000000 +0400
1638 +++ linux-2.4.24/include/linux/ext3_fs_sb.h 2004-08-06 04:01:55.000000000 +0400
1641 #include <linux/timer.h>
1642 #include <linux/wait.h>
1643 +#include <linux/list.h>
1649 #define EXT3_DELETE_THREAD
1651 +#define EXT3_BB_MAX_BLOCKS 30
1652 +struct ext3_free_metadata {
1653 + unsigned short group;
1654 + unsigned short num;
1655 + unsigned short blocks[EXT3_BB_MAX_BLOCKS];
1656 + struct list_head list;
1659 +#define EXT3_BB_MAX_ORDER 14
1661 +struct ext3_buddy_group_blocks {
1662 + unsigned long bb_bitmap;
1663 + unsigned long bb_buddy;
1664 + spinlock_t bb_lock;
1665 + unsigned bb_counters[EXT3_BB_MAX_ORDER];
1666 + struct ext3_free_metadata *bb_md_cur;
1667 + unsigned long bb_tid;
1671 * third extended-fs super-block data in memory
1674 wait_queue_head_t s_delete_waiter_queue;
1678 + /* for buddy allocator */
1679 + struct ext3_buddy_group_blocks *s_buddy_blocks;
1680 + struct inode *s_buddy;
1681 + long s_blocks_reserved;
1682 + spinlock_t s_reserve_lock;
1683 + struct list_head s_active_transaction;
1684 + struct list_head s_closed_transaction;
1685 + struct list_head s_committed_transaction;
1686 + spinlock_t s_md_lock;
1687 + unsigned int s_last_transaction;
1690 #endif /* _LINUX_EXT3_FS_SB */
1691 Index: linux-2.4.24/include/asm-i386/bitops.h
1692 ===================================================================
1693 --- linux-2.4.24.orig/include/asm-i386/bitops.h 2004-08-06 01:43:20.000000000 +0400
1694 +++ linux-2.4.24/include/asm-i386/bitops.h 2004-08-06 03:59:09.000000000 +0400
1695 @@ -352,6 +352,67 @@
1699 + * find_first_bit - find the first set bit in a memory region
1700 + * @addr: The address to start the search at
1701 + * @size: The maximum size to search
1703 + * Returns the bit-number of the first set bit, not the number of the byte
1704 + * containing a bit.
1706 +static __inline__ int find_first_bit(const unsigned long *addr, unsigned size)
1711 + /* This looks at memory. Mark it volatile to tell gcc not to move it around */
1712 + __asm__ __volatile__(
1713 + "xorl %%eax,%%eax\n\t"
1716 + "leal -4(%%edi),%%edi\n\t"
1717 + "bsfl (%%edi),%%eax\n"
1718 + "1:\tsubl %%ebx,%%edi\n\t"
1719 + "shll $3,%%edi\n\t"
1720 + "addl %%edi,%%eax"
1721 + :"=a" (res), "=&c" (d0), "=&D" (d1)
1722 + :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
1727 + * find_next_bit - find the next set bit in a memory region
1728 + * @addr: The address to base the search on
1729 + * @offset: The bitnumber to start searching at
1730 + * @size: The maximum size to search
1732 +static __inline__ int find_next_bit(const unsigned long *addr, int size, int offset)
1734 + const unsigned long *p = addr + (offset >> 5);
1735 + int set = 0, bit = offset & 31, res;
1739 + * Look for nonzero in the first 32 bits:
1741 + __asm__("bsfl %1,%0\n\t"
1746 + : "r" (*p >> bit));
1747 + if (set < (32 - bit))
1748 + return set + offset;
1753 + * No set bit yet, search remaining full words for a bit
1755 + res = find_first_bit (p, size - 32 * (p - addr));
1756 + return (offset + set + res);
1760 * hweightN - returns the hamming weight of a N-bit word
1761 * @x: the word to weigh