1 Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
7 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 + * GNU General Public License for more details.
19 + * You should have received a copy of the GNU General Public License
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 + * mballoc.c contains the multiblocks allocation routines
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/namei.h>
33 +#include <linux/jbd.h>
34 +#include <linux/ext3_fs.h>
35 +#include <linux/ext3_jbd.h>
36 +#include <linux/quotaops.h>
37 +#include <linux/buffer_head.h>
38 +#include <linux/module.h>
42 + * - do not scan from the beginning, try to remember first free block
43 + * - mb_mark_used_* may allocate chunk right after splitting buddy
44 + * - special flag to advise allocator to look for requested + N blocks
45 + * this may improve interaction between extents and mballoc
49 + * with AGGRESSIVE_CHECK allocator runs consistency checks over
50 + * structures. these checks slow things down a lot
52 +#define AGGRESSIVE_CHECK__
58 +#define mb_debug(fmt,a...) printk(fmt, ##a)
60 +#define mb_debug(fmt,a...)
64 + * where to save buddies structures between umount/mount (clean case only)
66 +#define EXT3_BUDDY_FILE ".buddy"
69 + * max. number of chunks to be tracked in ext3_free_extent struct
71 +#define MB_ARR_SIZE 32
73 +struct ext3_allocation_context {
74 + struct super_block *ac_sb;
82 + /* the best found extent */
87 + /* number of iterations done. we have to track to limit searching */
89 + int ac_groups_scanned;
93 +#define AC_STATUS_CONTINUE 1
94 +#define AC_STATUS_FOUND 2
101 + struct buffer_head *bd_bh;
102 + struct buffer_head *bd_bh2;
103 + struct ext3_buddy_group_blocks *bd_bd;
104 + struct super_block *bd_sb;
107 +struct ext3_free_extent {
110 + unsigned char fe_orders[MB_ARR_SIZE];
111 + unsigned char fe_nums;
112 + unsigned char fe_back;
115 +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
118 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
119 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
120 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
121 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
122 +int ext3_mb_reserve_blocks(struct super_block *, int);
123 +void ext3_mb_release_blocks(struct super_block *, int);
124 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
125 +void ext3_mb_free_committed_blocks(struct super_block *);
127 +#define mb_correct_addr_and_bit(bit,addr) \
129 + if ((unsigned) addr & 1) { \
133 + if ((unsigned) addr & 2) { \
140 +static inline int mb_test_bit(int bit, void *addr)
142 + mb_correct_addr_and_bit(bit,addr);
143 + return test_bit(bit, addr);
146 +static inline void mb_set_bit(int bit, void *addr)
148 + mb_correct_addr_and_bit(bit,addr);
149 + set_bit(bit, addr);
152 +static inline void mb_clear_bit(int bit, void *addr)
154 + mb_correct_addr_and_bit(bit,addr);
155 + clear_bit(bit, addr);
158 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
163 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
164 + J_ASSERT(max != NULL);
166 + if (order > e3b->bd_blkbits + 1)
169 + /* at order 0 we see each particular block */
170 + *max = 1 << (e3b->bd_blkbits + 3);
172 + return e3b->bd_bitmap;
174 + bb = e3b->bd_buddy;
176 + while (i < order) {
177 + bb += 1 << (e3b->bd_blkbits - i);
184 +static int ext3_mb_load_desc(struct super_block *sb, int group,
185 + struct ext3_buddy *e3b)
187 + struct ext3_sb_info *sbi = EXT3_SB(sb);
189 + J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
190 + J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
193 + e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
194 + if (e3b->bd_bh == NULL) {
195 + ext3_error(sb, "ext3_mb_load_desc",
196 + "can't get block for buddy bitmap\n");
199 + if (!buffer_uptodate(e3b->bd_bh)) {
200 + ll_rw_block(READ, 1, &e3b->bd_bh);
201 + wait_on_buffer(e3b->bd_bh);
203 + J_ASSERT(buffer_uptodate(e3b->bd_bh));
206 + e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
207 + if (e3b->bd_bh2 == NULL) {
208 + ext3_error(sb, "ext3_mb_load_desc",
209 + "can't get block for buddy bitmap\n");
212 + if (!buffer_uptodate(e3b->bd_bh2)) {
213 + ll_rw_block(READ, 1, &e3b->bd_bh2);
214 + wait_on_buffer(e3b->bd_bh2);
216 + J_ASSERT(buffer_uptodate(e3b->bd_bh2));
218 + e3b->bd_bitmap = e3b->bd_bh->b_data;
219 + e3b->bd_buddy = e3b->bd_bh2->b_data;
220 + e3b->bd_blkbits = sb->s_blocksize_bits;
221 + e3b->bd_bd = sbi->s_buddy_blocks + group;
226 + brelse(e3b->bd_bh);
227 + brelse(e3b->bd_bh2);
229 + e3b->bd_bh2 = NULL;
233 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
235 + mark_buffer_dirty(e3b->bd_bh);
236 + mark_buffer_dirty(e3b->bd_bh2);
239 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
241 + brelse(e3b->bd_bh);
242 + brelse(e3b->bd_bh2);
245 +#ifdef AGGRESSIVE_CHECK
246 +static void mb_check_buddy(struct ext3_buddy *e3b)
248 + int order = e3b->bd_blkbits + 1;
249 + int max, max2, i, j, k, count;
250 + void *buddy, *buddy2;
252 + if (!test_opt(e3b->bd_sb, MBALLOC))
255 + while (order > 1) {
256 + buddy = mb_find_buddy(e3b, order, &max);
258 + buddy2 = mb_find_buddy(e3b, order - 1, &max2);
260 + J_ASSERT(buddy != buddy2);
261 + J_ASSERT(max * 2 == max2);
264 + for (i = 0; i < max; i++) {
266 + if (!mb_test_bit(i, buddy)) {
267 + /* only single bit in buddy2 may be 1 */
268 + if (mb_test_bit(i << 1, buddy2))
269 + J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
270 + else if (mb_test_bit((i << 1) + 1, buddy2))
271 + J_ASSERT(!mb_test_bit(i << 1, buddy2));
275 + /* both bits in buddy2 must be 0 */
276 + J_ASSERT(!mb_test_bit(i << 1, buddy2));
277 + J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
279 + for (j = 0; j < (1 << order); j++) {
280 + k = (i * (1 << order)) + j;
281 + J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
285 + J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
289 + buddy = mb_find_buddy(e3b, 0, &max);
290 + for (i = 0; i < max; i++) {
291 + if (mb_test_bit(i, buddy))
293 + /* check used bits only */
294 + for (j = 0; j < e3b->bd_blkbits + 1; j++) {
295 + buddy2 = mb_find_buddy(e3b, j, &max2);
297 + J_ASSERT(k < max2);
298 + J_ASSERT(!mb_test_bit(k, buddy2));
303 +#define mb_check_buddy(e3b)
307 +ext3_lock_group(struct super_block *sb, int group)
309 + spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
313 +ext3_unlock_group(struct super_block *sb, int group)
315 + spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
318 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
323 + J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
324 + J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
326 + bb = e3b->bd_buddy;
327 + while (order <= e3b->bd_blkbits + 1) {
328 + block = block >> 1;
329 + if (mb_test_bit(block, bb)) {
330 + /* this block is part of buddy of order 'order' */
333 + bb += 1 << (e3b->bd_blkbits - order);
339 +static inline void mb_clear_bits(void *bm, int cur, int len)
344 + while (cur < len) {
345 + if ((cur & 31) == 0 && (len - cur) >= 32) {
346 + /* fast path: clear whole word at once */
347 + addr = bm + (cur >> 3);
352 + mb_clear_bit(cur, bm);
357 +static inline void mb_set_bits(void *bm, int cur, int len)
362 + while (cur < len) {
363 + if ((cur & 31) == 0 && (len - cur) >= 32) {
364 + /* fast path: set whole word at once */
365 + addr = bm + (cur >> 3);
366 + *addr = 0xffffffff;
370 + mb_set_bit(cur, bm);
375 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
377 + int block, max, order;
378 + void *buddy, *buddy2;
380 + mb_check_buddy(e3b);
381 + while (count-- > 0) {
385 + J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
386 + mb_set_bit(block, e3b->bd_bitmap);
387 + e3b->bd_bd->bb_counters[order]++;
389 + /* start of the buddy */
390 + buddy = mb_find_buddy(e3b, order, &max);
394 + if (!mb_test_bit(block, buddy) ||
395 + !mb_test_bit(block + 1, buddy))
398 + /* both the buddies are free, try to coalesce them */
399 + buddy2 = mb_find_buddy(e3b, order + 1, &max);
405 + /* for special purposes, we don't clear
406 + * free bits in bitmap */
407 + mb_clear_bit(block, buddy);
408 + mb_clear_bit(block + 1, buddy);
410 + e3b->bd_bd->bb_counters[order]--;
411 + e3b->bd_bd->bb_counters[order]--;
413 + block = block >> 1;
415 + e3b->bd_bd->bb_counters[order]++;
417 + mb_set_bit(block, buddy2);
421 + mb_check_buddy(e3b);
427 + * returns 1 if out extent is enough to fill needed space
429 +int mb_make_backward_extent(struct ext3_free_extent *in,
430 + struct ext3_free_extent *out, int needed)
436 + J_ASSERT(in->fe_nums < MB_ARR_SIZE);
439 + out->fe_start = in->fe_start + in->fe_len;
442 + /* for single-chunk extent we need not back order
443 + * also, if an extent doesn't fill needed space
444 + * then it makes no sense to try back order because
445 + * if we select this extent then it'll be used as is */
446 + if (in->fe_nums < 2 || in->fe_len < needed)
449 + i = in->fe_nums - 1;
450 + while (i >= 0 && out->fe_len < needed) {
451 + out->fe_len += (1 << in->fe_orders[i]);
452 + out->fe_start -= (1 << in->fe_orders[i]);
455 + /* FIXME: in some situation fe_orders may be too small to hold
456 + * all the buddies */
457 + J_ASSERT(out->fe_len >= needed);
459 + for (i++; i < in->fe_nums; i++)
460 + out->fe_orders[out->fe_nums++] = in->fe_orders[i];
461 + J_ASSERT(out->fe_nums < MB_ARR_SIZE);
467 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
468 + int needed, struct ext3_free_extent *ex)
470 + int space = needed;
471 + int next, max, ord;
474 + J_ASSERT(ex != NULL);
479 + buddy = mb_find_buddy(e3b, order, &max);
481 + J_ASSERT(block < max);
482 + if (!mb_test_bit(block, buddy))
486 + /* find actual order */
487 + order = mb_find_order_for_block(e3b, block);
488 + block = block >> order;
491 + ex->fe_orders[ex->fe_nums++] = order;
492 + ex->fe_len = 1 << order;
493 + ex->fe_start = block << order;
496 + while ((space = space - (1 << order)) > 0) {
498 + buddy = mb_find_buddy(e3b, order, &max);
501 + if (block + 1 >= max)
504 + next = (block + 1) * (1 << order);
505 + if (!mb_test_bit(next, e3b->bd_bitmap))
508 + ord = mb_find_order_for_block(e3b, next);
510 + if ((1 << ord) >= needed) {
511 + /* we don't want to coalesce with self-sufficient buddies */
515 + block = next >> order;
516 + ex->fe_len += 1 << order;
518 + if (ex->fe_nums < MB_ARR_SIZE)
519 + ex->fe_orders[ex->fe_nums++] = order;
523 + J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
527 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
528 + struct ext3_free_extent *ex, int len)
530 + int start = ex->fe_start, len0 = len;
531 + int ord, mlen, max, cur;
534 + start = ex->fe_start + ex->fe_len - 1;
536 + ord = mb_find_order_for_block(e3b, start);
537 + if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
538 + len >= (1 << ord)) {
539 + /* the whole chunk may be allocated at once! */
541 + buddy = mb_find_buddy(e3b, ord, &max);
542 + J_ASSERT((start >> ord) < max);
543 + mb_clear_bit(start >> ord, buddy);
544 + e3b->bd_bd->bb_counters[ord]--;
547 + J_ASSERT(len >= 0);
548 + J_ASSERT(start >= 0);
552 + /* we have to split large buddy */
554 + buddy = mb_find_buddy(e3b, ord, &max);
555 + mb_clear_bit(start >> ord, buddy);
556 + e3b->bd_bd->bb_counters[ord]--;
559 + cur = (start >> ord) & ~1U;
560 + buddy = mb_find_buddy(e3b, ord, &max);
561 + mb_set_bit(cur, buddy);
562 + mb_set_bit(cur + 1, buddy);
563 + e3b->bd_bd->bb_counters[ord]++;
564 + e3b->bd_bd->bb_counters[ord]++;
567 + /* now drop all the bits in bitmap */
568 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
570 + mb_check_buddy(e3b);
575 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
576 + struct ext3_free_extent *ex, int len)
578 + int start = ex->fe_start, len0 = len;
579 + int ord, mlen, max, cur;
583 + ord = mb_find_order_for_block(e3b, start);
585 + if (((start >> ord) << ord) == start && len >= (1 << ord)) {
586 + /* the whole chunk may be allocated at once! */
588 + buddy = mb_find_buddy(e3b, ord, &max);
589 + J_ASSERT((start >> ord) < max);
590 + mb_clear_bit(start >> ord, buddy);
591 + e3b->bd_bd->bb_counters[ord]--;
594 + J_ASSERT(len >= 0);
598 + /* we have to split large buddy */
600 + buddy = mb_find_buddy(e3b, ord, &max);
601 + mb_clear_bit(start >> ord, buddy);
602 + e3b->bd_bd->bb_counters[ord]--;
605 + cur = (start >> ord) & ~1U;
606 + buddy = mb_find_buddy(e3b, ord, &max);
607 + mb_set_bit(cur, buddy);
608 + mb_set_bit(cur + 1, buddy);
609 + e3b->bd_bd->bb_counters[ord]++;
610 + e3b->bd_bd->bb_counters[ord]++;
613 + /* now drop all the bits in bitmap */
614 + mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
616 + mb_check_buddy(e3b);
621 +int inline mb_mark_used(struct ext3_buddy *e3b,
622 + struct ext3_free_extent *ex, int len)
627 + if (ex->fe_back == 0)
628 + err = mb_mark_used_forward(e3b, ex, len);
630 + err = mb_mark_used_backward(e3b, ex, len);
634 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
635 + struct ext3_buddy *e3b, int group)
637 + struct super_block *sb = ac->ac_sb;
638 + int err, gorder, max, i;
639 + struct ext3_free_extent curex;
641 + /* let's know order of allocation */
643 + while (ac->ac_g_len > (1 << gorder))
646 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
647 + /* someone asks for space at this specified block
648 + * probably he wants to merge it into existing extent */
649 + if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
650 + /* good. at least one block is free */
651 + max = mb_find_extent(e3b, 0, ac->ac_g_start,
652 + ac->ac_g_len, &curex);
653 + max = min(curex.fe_len, ac->ac_g_len);
654 + mb_mark_used(e3b, &curex, max);
656 + ac->ac_b_group = group;
657 + ac->ac_b_start = curex.fe_start;
658 + ac->ac_b_len = max;
659 + ac->ac_status = AC_STATUS_FOUND;
663 + /* don't try to find goal anymore */
664 + ac->ac_g_flags &= ~1;
669 + i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
670 + if (i >= sb->s_blocksize * 8)
673 + max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
674 + if (max >= ac->ac_g_len) {
675 + max = min(curex.fe_len, ac->ac_g_len);
676 + mb_mark_used(e3b, &curex, max);
678 + ac->ac_b_group = group;
679 + ac->ac_b_start = curex.fe_start;
680 + ac->ac_b_len = max;
681 + ac->ac_status = AC_STATUS_FOUND;
693 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
695 + struct ext3_group_desc *gdp;
698 + gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
701 + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
702 + if (free_blocks == 0)
705 + /* someone wants this block very much */
706 + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
709 + /* FIXME: I'd like to take fragmentation into account here */
711 + if (free_blocks >= ac->ac_g_len >> 1)
713 + } else if (cr == 1) {
714 + if (free_blocks >= ac->ac_g_len >> 2)
716 + } else if (cr == 2) {
724 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
725 + unsigned long goal, int *len, int flags, int *errp)
727 + struct buffer_head *bitmap_bh = NULL;
728 + struct ext3_allocation_context ac;
729 + int i, group, block, cr, err = 0;
730 + struct ext3_group_desc *gdp;
731 + struct ext3_super_block *es;
732 + struct buffer_head *gdp_bh;
733 + struct ext3_sb_info *sbi;
734 + struct super_block *sb;
735 + struct ext3_buddy e3b;
737 + J_ASSERT(len != NULL);
738 + J_ASSERT(*len > 0);
742 + printk("ext3_mb_new_nblocks: nonexistent device");
746 + if (!test_opt(sb, MBALLOC)) {
747 + static int ext3_mballoc_warning = 0;
748 + if (ext3_mballoc_warning == 0) {
749 + printk(KERN_ERR "EXT3-fs: multiblock request with "
750 + "mballoc disabled!\n");
751 + ext3_mballoc_warning++;
754 + err = ext3_new_block_old(handle, inode, goal, errp);
758 + ext3_mb_poll_new_transaction(sb, handle);
761 + es = EXT3_SB(sb)->s_es;
763 + if (!(flags & 2)) {
764 + /* someone asks for non-reserved blocks */
766 + err = ext3_mb_reserve_blocks(sb, 1);
774 + * Check quota for allocation of these blocks.
776 + while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
784 + /* start searching from the goal */
785 + if (goal < le32_to_cpu(es->s_first_data_block) ||
786 + goal >= le32_to_cpu(es->s_blocks_count))
787 + goal = le32_to_cpu(es->s_first_data_block);
788 + group = (goal - le32_to_cpu(es->s_first_data_block)) /
789 + EXT3_BLOCKS_PER_GROUP(sb);
790 + block = ((goal - le32_to_cpu(es->s_first_data_block)) %
791 + EXT3_BLOCKS_PER_GROUP(sb));
793 + /* set up allocation goals */
794 + ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
796 + ac.ac_groups_scanned = 0;
797 + ac.ac_sb = inode->i_sb;
798 + ac.ac_g_group = group;
799 + ac.ac_g_start = block;
800 + ac.ac_g_len = *len;
801 + ac.ac_g_flags = flags;
803 + /* loop over the groups */
804 + for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
805 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
806 + if (group == EXT3_SB(sb)->s_groups_count)
809 + /* check whether the group is good for our criteria */
810 + if (!mb_good_group(&ac, group, cr))
813 + err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
817 + ext3_lock_group(sb, group);
818 + if (!mb_good_group(&ac, group, cr)) {
819 + /* someone did allocation from this group */
820 + ext3_unlock_group(sb, group);
821 + ext3_mb_release_desc(&e3b);
825 + err = ext3_mb_new_in_group(&ac, &e3b, group);
826 + ext3_unlock_group(sb, group);
827 + if (ac.ac_status == AC_STATUS_FOUND)
828 + ext3_mb_dirty_buddy(&e3b);
829 + ext3_mb_release_desc(&e3b);
832 + if (ac.ac_status == AC_STATUS_FOUND)
837 + if (ac.ac_status != AC_STATUS_FOUND) {
838 + /* unfortunately, we can't satisfy this request */
839 + J_ASSERT(ac.ac_b_len == 0);
840 + DQUOT_FREE_BLOCK(inode, *len);
846 + /* good news - free block(s) have been found. now it's time
847 + * to mark block(s) in good old journaled bitmap */
848 + block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
849 + + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
851 + /* we made a decision, now mark found blocks in good old
852 + * bitmap to be journaled */
854 + ext3_debug("using block group %d(%d)\n",
855 + ac.ac_b_group.group, gdp->bg_free_blocks_count);
857 + bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
863 + err = ext3_journal_get_write_access(handle, bitmap_bh);
869 + gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
875 + err = ext3_journal_get_write_access(handle, gdp_bh);
879 + block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
880 + + le32_to_cpu(es->s_first_data_block);
882 + if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
883 + block == le32_to_cpu(gdp->bg_inode_bitmap) ||
884 + in_range(block, le32_to_cpu(gdp->bg_inode_table),
885 + EXT3_SB(sb)->s_itb_per_group))
886 + ext3_error(sb, "ext3_new_block",
887 + "Allocating block in system zone - "
888 + "block = %u", block);
890 + for (i = 0; i < ac.ac_b_len; i++)
891 + J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
893 + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
895 + ext3_lock_group(sb, ac.ac_b_group);
896 + gdp->bg_free_blocks_count =
897 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
899 + ext3_unlock_group(sb, ac.ac_b_group);
900 + percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
902 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
905 + err = ext3_journal_dirty_metadata(handle, gdp_bh);
913 + /* drop non-allocated, but dquote'd blocks */
914 + J_ASSERT(*len >= ac.ac_b_len);
915 + DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
917 + *len = ac.ac_b_len;
918 + J_ASSERT(block != 0);
922 + /* if we've already allocated something, roll it back */
923 + if (ac.ac_status == AC_STATUS_FOUND) {
924 + /* FIXME: free blocks here */
927 + DQUOT_FREE_BLOCK(inode, *len);
932 + if (!(flags & 2)) {
933 + /* block wasn't reserved before and we reserved it
934 + * at the beginning of allocation. it doesn't matter
935 + * whether we allocated anything or we failed: time
936 + * to release reservation. NOTE: because I expect
937 + * any multiblock request from delayed allocation
938 + * path only, here is single block always */
939 + ext3_mb_release_blocks(sb, 1);
944 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
946 + struct buffer_head *bh;
947 + int i, err, count = 0;
948 + struct ext3_buddy e3b;
950 + err = ext3_mb_load_desc(sb, group, &e3b);
953 + memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
954 + memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
956 + bh = read_block_bitmap(sb, group);
962 + /* loop over the blocks, and create buddies for free ones */
963 + for (i = 0; i < sb->s_blocksize * 8; i++) {
964 + if (!mb_test_bit(i, (void *) bh->b_data)) {
965 + mb_free_blocks(&e3b, i, 1);
970 + mb_check_buddy(&e3b);
971 + ext3_mb_dirty_buddy(&e3b);
974 + ext3_mb_release_desc(&e3b);
979 +EXPORT_SYMBOL(ext3_mb_new_blocks);
981 +#define MB_CREDITS \
982 + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
983 + + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
985 +int ext3_mb_init_backend(struct super_block *sb)
987 + struct inode *root = sb->s_root->d_inode;
988 + struct ext3_sb_info *sbi = EXT3_SB(sb);
993 + sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
994 + sbi->s_groups_count, GFP_KERNEL);
995 + if (sbi->s_buddy_blocks == NULL) {
996 + printk("can't allocate mem for buddy maps\n");
999 + memset(sbi->s_buddy_blocks, 0,
1000 + sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
1001 + sbi->s_buddy = NULL;
1003 + down(&root->i_sem);
1004 + db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
1005 + strlen(EXT3_BUDDY_FILE));
1007 + err = PTR_ERR(db);
1008 + printk("can't lookup buddy file: %d\n", err);
1012 + if (db->d_inode != NULL) {
1013 + sbi->s_buddy = igrab(db->d_inode);
1017 + err = ext3_create(root, db, S_IFREG, NULL);
1019 + printk("error while creation buddy file: %d\n", err);
1021 + sbi->s_buddy = igrab(db->d_inode);
1025 + for (i = 0; i < sbi->s_groups_count; i++) {
1026 + struct buffer_head *bh = NULL;
1029 + handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1030 + if (IS_ERR(handle)) {
1031 + err = PTR_ERR(handle);
1035 + /* allocate block for bitmap */
1036 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1038 + printk("can't get block for buddy bitmap: %d\n", err);
1041 + sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
1044 + /* allocate block for buddy */
1045 + bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1047 + printk("can't get block for buddy: %d\n", err);
1050 + sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
1052 + ext3_journal_stop(handle);
1053 + spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
1054 + sbi->s_buddy_blocks[i].bb_md_cur = NULL;
1055 + sbi->s_buddy_blocks[i].bb_tid = 0;
1058 + if (journal_start_commit(sbi->s_journal, &target))
1059 + log_wait_commit(sbi->s_journal, target);
1068 +int ext3_mb_release(struct super_block *sb)
1070 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1072 + if (!test_opt(sb, MBALLOC))
1075 + /* release freed, non-committed blocks */
1076 + spin_lock(&sbi->s_md_lock);
1077 + list_splice_init(&sbi->s_closed_transaction,
1078 + &sbi->s_committed_transaction);
1079 + list_splice_init(&sbi->s_active_transaction,
1080 + &sbi->s_committed_transaction);
1081 + spin_unlock(&sbi->s_md_lock);
1082 + ext3_mb_free_committed_blocks(sb);
1084 + if (sbi->s_buddy_blocks)
1085 + kfree(sbi->s_buddy_blocks);
1087 + iput(sbi->s_buddy);
1088 + if (sbi->s_blocks_reserved)
1089 + printk("ext3-fs: %ld blocks being reserved at umount!\n",
1090 + sbi->s_blocks_reserved);
1094 +int ext3_mb_init(struct super_block *sb)
1096 + struct ext3_super_block *es;
1099 + if (!test_opt(sb, MBALLOC))
1102 + /* init file for buddy data */
1103 + clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1104 + ext3_mb_init_backend(sb);
1106 + es = EXT3_SB(sb)->s_es;
1107 + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
1108 + ext3_mb_generate_buddy(sb, i);
1109 + spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1110 + spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1111 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1112 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1113 + INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1114 + set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1115 + printk("EXT3-fs: mballoc enabled\n");
1119 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1121 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1122 + int err, i, count = 0, count2 = 0;
1123 + struct ext3_free_metadata *md;
1124 + struct ext3_buddy e3b;
1126 + if (list_empty(&sbi->s_committed_transaction))
1129 + /* there are committed blocks still to be freed */
1131 + /* get next array of blocks */
1133 + spin_lock(&sbi->s_md_lock);
1134 + if (!list_empty(&sbi->s_committed_transaction)) {
1135 + md = list_entry(sbi->s_committed_transaction.next,
1136 + struct ext3_free_metadata, list);
1137 + list_del(&md->list);
1139 + spin_unlock(&sbi->s_md_lock);
1144 + mb_debug("gonna free %u blocks in group %u (0x%p):",
1145 + md->num, md->group, md);
1147 + err = ext3_mb_load_desc(sb, md->group, &e3b);
1150 + /* there are blocks to put in buddy to make them really free */
1153 + ext3_lock_group(sb, md->group);
1154 + for (i = 0; i < md->num; i++) {
1155 + mb_debug(" %u", md->blocks[i]);
1156 + mb_free_blocks(&e3b, md->blocks[i], 1);
1159 + ext3_unlock_group(sb, md->group);
1162 + ext3_mb_dirty_buddy(&e3b);
1163 + ext3_mb_release_desc(&e3b);
1166 + mb_debug("freed %u blocks in %u structures\n", count, count2);
1169 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1171 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1173 + if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1176 + /* new transaction! time to close last one and free blocks for
1177 + * committed transaction. we know that only one transaction can be
1178 + * active, so previous transaction can be being logged and we
1179 + * know that transaction before previous is known to be already
1180 + * logged. this means that now we may free blocks freed in all
1181 + * transactions before previous one. hope I'm clear enough ... */
1183 + spin_lock(&sbi->s_md_lock);
1184 + if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1185 + mb_debug("new transaction %lu, old %lu\n",
1186 + (unsigned long) handle->h_transaction->t_tid,
1187 + (unsigned long) sbi->s_last_transaction);
1188 + list_splice_init(&sbi->s_closed_transaction,
1189 + &sbi->s_committed_transaction);
1190 + list_splice_init(&sbi->s_active_transaction,
1191 + &sbi->s_closed_transaction);
1192 + sbi->s_last_transaction = handle->h_transaction->t_tid;
1194 + spin_unlock(&sbi->s_md_lock);
1196 + ext3_mb_free_committed_blocks(sb);
1199 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1200 + int group, int block, int count)
1202 + struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1203 + struct super_block *sb = e3b->bd_sb;
1204 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1205 + struct ext3_free_metadata *md;
1208 + ext3_lock_group(sb, group);
1209 + for (i = 0; i < count; i++) {
1210 + md = db->bb_md_cur;
1211 + if (md && db->bb_tid != handle->h_transaction->t_tid) {
1212 + db->bb_md_cur = NULL;
1217 + ext3_unlock_group(sb, group);
1218 + md = kmalloc(sizeof(*md), GFP_KERNEL);
1222 + md->group = group;
1224 + ext3_lock_group(sb, group);
1225 + if (db->bb_md_cur == NULL) {
1226 + spin_lock(&sbi->s_md_lock);
1227 + list_add(&md->list, &sbi->s_active_transaction);
1228 + spin_unlock(&sbi->s_md_lock);
1229 + db->bb_md_cur = md;
1230 + db->bb_tid = handle->h_transaction->t_tid;
1231 + mb_debug("new md 0x%p for group %u\n",
1235 + md = db->bb_md_cur;
1239 + BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1240 + md->blocks[md->num] = block + i;
1242 + if (md->num == EXT3_BB_MAX_BLOCKS) {
1243 + /* no more space, put full container on a sb's list */
1244 + db->bb_md_cur = NULL;
1247 + ext3_unlock_group(sb, group);
1251 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1252 + unsigned long block, unsigned long count, int metadata)
1254 + struct buffer_head *bitmap_bh = NULL;
1255 + struct ext3_group_desc *gdp;
1256 + struct ext3_super_block *es;
1257 + unsigned long bit, overflow;
1258 + struct buffer_head *gd_bh;
1259 + unsigned long block_group;
1260 + struct ext3_sb_info *sbi;
1261 + struct super_block *sb;
1262 + struct ext3_buddy e3b;
1267 + printk ("ext3_free_blocks: nonexistent device");
1271 + ext3_mb_poll_new_transaction(sb, handle);
1273 + sbi = EXT3_SB(sb);
1274 + es = EXT3_SB(sb)->s_es;
1275 + if (block < le32_to_cpu(es->s_first_data_block) ||
1276 + block + count < block ||
1277 + block + count > le32_to_cpu(es->s_blocks_count)) {
1278 + ext3_error (sb, "ext3_free_blocks",
1279 + "Freeing blocks not in datazone - "
1280 + "block = %lu, count = %lu", block, count);
1281 + goto error_return;
1284 + ext3_debug("freeing block %lu\n", block);
1288 + block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1289 + EXT3_BLOCKS_PER_GROUP(sb);
1290 + bit = (block - le32_to_cpu(es->s_first_data_block)) %
1291 + EXT3_BLOCKS_PER_GROUP(sb);
1293 + * Check to see if we are freeing blocks across a group
1296 + if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1297 + overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1298 + count -= overflow;
1300 + brelse(bitmap_bh);
1301 + bitmap_bh = read_block_bitmap(sb, block_group);
1303 + goto error_return;
1304 + gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1306 + goto error_return;
1308 + if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1309 + in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1310 + in_range (block, le32_to_cpu(gdp->bg_inode_table),
1311 + EXT3_SB(sb)->s_itb_per_group) ||
1312 + in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1313 + EXT3_SB(sb)->s_itb_per_group))
1314 + ext3_error (sb, "ext3_free_blocks",
1315 + "Freeing blocks in system zones - "
1316 + "Block = %lu, count = %lu",
1319 + BUFFER_TRACE(bitmap_bh, "getting write access");
1320 + err = ext3_journal_get_write_access(handle, bitmap_bh);
1322 + goto error_return;
1325 + * We are about to modify some metadata. Call the journal APIs
1326 + * to unshare ->b_data if a currently-committing transaction is
1329 + BUFFER_TRACE(gd_bh, "get_write_access");
1330 + err = ext3_journal_get_write_access(handle, gd_bh);
1332 + goto error_return;
1334 + err = ext3_mb_load_desc(sb, block_group, &e3b);
1336 + goto error_return;
1339 + /* blocks being freed are metadata. these blocks shouldn't
1340 + * be used until this transaction is committed */
1341 + ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1343 + ext3_lock_group(sb, block_group);
1344 + mb_free_blocks(&e3b, bit, count);
1345 + gdp->bg_free_blocks_count =
1346 + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1347 + ext3_unlock_group(sb, block_group);
1348 + percpu_counter_mod(&sbi->s_freeblocks_counter, count);
1351 + ext3_mb_dirty_buddy(&e3b);
1352 + ext3_mb_release_desc(&e3b);
1354 + /* FIXME: undo logic will be implemented later and another way */
1355 + mb_clear_bits(bitmap_bh->b_data, bit, count);
1356 + DQUOT_FREE_BLOCK(inode, count);
1358 + /* We dirtied the bitmap block */
1359 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1360 + err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1362 + /* And the group descriptor block */
1363 + BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1364 + ret = ext3_journal_dirty_metadata(handle, gd_bh);
1365 + if (!err) err = ret;
1367 + if (overflow && !err) {
1374 + brelse(bitmap_bh);
1375 + ext3_std_error(sb, err);
1379 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1381 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1382 + int free, ret = -ENOSPC; /* ret starts out as -ENOSPC */
1384 + BUG_ON(blocks < 0); /* a negative request is a caller bug */
1385 + spin_lock(&sbi->s_reserve_lock); /* held across the check and the update of s_blocks_reserved */
1386 + free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1387 + if (blocks <= free - sbi->s_blocks_reserved) { /* request fits into free blocks not already reserved */
1388 + sbi->s_blocks_reserved += blocks; /* account the new reservation */
1391 + spin_unlock(&sbi->s_reserve_lock);
1395 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1397 + struct ext3_sb_info *sbi = EXT3_SB(sb);
1399 + BUG_ON(blocks < 0); /* a negative release count is a caller bug */
1400 + spin_lock(&sbi->s_reserve_lock); /* s_blocks_reserved is only changed under this lock here */
1401 + sbi->s_blocks_reserved -= blocks; /* give the reservation back */
1402 + WARN_ON(sbi->s_blocks_reserved < 0); /* more was released than is currently reserved */
1403 + if (sbi->s_blocks_reserved < 0)
1404 + sbi->s_blocks_reserved = 0; /* clamp the counter back to zero after warning */
1405 + spin_unlock(&sbi->s_reserve_lock);
1408 +int ext3_new_block(handle_t *handle, struct inode *inode,
1409 + unsigned long goal, int *errp)
1413 + if (!test_opt(inode->i_sb, MBALLOC)) { /* MBALLOC mount option not set: use the old allocator */
1414 + ret = ext3_new_block_old(handle, inode, goal, errp);
1418 + ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); /* MBALLOC path; NOTE(review): len is declared on a line not shown here - confirm its initial value */
1424 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1425 + unsigned long block, unsigned long count, int metadata)
1427 + if (!test_opt(inode->i_sb, MBALLOC)) /* MBALLOC mount option not set: use the old free path */
1428 + ext3_free_blocks_old(handle, inode, block, count); /* the old path does not take the metadata flag */
1430 + ext3_mb_free_blocks(handle, inode, block, count, metadata); /* MBALLOC path: metadata flag is passed through */
1434 Index: linux-2.6.5-sles9/fs/ext3/super.c
1435 ===================================================================
1436 --- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
1437 +++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300
1439 struct ext3_super_block *es = sbi->s_es;
1442 + ext3_mb_release(sb);
1443 ext3_ext_release(sb);
1444 ext3_xattr_put_super(sb);
1445 journal_destroy(sbi->s_journal);
1447 Opt_commit, Opt_journal_update, Opt_journal_inum,
1448 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1449 Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
1450 - Opt_err, Opt_extents, Opt_extdebug
1451 + Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
1454 static match_table_t tokens = {
1456 {Opt_iopen_nopriv, "iopen_nopriv"},
1457 {Opt_extents, "extents"},
1458 {Opt_extdebug, "extdebug"},
1459 + {Opt_mballoc, "mballoc"},
1465 set_opt (sbi->s_mount_opt, EXTDEBUG);
1468 + set_opt (sbi->s_mount_opt, MBALLOC);
1472 "EXT3-fs: Unrecognized mount option \"%s\" "
1473 @@ -1463,7 +1468,8 @@
1474 ext3_count_dirs(sb));
1483 Index: linux-2.6.5-sles9/fs/ext3/Makefile
1484 ===================================================================
1485 --- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
1486 +++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300
1488 obj-$(CONFIG_EXT3_FS) += ext3.o
1490 ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1491 - ioctl.o namei.o super.o symlink.o hash.o extents.o
1492 + ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
1494 ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
1495 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
1496 Index: linux-2.6.5-sles9/fs/ext3/balloc.c
1497 ===================================================================
1498 --- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300
1499 +++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300
1502 * Return buffer_head on success or NULL in case of failure.
1504 -static struct buffer_head *
1505 +struct buffer_head *
1506 read_block_bitmap(struct super_block *sb, unsigned int block_group)
1508 struct ext3_group_desc * desc;
1512 /* Free given blocks, update quota and i_blocks field */
1513 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
1514 +void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
1515 unsigned long block, unsigned long count)
1517 struct buffer_head *bitmap_bh = NULL;
1518 @@ -1142,7 +1142,7 @@
1519 * bitmap, and then for any free bit if that fails.
1520 * This function also updates quota and i_blocks field.
1522 -int ext3_new_block(handle_t *handle, struct inode *inode,
1523 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
1524 unsigned long goal, int *errp)
1526 struct buffer_head *bitmap_bh = NULL;
1527 Index: linux-2.6.5-sles9/fs/ext3/namei.c
1528 ===================================================================
1529 --- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300
1530 +++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300
1531 @@ -1640,7 +1640,7 @@
1532 * If the create succeeds, we fill in the inode information
1533 * with d_instantiate().
1535 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1536 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1537 struct nameidata *nd)
1540 Index: linux-2.6.5-sles9/fs/ext3/inode.c
1541 ===================================================================
1542 --- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
1543 +++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300
1545 ext3_journal_forget(handle, branch[i].bh);
1547 for (i = 0; i < keys; i++)
1548 - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1549 + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1555 for (i = 0; i < num; i++)
1556 ext3_free_blocks(handle, inode,
1557 - le32_to_cpu(where[i].key), 1);
1558 + le32_to_cpu(where[i].key), 1, 1);
1562 @@ -1829,7 +1829,7 @@
1566 - ext3_free_blocks(handle, inode, block_to_free, count);
1567 + ext3_free_blocks(handle, inode, block_to_free, count, 1);
1571 @@ -2000,7 +2000,7 @@
1572 ext3_journal_test_restart(handle, inode);
1575 - ext3_free_blocks(handle, inode, nr, 1);
1576 + ext3_free_blocks(handle, inode, nr, 1, 1);
1580 Index: linux-2.6.5-sles9/fs/ext3/extents.c
1581 ===================================================================
1582 --- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
1583 +++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
1585 for (i = 0; i < depth; i++) {
1588 - ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1589 + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1593 @@ -1391,7 +1391,7 @@
1594 path->p_idx->ei_leaf);
1595 bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
1596 ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
1597 - ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
1598 + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
1602 @@ -1879,10 +1879,12 @@
1603 int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1604 handle_t *handle = ext3_journal_start(tree->inode, needed);
1605 struct buffer_head *bh;
1607 + int i, metadata = 0;
1610 return PTR_ERR(handle);
1611 + if (S_ISDIR(tree->inode->i_mode))
1613 if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
1615 unsigned long num, start;
1616 @@ -1894,7 +1896,7 @@
1617 bh = sb_find_get_block(tree->inode->i_sb, start + i);
1618 ext3_forget(handle, 0, tree->inode, bh, start + i);
1620 - ext3_free_blocks(handle, tree->inode, start, num);
1621 + ext3_free_blocks(handle, tree->inode, start, num, metadata);
1622 } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
1623 printk("strange request: removal %lu-%lu from %u:%u\n",
1624 from, to, ex->ee_block, ex->ee_len);
1625 Index: linux-2.6.5-sles9/fs/ext3/xattr.c
1626 ===================================================================
1627 --- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300
1628 +++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300
1629 @@ -1366,7 +1366,7 @@
1630 new_bh = sb_getblk(sb, block);
1633 - ext3_free_blocks(handle, inode, block, 1);
1634 + ext3_free_blocks(handle, inode, block, 1, 1);
1638 @@ -1408,7 +1408,7 @@
1639 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
1640 /* Free the old block. */
1641 ea_bdebug(old_bh, "freeing");
1642 - ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
1643 + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
1645 /* ext3_forget() calls bforget() for us, but we
1646 let our caller release old_bh, so we need to
1647 @@ -1504,7 +1504,7 @@
1649 if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
1650 ext3_xattr_cache_remove(bh);
1651 - ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
1652 + ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
1654 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
1656 Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
1657 ===================================================================
1658 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
1659 +++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300
1661 #define ext3_debug(f, a...) do {} while (0)
1664 +#define EXT3_MULTIBLOCK_ALLOCATOR 1
1667 * Special inodes numbers
1670 #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
1671 #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
1672 #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
1673 +#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
1675 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1678 extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
1679 extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
1680 extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1682 + unsigned long, int);
1683 extern unsigned long ext3_count_free_blocks (struct super_block *);
1684 extern void ext3_check_blocks_bitmap (struct super_block *);
1685 extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1686 Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
1687 ===================================================================
1688 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
1689 +++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300
1692 #include <linux/blockgroup_lock.h>
1693 #include <linux/percpu_counter.h>
1694 +#include <linux/list.h>
1697 #include <linux/rbtree.h>
1699 +#define EXT3_BB_MAX_BLOCKS 30
1700 +struct ext3_free_metadata {
1701 + unsigned short group;
1702 + unsigned short num;
1703 + unsigned short blocks[EXT3_BB_MAX_BLOCKS];
1704 + struct list_head list;
1707 +#define EXT3_BB_MAX_ORDER 14
1709 +struct ext3_buddy_group_blocks {
1710 + sector_t bb_bitmap;
1711 + sector_t bb_buddy;
1712 + spinlock_t bb_lock;
1713 + unsigned bb_counters[EXT3_BB_MAX_ORDER];
1714 + struct ext3_free_metadata *bb_md_cur;
1715 + unsigned long bb_tid;
1719 * third extended-fs super-block data in memory
1722 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
1723 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
1726 + /* for buddy allocator */
1727 + struct ext3_buddy_group_blocks *s_buddy_blocks;
1728 + struct inode *s_buddy;
1729 + long s_blocks_reserved;
1730 + spinlock_t s_reserve_lock;
1731 + struct list_head s_active_transaction;
1732 + struct list_head s_closed_transaction;
1733 + struct list_head s_committed_transaction;
1734 + spinlock_t s_md_lock;
1735 + tid_t s_last_transaction;
1738 #endif /* _LINUX_EXT3_FS_SB */