--- /dev/null
+Index: linux-stage/fs/ext3/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/mballoc.c 2003-01-30 05:24:37.000000000 -0500
++++ linux-stage/fs/ext3/mballoc.c 2004-10-13 17:06:53.000000000 -0400
+@@ -0,0 +1,1397 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public Licens
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advise allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over
++ * its structures. these checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/* define MB_DEBUG (instead of MB_DEBUG__) to make mb_debug() call printk()
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb; /* filesystem the request is served from */
++
++ /* search goals */
++ int ac_g_group; /* group to start searching in */
++ int ac_g_start; /* goal block, relative to the start of ac_g_group */
++ int ac_g_len; /* number of blocks wanted */
++ int ac_g_flags; /* caller's flags; bit 0: try ac_g_start first */
++
++ /* the best found extent */
++ int ac_b_group; /* group the extent was taken from */
++ int ac_b_start; /* first block of the extent, group-relative */
++ int ac_b_len; /* blocks actually taken, at most ac_g_len */
++
++ /* number of iterations done. we have to track to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status; /* AC_STATUS_FOUND once an extent has been marked used */
++};
++
++#define AC_STATUS_CONTINUE 1 /* NOTE(review): not referenced in the code visible here */
++#define AC_STATUS_FOUND 2 /* an extent was found and marked used */
++
++
++struct ext3_buddy {
++ void *bd_bitmap; /* bd_bh->b_data: order-0 map, set bit = free block */
++ void *bd_buddy; /* bd_bh2->b_data: maps for orders 1 and up */
++ int bd_blkbits; /* sb->s_blocksize_bits */
++ struct buffer_head *bd_bh; /* buffer holding the bitmap block */
++ struct buffer_head *bd_bh2; /* buffer holding the buddy block */
++ struct ext3_buddy_group_blocks *bd_bd; /* this group's entry in s_buddy_blocks */
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent {
++ int fe_start; /* first block of the extent, group-relative */
++ int fe_len; /* length in blocks */
++ unsigned char fe_orders[MB_ARR_SIZE]; /* orders of the chunks forming the extent */
++ unsigned char fe_nums; /* number of fe_orders entries in use */
++ unsigned char fe_back; /* non-zero: mb_mark_used takes blocks from the end */
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* b in [first, first + len - 1] */
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
++{
++ int i = 1;
++ void *bb;
++ /* return the map for 'order' and store its size in bits in *max */
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(max != NULL);
++ /* no such order: return NULL and leave *max untouched */
++ if (order > e3b->bd_blkbits + 1)
++ return NULL;
++
++ /* at order 0 we see each particular block */
++ *max = 1 << (e3b->bd_blkbits + 3);
++ if (order == 0)
++ return e3b->bd_bitmap;
++ /* higher orders are packed back to back in bd_buddy, each half the size */
++ bb = e3b->bd_buddy;
++ *max = *max >> 1;
++ while (i < order) {
++ bb += 1 << (e3b->bd_blkbits - i);
++ i++;
++ *max = *max >> 1;
++ }
++ return bb;
++}
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++				struct ext3_buddy *e3b)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++	J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++	/* read the group's bitmap and buddy blocks and fill *e3b; 0 or -EIO.
++	 * e3b arrives uninitialized and 'out' brelse()s bd_bh2: clear it now */
++	e3b->bd_bh2 = NULL;
++	e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++	if (e3b->bd_bh == NULL) {
++		ext3_error(sb, "ext3_mb_load_desc",
++				"can't get block for buddy bitmap\n");
++		goto out;
++	}
++	if (!buffer_uptodate(e3b->bd_bh)) {
++		ll_rw_block(READ, 1, &e3b->bd_bh);
++		wait_on_buffer(e3b->bd_bh);
++	}
++	J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++	/* load buddy */
++	e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++	if (e3b->bd_bh2 == NULL) {
++		ext3_error(sb, "ext3_mb_load_desc",
++				"can't get block for buddy bitmap\n");
++		goto out;
++	}
++	if (!buffer_uptodate(e3b->bd_bh2)) {
++		ll_rw_block(READ, 1, &e3b->bd_bh2);
++		wait_on_buffer(e3b->bd_bh2);
++	}
++	J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++	e3b->bd_bitmap = e3b->bd_bh->b_data;
++	e3b->bd_buddy = e3b->bd_bh2->b_data;
++	e3b->bd_blkbits = sb->s_blocksize_bits;
++	e3b->bd_bd = sbi->s_buddy_blocks + group;
++	e3b->bd_sb = sb;
++
++	return 0;
++out:
++	brelse(e3b->bd_bh);
++	brelse(e3b->bd_bh2);
++	e3b->bd_bh = e3b->bd_bh2 = NULL;
++	return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
++{
++ mark_buffer_dirty(e3b->bd_bh); /* bitmap block */
++ mark_buffer_dirty(e3b->bd_bh2); /* buddy block */
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b)
++{
++ brelse(e3b->bd_bh); /* drop the references taken by ext3_mb_load_desc */
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK
++static void mb_check_buddy(struct ext3_buddy *e3b)
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++ /* assert that each order's map agrees with the order below, the bitmap and bb_counters */
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (test_bit(i << 1, buddy2))
++ J_ASSERT(!test_bit((i<<1)+1, buddy2));
++ else if (test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!test_bit(i << 1, buddy2));
++ J_ASSERT(!test_bit((i << 1) + 1, buddy2));
++ /* a free chunk of this order must be entirely free in the bitmap */
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++ /* a used block must not be set in any map of order 0..bd_blkbits */
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (test_bit(i, buddy))
++ continue;
++ /* check used bits only */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void /* take the spinlock protecting this group's buddy data */
++ext3_lock_group(struct super_block *sb, int group)
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static inline void /* release the spinlock taken by ext3_lock_group */
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
++{
++	int order = 1;
++	void *bb = e3b->bd_buddy;
++
++	J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++	J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++	/* returns the order (>= 1) of the free chunk holding 'block', else 0 */
++	while (order <= e3b->bd_blkbits + 1) {
++		block = block >> 1;
++		if (test_bit(block, bb))
++			return order;
++		/* nothing follows the top order: don't shift by -1 */
++		if (order <= e3b->bd_blkbits)
++			bb += 1 << (e3b->bd_blkbits - order);
++		order++;
++	}
++	return 0;
++}
++
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++ /* clear 'len' bits of bitmap 'bm' starting at bit 'cur' */
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: clear whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0;
++ cur += 32;
++ continue;
++ }
++ clear_bit(cur, bm);
++ cur++;
++ }
++}
++
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++ /* set 'len' bits of bitmap 'bm' starting at bit 'cur' */
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: set whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0xffffffff;
++ cur += 32;
++ continue;
++ }
++ set_bit(cur, bm);
++ cur++;
++ }
++}
++
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++ /* mark 'count' blocks from 'first' free and merge free pairs upwards; always returns 0 */
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++ /* the block must currently be in use (bit clear) */
++ J_ASSERT(!test_bit(block, e3b->bd_bitmap));
++ set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!test_bit(block, buddy) ||
++ !test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++ /* NULL means we are already at the highest order */
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* for special purposes, we don't clear
++ * free bits in bitmap */
++ clear_bit(block, buddy);
++ clear_bit(block + 1, buddy);
++ }
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++ /* two chunks of 'order' become one chunk of 'order + 1' */
++ block = block >> 1;
++ order++;
++ e3b->bd_bd->bb_counters[order]++;
++
++ set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * returns 1 if out extent (built from the tail chunks of in) fills needed space
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++ /* NOTE(review): mb_find_extent can leave fe_nums == MB_ARR_SIZE, tripping the assertion above */
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for single-chunk extent we need not back order
++ * also, if an extent doesn't fill needed space
++ * then it makes no sense to try back order because
++ * if we select this extent then it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++ /* walk the chunks from the last one back until 'needed' blocks are covered */
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situation fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++ /* copy the orders of the chunks that were taken, in their original order */
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++			int needed, struct ext3_free_extent *ex)
++{
++	int space = needed;
++	int next, max, ord;
++	void *buddy;
++
++	/* describe in 'ex' the free extent found at bit 'block' of the
++	 * order-'order' map, extended by the free chunks that follow it until
++	 * it holds 'needed' blocks.  returns its length, 0 if not free */
++	J_ASSERT(ex != NULL);
++	/* reset every field: callers pass an uninitialized extent and the
++	 * 'nofree' exit below reads fe_start */
++	ex->fe_start = ex->fe_len = 0;
++	ex->fe_nums = ex->fe_back = 0;
++
++	buddy = mb_find_buddy(e3b, order, &max);
++	J_ASSERT(buddy);
++	J_ASSERT(block < max);
++	if (!test_bit(block, buddy))
++		goto nofree;
++
++	if (order == 0) {
++		/* find actual order */
++		order = mb_find_order_for_block(e3b, block);
++		block = block >> order;
++	}
++
++	ex->fe_orders[ex->fe_nums++] = order;
++	ex->fe_len = 1 << order;
++	ex->fe_start = block << order;
++
++	while ((space = space - (1 << order)) > 0) {
++		buddy = mb_find_buddy(e3b, order, &max);
++		J_ASSERT(buddy);
++
++		if (block + 1 >= max)
++			break;
++
++		next = (block + 1) * (1 << order);
++		if (!test_bit(next, e3b->bd_bitmap))
++			break;
++
++		ord = mb_find_order_for_block(e3b, next);
++		/* we dont want to coalesce with self-enough buddies */
++		if ((1 << ord) >= needed)
++			break;
++		order = ord;
++		block = next >> order;
++		ex->fe_len += 1 << order;
++
++		if (ex->fe_nums < MB_ARR_SIZE)
++			ex->fe_orders[ex->fe_nums++] = order;
++	}
++
++nofree:
++	J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++	return ex->fe_len;
++}
++
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++				struct ext3_free_extent *ex, int len)
++{
++	int start = ex->fe_start + ex->fe_len - 1, len0 = len;
++	int ord, mlen, max, cur;
++	void *buddy;
++
++	/* take the last 'len' blocks of 'ex', working down from its end */
++	while (len) {
++		ord = mb_find_order_for_block(e3b, start);
++		if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++				len >= (1 << ord)) {
++			/* the whole chunk may be allocated at once! */
++			mlen = 1 << ord;
++			buddy = mb_find_buddy(e3b, ord, &max);
++			J_ASSERT((start >> ord) < max);
++			clear_bit(start >> ord, buddy);
++			e3b->bd_bd->bb_counters[ord]--;
++			start -= mlen;
++			len -= mlen;
++			/* start may reach -1, but only when nothing is left */
++			J_ASSERT(len == 0 || (len > 0 && start >= 0));
++			continue;
++		}
++
++		/* we have to split large buddy */
++		J_ASSERT(ord > 0);
++		buddy = mb_find_buddy(e3b, ord, &max);
++		clear_bit(start >> ord, buddy);
++		e3b->bd_bd->bb_counters[ord]--;
++
++		ord--;
++		cur = (start >> ord) & ~1U;
++		buddy = mb_find_buddy(e3b, ord, &max);
++		set_bit(cur, buddy);
++		set_bit(cur + 1, buddy);
++		e3b->bd_bd->bb_counters[ord]++;
++		e3b->bd_bd->bb_counters[ord]++;
++	}
++
++	/* now drop all the bits in bitmap */
++	mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++	mb_check_buddy(e3b);
++
++	return 0;
++}
++
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++ /* take the first 'len' blocks of 'ex': clear them in the maps, fix bb_counters; returns 0 */
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ /* chunk starts exactly at 'start' and fits into what is left to take */
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ /* replace it by its two halves one order down, then retry */
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ set_bit(cur, buddy);
++ set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++int inline mb_mark_used(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int err;
++ /* mark 'len' blocks of 'ex' used, from its start or, if fe_back is set, from its end */
++ J_ASSERT(ex);
++ if (ex->fe_back == 0)
++ err = mb_mark_used_forward(e3b, ex, len);
++ else
++ err = mb_mark_used_backward(e3b, ex, len);
++ return err;
++}
++
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++				struct ext3_buddy *e3b, int group)
++{
++	struct super_block *sb = ac->ac_sb;
++	struct ext3_free_extent curex;
++	int max, i;
++
++	/* try to take ac->ac_g_len blocks from 'group' using the buddy data
++	 * in 'e3b'.  on success the extent is marked used, recorded in
++	 * ac->ac_b_* and ac->ac_status is set to AC_STATUS_FOUND.
++	 * always returns 0.  ext3_mb_new_blocks calls this with the
++	 * group lock held */
++
++	if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++		/* someone asks for space at this specified block
++		 * probably he wants to merge it into existing extent */
++		if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++			/* good. at least one block is free */
++			mb_find_extent(e3b, 0, ac->ac_g_start,
++					ac->ac_g_len, &curex);
++			max = min(curex.fe_len, ac->ac_g_len);
++			mb_mark_used(e3b, &curex, max);
++
++			ac->ac_b_group = group;
++			ac->ac_b_start = curex.fe_start;
++			ac->ac_b_len = max;
++			ac->ac_status = AC_STATUS_FOUND;
++			return 0;
++		}
++		/* don't try to find goal anymore */
++		ac->ac_g_flags &= ~1;
++	}
++
++	/* otherwise scan the bitmap for the first free extent that can
++	 * hold the whole request */
++	i = 0;
++	while (1) {
++		i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++		if (i >= sb->s_blocksize * 8)
++			break;
++
++		max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++		if (max >= ac->ac_g_len) {
++			max = min(curex.fe_len, ac->ac_g_len);
++			mb_mark_used(e3b, &curex, max);
++
++			ac->ac_b_group = group;
++			ac->ac_b_start = curex.fe_start;
++			ac->ac_b_len = max;
++			ac->ac_status = AC_STATUS_FOUND;
++			break;
++		}
++		/* too small: skip ahead by the length just found */
++		i += max;
++	}
++
++	return 0;
++}
++
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++ struct ext3_group_desc *gdp;
++ int free_blocks;
++ /* returns 1 if 'group' is worth scanning at criteria level 'cr' (0..2), else 0 */
++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
++ if (!gdp)
++ return 0;
++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++ if (free_blocks == 0)
++ return 0;
++
++ /* someone wants this block very much */
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++ return 1;
++
++ /* FIXME: I'd like to take fragmentation into account here */
++ if (cr == 0) { /* strictest pass: at least half the request must be free */
++ if (free_blocks >= ac->ac_g_len >> 1)
++ return 1;
++ } else if (cr == 1) { /* at least a quarter of the request */
++ if (free_blocks >= ac->ac_g_len >> 2)
++ return 1;
++ } else if (cr == 2) { /* any group with a free block */
++ return 1;
++ } else {
++ BUG();
++ }
++ return 0;
++}
++
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++			unsigned long goal, int *len, int flags, int *errp)
++{
++	struct buffer_head *bitmap_bh = NULL;
++	struct ext3_allocation_context ac;
++	int i, group, block, cr, err = 0;
++	struct ext3_group_desc *gdp;
++	struct ext3_super_block *es;
++	struct buffer_head *gdp_bh;
++	struct ext3_sb_info *sbi;
++	struct super_block *sb;
++	struct ext3_buddy e3b;
++
++	/* allocate up to *len blocks near 'goal' for 'inode'.  returns the
++	 * first allocated block and stores the number of blocks actually
++	 * obtained in *len; on failure returns 0 with the error in *errp.
++	 * bit 0 of 'flags' asks for the goal block itself (see
++	 * ext3_mb_new_in_group), bit 1 says the caller already reserved
++	 * the blocks */
++	J_ASSERT(len != NULL);
++	J_ASSERT(*len > 0);
++
++	sb = inode->i_sb;
++	if (!sb) {
++		printk("ext3_mb_new_nblocks: nonexistent device");
++		return 0;
++	}
++
++	if (!test_opt(sb, MBALLOC)) {
++		static int ext3_mballoc_warning = 0;
++		if (ext3_mballoc_warning == 0) {
++			printk(KERN_ERR "EXT3-fs: multiblock request with "
++				"mballoc disabled!\n");
++			ext3_mballoc_warning++;
++		}
++		*len = 1;
++		err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp);
++		return err;
++	}
++
++	ext3_mb_poll_new_transaction(sb, handle);
++
++	sbi = EXT3_SB(sb);
++	es = EXT3_SB(sb)->s_es;
++
++	if (!(flags & 2)) {
++		/* someone asks for non-reserved blocks */
++		BUG_ON(*len > 1);
++		err = ext3_mb_reserve_blocks(sb, 1);
++		if (err) {
++			*errp = err;
++			return 0;
++		}
++	}
++
++	/*
++	 * Check quota for allocation of this blocks.
++	 */
++	while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++		*len -= 1;
++	if (*len == 0) {
++		*errp = -EDQUOT;
++		block = 0;
++		goto out;
++	}
++
++	/* start searching from the goal */
++	if (goal < le32_to_cpu(es->s_first_data_block) ||
++	    goal >= le32_to_cpu(es->s_blocks_count))
++		goal = le32_to_cpu(es->s_first_data_block);
++	group = (goal - le32_to_cpu(es->s_first_data_block)) /
++			EXT3_BLOCKS_PER_GROUP(sb);
++	block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++			EXT3_BLOCKS_PER_GROUP(sb));
++
++	/* set up allocation goals */
++	ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++	ac.ac_status = 0;
++	ac.ac_groups_scanned = 0;
++	ac.ac_sb = inode->i_sb;
++	ac.ac_g_group = group;
++	ac.ac_g_start = block;
++	ac.ac_g_len = *len;
++	ac.ac_g_flags = flags;
++
++	/* loop over the groups */
++	for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++		for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++			if (group == EXT3_SB(sb)->s_groups_count)
++				group = 0;
++
++			/* check is group good for our criteries */
++			if (!mb_good_group(&ac, group, cr))
++				continue;
++
++			err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++			if (err)
++				goto out_err;
++
++			ext3_lock_group(sb, group);
++			if (!mb_good_group(&ac, group, cr)) {
++				/* someone did allocation from this group */
++				ext3_unlock_group(sb, group);
++				ext3_mb_release_desc(&e3b);
++				continue;
++			}
++
++			err = ext3_mb_new_in_group(&ac, &e3b, group);
++			ext3_unlock_group(sb, group);
++			if (ac.ac_status == AC_STATUS_FOUND)
++				ext3_mb_dirty_buddy(&e3b);
++			ext3_mb_release_desc(&e3b);
++			if (err)
++				goto out_err;
++			if (ac.ac_status == AC_STATUS_FOUND)
++				break;
++		}
++	}
++
++	if (ac.ac_status != AC_STATUS_FOUND) {
++		/* unfortunately, we can't satisfy this request */
++		J_ASSERT(ac.ac_b_len == 0);
++		DQUOT_FREE_BLOCK(inode, *len);
++		*errp = -ENOSPC;
++		block = 0;
++		goto out;
++	}
++
++	/* good news - free block(s) have been found. now it's time
++	 * to mark block(s) in good old journaled bitmap */
++	block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++			+ ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
++
++	bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++	if (!bitmap_bh) {
++		err = -EIO;
++		goto out_err;
++	}
++
++	err = ext3_journal_get_write_access(handle, bitmap_bh);
++	if (err) {
++		*errp = err;
++		goto out_err;
++	}
++
++	gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++	if (!gdp) {
++		err = -EIO;
++		goto out_err;
++	}
++
++	ext3_debug("using block group %d(%d)\n",
++			ac.ac_b_group, le16_to_cpu(gdp->bg_free_blocks_count));
++
++	err = ext3_journal_get_write_access(handle, gdp_bh);
++	if (err)
++		goto out_err;
++
++	if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++	    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++	    in_range(block, le32_to_cpu(gdp->bg_inode_table),
++			EXT3_SB(sb)->s_itb_per_group))
++		ext3_error(sb, "ext3_new_block",
++				"Allocating block in system zone - "
++				"block = %u", block);
++#if 0
++	for (i = 0; i < ac.ac_b_len; i++)
++		J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++	mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++	ext3_lock_group(sb, ac.ac_b_group);
++	gdp->bg_free_blocks_count =
++			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++					ac.ac_b_len);
++	ext3_unlock_group(sb, ac.ac_b_group);
++	percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++	if (err)
++		goto out_err;
++	err = ext3_journal_dirty_metadata(handle, gdp_bh);
++	if (err)
++		goto out_err;
++
++	sb->s_dirt = 1;
++	*errp = 0;
++	brelse(bitmap_bh);
++
++	/* drop non-allocated, but dquote'd blocks */
++	J_ASSERT(*len >= ac.ac_b_len);
++	DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++	*len = ac.ac_b_len;
++	J_ASSERT(block != 0);
++	goto out;
++
++out_err:
++	/* if we've already allocated something, roll it back */
++	if (ac.ac_status == AC_STATUS_FOUND) {
++		/* FIXME: free blocks here */
++	}
++
++	DQUOT_FREE_BLOCK(inode, *len);
++	brelse(bitmap_bh);
++	*errp = err;
++	block = 0;
++out:
++	if (!(flags & 2)) {
++		/* block wasn't reserved before and we reserved it
++		 * at the beginning of allocation. it doesn't matter
++		 * whether we allocated anything or we failed: time
++		 * to release reservation. NOTE: because I expect
++		 * any multiblock request from delayed allocation
++		 * path only, here is single block always */
++		ext3_mb_release_blocks(sb, 1);
++	}
++	return block;
++}
++
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++ struct buffer_head *bh;
++ int i, err, count = 0;
++ struct ext3_buddy e3b;
++ /* rebuild the buddy data of 'group' from its block bitmap; returns 0 or -EIO */
++ err = ext3_mb_load_desc(sb, group, &e3b);
++ if (err)
++ goto out;
++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++ /* the block bitmap has a set bit for every block in use */
++ bh = read_block_bitmap(sb, group);
++ if (bh == NULL) {
++ err = -EIO;
++ goto out2;
++ }
++
++ /* loop over the blocks, and create buddies for free ones */
++ for (i = 0; i < sb->s_blocksize * 8; i++) {
++ if (!test_bit(i, (void *) bh->b_data)) {
++ mb_free_blocks(&e3b, i, 1);
++ count++;
++ }
++ }
++ brelse(bh);
++ mb_check_buddy(&e3b);
++ ext3_mb_dirty_buddy(&e3b);
++
++out2:
++ ext3_mb_release_desc(&e3b);
++out:
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ 2 * EXT3_QUOTA_INIT_BLOCKS)
++
++int ext3_mb_init_backend(struct super_block *sb)
++{
++	struct inode *root = sb->s_root->d_inode;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct dentry *db;
++	tid_t target;
++	int err = 0, i;
++
++	/* set up the per-group table and map two blocks per group (bitmap and
++	 * buddy) in the EXT3_BUDDY_FILE inode.  returns 0 or a negative errno */
++	sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++					sbi->s_groups_count, GFP_KERNEL);
++	if (sbi->s_buddy_blocks == NULL) {
++		printk("can't allocate mem for buddy maps\n");
++		return -ENOMEM;
++	}
++	memset(sbi->s_buddy_blocks, 0,
++		sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++	sbi->s_buddy = NULL;
++
++	down(&root->i_sem);
++	db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++				strlen(EXT3_BUDDY_FILE));
++	if (IS_ERR(db)) {
++		err = PTR_ERR(db);
++		printk("can't lookup buddy file: %d\n", err);
++		goto out;
++	}
++
++	if (db->d_inode == NULL) {
++		err = ext3_create(root, db, S_IFREG, NULL);
++		if (err) {
++			printk("error while creation buddy file: %d\n", err);
++			goto out2;
++		}
++	}
++	sbi->s_buddy = igrab(db->d_inode);
++
++	for (i = 0; i < sbi->s_groups_count; i++) {
++		struct buffer_head *bh = NULL;
++		handle_t *handle;
++
++		handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++		if (IS_ERR(handle)) {
++			err = PTR_ERR(handle);
++			goto out2;
++		}
++
++		/* allocate block for bitmap */
++		bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++		if (bh == NULL) {
++			printk("can't get block for buddy bitmap: %d\n", err);
++			ext3_journal_stop(handle);
++			goto out2;
++		}
++		sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++		brelse(bh);
++
++		/* allocate block for buddy */
++		bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++		if (bh == NULL) {
++			printk("can't get block for buddy: %d\n", err);
++			ext3_journal_stop(handle);
++			goto out2;
++		}
++		sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++		brelse(bh);
++		ext3_journal_stop(handle);
++		spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++		sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++		sbi->s_buddy_blocks[i].bb_tid = 0;
++	}
++
++	if (journal_start_commit(sbi->s_journal, &target))
++		log_wait_commit(sbi->s_journal, target);
++
++out2:
++	dput(db);
++out:
++	up(&root->i_sem);
++	return err;
++}
++
++int ext3_mb_release(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ /* push all pending freed blocks into the buddy data, then drop mballoc state; returns 0 */
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* release freed, non-committed blocks */
++ spin_lock(&sbi->s_md_lock);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_committed_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ ext3_mb_free_committed_blocks(sb);
++ /* free the per-group table and drop the reference on the buddy inode */
++ if (sbi->s_buddy_blocks)
++ kfree(sbi->s_buddy_blocks);
++ if (sbi->s_buddy)
++ iput(sbi->s_buddy);
++ if (sbi->s_blocks_reserved)
++ printk("ext3-fs: %ld blocks being reserved at umount!\n",
++ sbi->s_blocks_reserved);
++ return 0;
++}
++
++int ext3_mb_init(struct super_block *sb)
++{
++	int i, err;
++
++	/* mount-time setup of mballoc; returns 0 or the backend's error */
++	if (!test_opt(sb, MBALLOC))
++		return 0;
++
++	/* MBALLOC is off while buddy data is built and stays off on failure */
++	clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++	err = ext3_mb_init_backend(sb);
++	if (err)
++		return err;
++	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++		ext3_mb_generate_buddy(sb, i);
++	spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++	spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++	set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++	printk("EXT3-fs: mballoc enabled\n");
++	return 0;
++}
++
++void ext3_mb_free_committed_blocks(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int err, i, count = 0, count2 = 0;
++ struct ext3_free_metadata *md;
++ struct ext3_buddy e3b;
++ /* put the blocks queued on s_committed_transaction back into their groups' buddy data */
++ if (list_empty(&sbi->s_committed_transaction))
++ return;
++
++ /* there is committed blocks to be freed yet */
++ do {
++ /* get next array of blocks */
++ md = NULL;
++ spin_lock(&sbi->s_md_lock);
++ if (!list_empty(&sbi->s_committed_transaction)) {
++ md = list_entry(sbi->s_committed_transaction.next,
++ struct ext3_free_metadata, list);
++ list_del(&md->list);
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ if (md == NULL)
++ break;
++
++ mb_debug("gonna free %u blocks in group %u (0x%p):",
++ md->num, md->group, md);
++ /* a failure to load the group's buddy data is treated as fatal */
++ err = ext3_mb_load_desc(sb, md->group, &e3b);
++ BUG_ON(err != 0);
++
++ /* there are blocks to put in buddy to make them really free */
++ count += md->num;
++ count2++;
++ ext3_lock_group(sb, md->group);
++ for (i = 0; i < md->num; i++) {
++ mb_debug(" %u", md->blocks[i]);
++ mb_free_blocks(&e3b, md->blocks[i], 1);
++ }
++ mb_debug("\n");
++ ext3_unlock_group(sb, md->group);
++
++ kfree(md);
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ } while (md);
++ mb_debug("freed %u blocks in %u structures\n", count, count2);
++}
++
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ /* unlocked fast path; the tid is checked again under s_md_lock below */
++ if (sbi->s_last_transaction == handle->h_transaction->t_tid)
++ return;
++
++ /* new transaction! time to close last one and free blocks for
++ * committed transaction. we know that only one transaction can be
++ * active, so previous transaction can be being logged and we
++ * know that transaction before previous is known to be already
++ * logged. this means that now we may free blocks freed in all
++ * transactions before previous one. hope I'm clear enough ... */
++
++ spin_lock(&sbi->s_md_lock);
++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
++ mb_debug("new transaction %lu, old %lu\n",
++ (unsigned long) handle->h_transaction->t_tid,
++ (unsigned long) sbi->s_last_transaction);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_closed_transaction);
++ sbi->s_last_transaction = handle->h_transaction->t_tid;
++ }
++ spin_unlock(&sbi->s_md_lock);
++ /* blocks now on the committed list can go back into the buddy data */
++ ext3_mb_free_committed_blocks(sb);
++}
++
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++				int group, int block, int count)
++{
++	struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++	struct super_block *sb = e3b->bd_sb;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct ext3_free_metadata *md;
++	int i;
++
++	/* record 'count' freed blocks starting at 'block' of 'group' on the
++	 * active-transaction list.  returns 0 or -ENOMEM */
++	ext3_lock_group(sb, group);
++	for (i = 0; i < count; i++) {
++		md = db->bb_md_cur;
++		if (md && db->bb_tid != handle->h_transaction->t_tid) {
++			db->bb_md_cur = NULL;
++			md = NULL;
++		}
++		if (md == NULL) {
++			ext3_unlock_group(sb, group);
++			/* a journal handle is held: do not recurse into the fs */
++			md = kmalloc(sizeof(*md), GFP_NOFS);
++			if (md == NULL)
++				return -ENOMEM;
++			md->num = 0;
++			md->group = group;
++			ext3_lock_group(sb, group);
++			if (db->bb_md_cur == NULL) {
++				spin_lock(&sbi->s_md_lock);
++				list_add(&md->list, &sbi->s_active_transaction);
++				spin_unlock(&sbi->s_md_lock);
++				db->bb_md_cur = md;
++				db->bb_tid = handle->h_transaction->t_tid;
++				mb_debug("new md 0x%p for group %u\n",
++						md, md->group);
++			} else {
++				kfree(md);
++				md = db->bb_md_cur;
++			}
++		}
++
++		BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++		md->blocks[md->num] = block + i;
++		md->num++;
++		/* no more space, put full container on a sb's list */
++		if (md->num == EXT3_BB_MAX_BLOCKS)
++			db->bb_md_cur = NULL;
++	}
++	ext3_unlock_group(sb, group);
++	return 0;
++}
++
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ unsigned long bit, overflow;
++ struct buffer_head *gd_bh;
++ unsigned long block_group;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++ int err = 0, ret;
++ /* free 'count' fs blocks starting at 'block'; one pass of do_more per block group */
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++ if (block < le32_to_cpu(es->s_first_data_block) ||
++ block + count < block ||
++ block + count > le32_to_cpu(es->s_blocks_count)) {
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks not in datazone - "
++ "block = %lu, count = %lu", block, count);
++ goto error_return;
++ }
++
++ ext3_debug("freeing block %lu\n", block);
++
++do_more:
++ overflow = 0;
++ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ bit = (block - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary; the part beyond it is handled by the next pass.
++ */
++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++ count -= overflow;
++ }
++ brelse(bitmap_bh);
++ bitmap_bh = read_block_bitmap(sb, block_group);
++ if (!bitmap_bh)
++ goto error_return;
++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++ if (!gdp)
++ goto error_return;
++
++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++ in_range (block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group) ||
++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks in system zones - "
++ "Block = %lu, count = %lu",
++ block, count);
++
++ BUFFER_TRACE(bitmap_bh, "getting write access");
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err)
++ goto error_return;
++
++ /*
++ * We are about to modify some metadata. Call the journal APIs
++ * to unshare ->b_data if a currently-committing transaction is
++ * using it
++ */
++ BUFFER_TRACE(gd_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, gd_bh);
++ if (err)
++ goto error_return;
++
++ err = ext3_mb_load_desc(sb, block_group, &e3b);
++ if (err)
++ goto error_return;
++
++ if (metadata) {
++ /* metadata blocks must not be reused until this transaction
++ * commits. NOTE(review): a -ENOMEM return is ignored here */
++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
++ } else {
++ ext3_lock_group(sb, block_group);
++ mb_free_blocks(&e3b, bit, count);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++ ext3_unlock_group(sb, block_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++ }
++
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ /* FIXME: undo logic will be implemented later and another way */
++ mb_clear_bits(bitmap_bh->b_data, bit, count);
++ DQUOT_FREE_BLOCK(inode, count);
++
++ /* We dirtied the bitmap block */
++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++ /* And the group descriptor block */
++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++ ret = ext3_journal_dirty_metadata(handle, gd_bh);
++ if (!err) err = ret;
++
++ if (overflow && !err) {
++ block += count;
++ count = overflow;
++ goto do_more;
++ }
++ sb->s_dirt = 1;
++error_return:
++ brelse(bitmap_bh);
++ ext3_std_error(sb, err);
++ return;
++}
++
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
++{
++	struct ext3_sb_info *info = EXT3_SB(sb);
++	int nfree, rc = 0;
++	/* reserve 'blocks' if that many free blocks are still unreserved */
++	BUG_ON(blocks < 0);
++	spin_lock(&info->s_reserve_lock);
++	nfree = percpu_counter_read_positive(&info->s_freeblocks_counter);
++	if (nfree - info->s_blocks_reserved < blocks)
++		rc = -ENOSPC;
++	else
++		info->s_blocks_reserved += blocks;
++	spin_unlock(&info->s_reserve_lock);
++	return rc;
++}
++
++void ext3_mb_release_blocks(struct super_block *sb, int blocks)
++{
++	struct ext3_sb_info *info = EXT3_SB(sb);
++	BUG_ON(blocks < 0);	/* give back 'blocks' previously reserved blocks */
++	spin_lock(&info->s_reserve_lock);
++	info->s_blocks_reserved -= blocks;
++	if (info->s_blocks_reserved < 0) {	/* underflow: warn and clamp */
++		WARN_ON(1);
++		info->s_blocks_reserved = 0;
++	}
++	spin_unlock(&info->s_reserve_lock);
++}
++
++int ext3_new_block(handle_t *handle, struct inode *inode,
++			unsigned long goal, u32 *pc, u32 *pb, int *errp)
++{
++	int len = 1;
++
++	/* without the mballoc mount option the original allocator does
++	 * the work, preallocation arguments included */
++	if (!test_opt(inode->i_sb, MBALLOC))
++		return ext3_new_block_old(handle, inode, goal, pc, pb, errp);
++
++	/* the multiblock allocator is asked for one block (len = 1,
++	 * flags = 0); 'pc' and 'pb' are not used on this path */
++	return ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++}
++
++
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++			unsigned long block, unsigned long count, int metadata)
++{
++	/* 'metadata' is passed on only to the multiblock allocator */
++	if (test_opt(inode->i_sb, MBALLOC))
++		ext3_mb_free_blocks(handle, inode, block, count, metadata);
++	else
++		ext3_free_blocks_old(handle, inode, block, count);
++}
++
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -543,7 +544,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err, Opt_extents, Opt_extdebug
++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc
+ };
+
+ static match_table_t tokens = {
+@@ -588,6 +589,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -803,6 +805,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1444,7 +1449,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-stage/fs/ext3/Makefile
+===================================================================
+--- linux-stage.orig/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/fs/ext3/balloc.c 2004-10-13 17:06:53.000000000 -0400
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -98,8 +98,8 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks (handle_t *handle, struct inode * inode,
+- unsigned long block, unsigned long count)
++void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gd_bh;
+@@ -528,8 +528,8 @@
+ * This function also updates quota and i_blocks field.
+ */
+ int
+-ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
+- u32 *prealloc_count, u32 *prealloc_block, int *errp)
++ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal,
++ u32 *prealloc_count, u32 *prealloc_block, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL; /* bh */
+ struct buffer_head *gdp_bh; /* bh2 */
+Index: linux-stage/fs/ext3/namei.c
+===================================================================
+--- linux-stage.orig/fs/ext3/namei.c 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/fs/ext3/namei.c 2004-10-13 17:06:53.000000000 -0400
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-stage/fs/ext3/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400
+@@ -256,7 +256,7 @@
+ ei->i_prealloc_count = 0;
+ ei->i_prealloc_block = 0;
+ /* Writer: end */
+- ext3_free_blocks (inode, block, total);
++ ext3_free_blocks (inode, block, total, 1);
+ }
+ #endif
+ }
+@@ -635,7 +635,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -736,7 +736,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1924,7 +1924,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2095,7 +2095,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-stage/fs/ext3/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1388,7 +1388,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1876,10 +1876,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1891,7 +1893,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-stage/fs/ext3/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1504,7 +1504,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
+ /*
+ * Special inodes numbers
+ */
+@@ -336,6 +338,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -696,7 +699,7 @@
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
+ __u32 *, __u32 *, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-stage/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/include/linux/ext3_fs_sb.h 2004-10-13 17:06:53.000000000 -0400
+@@ -23,9 +23,29 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+
++#define EXT3_BB_MAX_BLOCKS 30 /* capacity of one ext3_free_metadata container */
++struct ext3_free_metadata {
++ unsigned short group; /* group the queued blocks belong to */
++ unsigned short num; /* number of used entries in blocks[] */
++ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; /* block offsets inside the group */
++ struct list_head list; /* linked on the superblock's s_active_transaction list */
++};
++
++#define EXT3_BB_MAX_ORDER 14 /* size of bb_counters[] */
++
++struct ext3_buddy_group_blocks {
++ sector_t bb_bitmap; /* block number read by ext3_mb_load_desc() as the bitmap */
++ sector_t bb_buddy; /* block number read by ext3_mb_load_desc() as the buddy data */
++ spinlock_t bb_lock; /* taken by ext3_lock_group() */
++ unsigned bb_counters[EXT3_BB_MAX_ORDER]; /* indexed by buddy order */
++ struct ext3_free_metadata *bb_md_cur; /* container currently being filled, or NULL */
++ unsigned long bb_tid; /* tid of the transaction bb_md_cur was created in */
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -72,6 +92,17 @@
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
+Index: linux-stage/include/linux/ext3_jbd.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_jbd.h 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/include/linux/ext3_jbd.h 2004-10-13 19:12:30.000000000 -0400
+@@ -72,6 +72,23 @@
+
+ #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
+
++#ifdef CONFIG_QUOTA
++/* Maximal numbers of writes for quota operation (insert/delete/update)
++ * (over all formats) - info block, 4 pointer blocks, data block */
++#define DQUOT_MAX_WRITES 6
++
++/* Amount of blocks needed for quota update - we know that the structure was
++ * allocated so we need to update only inode+data */
++#define EXT3_QUOTA_TRANS_BLOCKS 2
++/* Amount of blocks needed for quota insert/delete - we do some block writes
++ * but inode, sb and group updates are done only once */
++#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\
++ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3)
++#else
++#define EXT3_QUOTA_TRANS_BLOCKS 0
++#define EXT3_QUOTA_INIT_BLOCKS 0
++#endif
++
+ int
+ ext3_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
export-ext3-2.6-suse.patch
ext3-include-fixes-2.6-suse.patch
ext3-extents-2.6.5.patch
-ext3-mballoc2-2.6.7.patch
+ext3-mballoc2-2.6-suse.patch
ext3-nlinks-2.6.7.patch
--- /dev/null
+Index: linux-stage/fs/ext3/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/mballoc.c 2003-01-30 05:24:37.000000000 -0500
++++ linux-stage/fs/ext3/mballoc.c 2004-10-13 17:06:53.000000000 -0400
+@@ -0,0 +1,1397 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advise the allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over
++ * its structures. these checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/*
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save the buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb;
++
++ /* search goals */
++ int ac_g_group;
++ int ac_g_start; /* bit index tested in the goal group's bitmap */
++ int ac_g_len; /* number of blocks wanted */
++ int ac_g_flags; /* bit 0: try the goal block itself first */
++
++ /* the best found extent */
++ int ac_b_group;
++ int ac_b_start;
++ int ac_b_len;
++
++ /* number of iterations done. we have to track to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status; /* AC_STATUS_* */
++};
++
++#define AC_STATUS_CONTINUE 1
++#define AC_STATUS_FOUND 2
++
++
++struct ext3_buddy {
++ void *bd_bitmap; /* bd_bh->b_data: order 0, one bit per block */
++ void *bd_buddy; /* bd_bh2->b_data: bitmaps for orders >= 1 */
++ int bd_blkbits; /* copy of sb->s_blocksize_bits */
++ struct buffer_head *bd_bh; /* buffer holding the bitmap */
++ struct buffer_head *bd_bh2; /* buffer holding the buddy data */
++ struct ext3_buddy_group_blocks *bd_bd; /* this group's entry in s_buddy_blocks */
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent {
++ int fe_start; /* first block of the extent (bit index in the group) */
++ int fe_len; /* length in blocks */
++ unsigned char fe_orders[MB_ARR_SIZE]; /* order of each chunk forming the extent */
++ unsigned char fe_nums; /* used entries in fe_orders[] */
++ unsigned char fe_back; /* non-zero: mb_mark_used() allocates from the end */
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /* evaluates 'b' and 'first' twice */
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
++{
++	void *map = e3b->bd_buddy;
++	int lvl;
++	J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++	J_ASSERT(max != NULL);
++
++	/* orders above blkbits + 1 have no bitmap; *max is left untouched */
++	if (order > e3b->bd_blkbits + 1)
++		return NULL;
++
++	/* order 0 is the block bitmap itself: one bit per block */
++	if (order == 0) {
++		*max = 1 << (e3b->bd_blkbits + 3);
++		return e3b->bd_bitmap;
++	}
++
++	/* each order has half the bits of the one below and follows it */
++	*max = 1 << (e3b->bd_blkbits + 2);
++	for (lvl = 1; lvl < order; lvl++) {
++		map += 1 << (e3b->bd_blkbits - lvl);
++		*max = *max >> 1;
++	}
++	return map;
++}
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++				struct ext3_buddy *e3b)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	/* fill 'e3b' with the group's bitmap and buddy buffers; 0 or -EIO */
++	J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++	J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++	e3b->bd_bh2 = NULL;	/* 'out' brelse()s it even when the bitmap fails */
++	/* load bitmap */
++	e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++	if (e3b->bd_bh == NULL) {
++		ext3_error(sb, "ext3_mb_load_desc",
++				"can't get block for buddy bitmap\n");
++		goto out;
++	}
++	if (!buffer_uptodate(e3b->bd_bh)) {
++		ll_rw_block(READ, 1, &e3b->bd_bh);
++		wait_on_buffer(e3b->bd_bh);
++	}
++	J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++	/* load buddy */
++	e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++	if (e3b->bd_bh2 == NULL) {
++		ext3_error(sb, "ext3_mb_load_desc",
++				"can't get block for buddy bitmap\n");
++		goto out;
++	}
++	if (!buffer_uptodate(e3b->bd_bh2)) {
++		ll_rw_block(READ, 1, &e3b->bd_bh2);
++		wait_on_buffer(e3b->bd_bh2);
++	}
++	J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++	e3b->bd_bitmap = e3b->bd_bh->b_data;
++	e3b->bd_buddy = e3b->bd_bh2->b_data;
++	e3b->bd_blkbits = sb->s_blocksize_bits;
++	e3b->bd_bd = sbi->s_buddy_blocks + group;
++	e3b->bd_sb = sb;
++
++	return 0;
++out:
++	brelse(e3b->bd_bh);
++	brelse(e3b->bd_bh2);
++	e3b->bd_bh = NULL;
++	e3b->bd_bh2 = NULL;
++	return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
++{
++ mark_buffer_dirty(e3b->bd_bh); /* bitmap buffer */
++ mark_buffer_dirty(e3b->bd_bh2); /* buddy buffer */
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b)
++{
++ brelse(e3b->bd_bh); /* drop the references taken by ext3_mb_load_desc() */
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK
++static void mb_check_buddy(struct ext3_buddy *e3b)
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++ /* debug only: check each order against the one below, the bitmap and bb_counters */
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (test_bit(i << 1, buddy2))
++ J_ASSERT(!test_bit((i<<1)+1, buddy2));
++ else if (test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!test_bit(i << 1, buddy2));
++ J_ASSERT(!test_bit((i << 1) + 1, buddy2));
++ /* a set bit at this order: all 1 << order bitmap bits must be set */
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (test_bit(i, buddy))
++ continue;
++ /* check used bits only: no order may claim this block */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void
++ext3_lock_group(struct super_block *sb, int group)
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); /* per-group buddy lock */
++}
++
++static inline void
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); /* pairs with ext3_lock_group() */
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
++{
++	int top = e3b->bd_blkbits + 1, order;
++	void *bb = e3b->bd_buddy;
++
++	J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++	J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++	/* returns the order (>= 1) of the buddy chunk whose bit covers 'block',
++	 * or 0 when no bitmap of order >= 1 has that bit set */
++	for (order = 1; order <= top; order++) {
++		block = block >> 1;
++		if (test_bit(block, bb))
++			return order;
++		/* nothing follows the top order: avoid a shift by -1 */
++		if (order < top)
++			bb += 1 << (e3b->bd_blkbits - order);
++	}
++	return 0;
++}
++
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++	int end = cur + len;
++
++	/* clear bits [cur, cur + len) of bitmap 'bm' */
++	while (cur < end) {
++		if ((cur & 31) || end - cur < 32) {
++			/* unaligned head or short tail: one bit at a time */
++			clear_bit(cur, bm);
++			cur++;
++		} else {
++			/* 32 bits on a 32-bit boundary: wipe the whole word */
++			*(__u32 *)(bm + (cur >> 3)) = 0;
++			cur += 32;
++		}
++	}
++}
++
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++	int end = cur + len;
++
++	/* set bits [cur, cur + len) of bitmap 'bm' */
++	while (cur < end) {
++		if ((cur & 31) || end - cur < 32) {
++			/* unaligned head or short tail: one bit at a time */
++			set_bit(cur, bm);
++			cur++;
++		} else {
++			/* 32 bits on a 32-bit boundary: fill the whole word */
++			*(__u32 *)(bm + (cur >> 3)) = 0xffffffff;
++			cur += 32;
++		}
++	}
++}
++
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++ /* mark blocks [first, first + count) free, merging free pairs upwards; returns 0 */
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++
++ J_ASSERT(!test_bit(block, e3b->bd_bitmap));
++ set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!test_bit(block, buddy) ||
++ !test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++ /* NULL: already at the highest order, nothing to merge into */
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* for special purposes, we don't clear
++ * free bits in bitmap */
++ clear_bit(block, buddy);
++ clear_bit(block + 1, buddy);
++ }
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++
++ block = block >> 1;
++ order++;
++ e3b->bd_bd->bb_counters[order]++;
++
++ set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * build 'out' from the tail chunks of 'in'; returns 1 if 'out' covers 'needed', else 0
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for a single-chunk extent we need no backward order.
++ * also, if an extent doesn't fill the needed space
++ * then it makes no sense to try backward order because
++ * if we select this extent then it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situation fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++			int needed, struct ext3_free_extent *ex)
++{
++	int space = needed;
++	int next, max, ord;
++	void *buddy;
++	/* describe in 'ex' the free chunk at bit 'block' of order 'order' plus
++	 * the free chunks following it; returns the length found, 0 if none */
++	J_ASSERT(ex != NULL);
++	ex->fe_nums = 0;
++	ex->fe_len = 0;
++	ex->fe_start = 0;	/* the assertion at 'nofree' reads it */
++	buddy = mb_find_buddy(e3b, order, &max);
++	J_ASSERT(buddy);
++	J_ASSERT(block < max);
++	if (!test_bit(block, buddy))
++		goto nofree;
++
++	if (order == 0) {
++		/* find actual order */
++		order = mb_find_order_for_block(e3b, block);
++		block = block >> order;
++	}
++
++	ex->fe_orders[ex->fe_nums++] = order;
++	ex->fe_len = 1 << order;
++	ex->fe_start = block << order;
++	ex->fe_back = 0;
++
++	while ((space = space - (1 << order)) > 0) {
++
++		buddy = mb_find_buddy(e3b, order, &max);
++		J_ASSERT(buddy);
++
++		if (block + 1 >= max)
++			break;
++
++		next = (block + 1) * (1 << order);
++		if (!test_bit(next, e3b->bd_bitmap))
++			break;
++
++		ord = mb_find_order_for_block(e3b, next);
++
++		if ((1 << ord) >= needed) {
++			/* we dont want to coalesce with self-enough buddies */
++			break;
++		}
++		order = ord;
++		block = next >> order;
++		ex->fe_len += 1 << order;
++
++		if (ex->fe_nums < MB_ARR_SIZE)
++			ex->fe_orders[ex->fe_nums++] = order;
++	}
++
++nofree:
++	J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++	return ex->fe_len;
++}
++
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++ /* allocate the last 'len' blocks of 'ex', walking down from its final block */
++ start = ex->fe_start + ex->fe_len - 1;
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++ len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start -= mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ J_ASSERT(start >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ /* both halves become free chunks one order lower */
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ set_bit(cur, buddy);
++ set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++ /* allocate the first 'len' blocks of 'ex', walking up from ex->fe_start */
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ /* both halves become free chunks one order lower */
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ set_bit(cur, buddy);
++ set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++int inline mb_mark_used(struct ext3_buddy *e3b,
++				struct ext3_free_extent *ex, int len)
++{
++	/* fe_back selects which end of the extent the blocks come from:
++	 * non-zero takes the last 'len' blocks, zero the first 'len' */
++	J_ASSERT(ex);
++
++	if (ex->fe_back)
++		return mb_mark_used_backward(e3b, ex, len);
++
++	return mb_mark_used_forward(e3b, ex, len);
++}
++
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b, int group)
++{
++ struct super_block *sb = ac->ac_sb;
++ int err, gorder, max, i;
++ struct ext3_free_extent curex;
++
++ /* NOTE(review): gorder is computed here but never read below */
++ gorder = 0;
++ while (ac->ac_g_len > (1 << gorder))
++ gorder++;
++
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++ /* someone asks for space at this specified block
++ * probably he wants to merge it into existing extent */
++ if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++ /* good. at least one block is free */
++ max = mb_find_extent(e3b, 0, ac->ac_g_start,
++ ac->ac_g_len, &curex);
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ err = 0;
++ goto out;
++ }
++ /* don't try to find goal anymore */
++ ac->ac_g_flags &= ~1;
++ }
++ /* scan the bitmap for the first free extent of at least ac_g_len blocks */
++ i = 0;
++ while (1) {
++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++ if (i >= sb->s_blocksize * 8)
++ break;
++
++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++ if (max >= ac->ac_g_len) {
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ break;
++ }
++ i += max;
++ }
++
++ return 0;
++
++out:
++ return err;
++}
++
++/* Decide whether group @group is worth scanning in pass @cr; 1 means yes. */
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++	struct ext3_group_desc *desc;
++	int nfree;
++
++	desc = ext3_get_group_desc(ac->ac_sb, group, NULL);
++	if (desc == NULL)
++		return 0;
++	nfree = le16_to_cpu(desc->bg_free_blocks_count);
++	if (nfree == 0)
++		return 0;
++
++	/* someone wants this block very much */
++	if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++		return 1;
++
++	/* FIXME: I'd like to take fragmentation into account here */
++	switch (cr) {
++	case 0:		/* first pass: half of the request must be free */
++		return nfree >= ac->ac_g_len >> 1;
++	case 1:		/* second pass: a quarter of the request is enough */
++		return nfree >= ac->ac_g_len >> 2;
++	case 2:		/* last pass: any group with free blocks will do */
++		return 1;
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++/* Allocate up to *len blocks for @inode, starting the search at @goal.
++ * Returns the first block and stores the allocated count in *len, or
++ * returns 0 on failure (error code in *errp). */
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++			unsigned long goal, int *len, int flags, int *errp)
++{
++	struct buffer_head *bitmap_bh = NULL;
++	struct ext3_allocation_context ac;
++	int i, group, block, cr, err = 0;
++	struct ext3_group_desc *gdp;
++	struct ext3_super_block *es;
++	struct buffer_head *gdp_bh;
++	struct ext3_sb_info *sbi;
++	struct super_block *sb;
++	struct ext3_buddy e3b;
++
++	J_ASSERT(len != NULL);
++	J_ASSERT(*len > 0);
++
++	sb = inode->i_sb;
++	if (!sb) {
++		printk("ext3_mb_new_nblocks: nonexistent device");
++		return 0;
++	}
++
++	if (!test_opt(sb, MBALLOC)) {
++		static int ext3_mballoc_warning = 0;
++		if (ext3_mballoc_warning == 0) {
++			printk(KERN_ERR "EXT3-fs: multiblock request with "
++				"mballoc disabled!\n");
++			ext3_mballoc_warning++;
++		}
++		*len = 1;
++		err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp);
++		return err;
++	}
++
++	ext3_mb_poll_new_transaction(sb, handle);
++
++	sbi = EXT3_SB(sb);
++	es = EXT3_SB(sb)->s_es;
++
++	if (!(flags & 2)) {
++		/* someone asks for non-reserved blocks */
++		BUG_ON(*len > 1);
++		err = ext3_mb_reserve_blocks(sb, 1);
++		if (err) {
++			*errp = err;
++			return 0;
++		}
++	}
++
++	/*
++	 * Check quota for allocation of these blocks.
++	 */
++	while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++		*len -= 1;
++	if (*len == 0) {
++		*errp = -EDQUOT;
++		block = 0;
++		goto out;
++	}
++
++	/* start searching from the goal */
++	if (goal < le32_to_cpu(es->s_first_data_block) ||
++	    goal >= le32_to_cpu(es->s_blocks_count))
++		goal = le32_to_cpu(es->s_first_data_block);
++	group = (goal - le32_to_cpu(es->s_first_data_block)) /
++			EXT3_BLOCKS_PER_GROUP(sb);
++	block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++			EXT3_BLOCKS_PER_GROUP(sb));
++
++	/* set up allocation goals */
++	ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++	ac.ac_status = 0;
++	ac.ac_groups_scanned = 0;
++	ac.ac_sb = inode->i_sb;
++	ac.ac_g_group = group;
++	ac.ac_g_start = block;
++	ac.ac_g_len = *len;
++	ac.ac_g_flags = flags;
++
++	/* loop over the groups */
++	for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++		for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++			if (group == EXT3_SB(sb)->s_groups_count)
++				group = 0;
++
++			/* check whether the group meets our criteria */
++			if (!mb_good_group(&ac, group, cr))
++				continue;
++
++			err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++			if (err)
++				goto out_err;
++
++			ext3_lock_group(sb, group);
++			if (!mb_good_group(&ac, group, cr)) {
++				/* someone did allocation from this group */
++				ext3_unlock_group(sb, group);
++				ext3_mb_release_desc(&e3b);
++				continue;
++			}
++
++			err = ext3_mb_new_in_group(&ac, &e3b, group);
++			ext3_unlock_group(sb, group);
++			if (ac.ac_status == AC_STATUS_FOUND)
++				ext3_mb_dirty_buddy(&e3b);
++			ext3_mb_release_desc(&e3b);
++			if (err)
++				goto out_err;
++			if (ac.ac_status == AC_STATUS_FOUND)
++				break;
++		}
++	}
++
++	if (ac.ac_status != AC_STATUS_FOUND) {
++		/* unfortunately, we can't satisfy this request */
++		J_ASSERT(ac.ac_b_len == 0);
++		DQUOT_FREE_BLOCK(inode, *len);
++		*errp = -ENOSPC;
++		block = 0;
++		goto out;
++	}
++
++	/* good news - free block(s) have been found. now it's time
++	 * to mark block(s) in good old journaled bitmap */
++	block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++		+ ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
++
++	ext3_debug("using block group %d, start %d, len %d\n",
++			ac.ac_b_group, ac.ac_b_start, ac.ac_b_len);
++
++	bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++	if (!bitmap_bh) {
++		err = -EIO;	/* out_err stores err in *errp */
++		goto out_err;
++	}
++
++	err = ext3_journal_get_write_access(handle, bitmap_bh);
++	if (err) {
++		*errp = err;
++		goto out_err;
++	}
++
++	gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++	if (!gdp) {
++		err = -EIO;	/* out_err stores err in *errp */
++		goto out_err;
++	}
++
++	err = ext3_journal_get_write_access(handle, gdp_bh);
++	if (err)
++		goto out_err;
++
++	block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++				+ le32_to_cpu(es->s_first_data_block);
++
++	if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++	    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++	    in_range(block, le32_to_cpu(gdp->bg_inode_table),
++		      EXT3_SB(sb)->s_itb_per_group))
++		ext3_error(sb, "ext3_new_block",
++			    "Allocating block in system zone - "
++			    "block = %u", block);
++#if 0
++	for (i = 0; i < ac.ac_b_len; i++)
++		J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++	mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++	ext3_lock_group(sb, ac.ac_b_group);
++	gdp->bg_free_blocks_count =
++			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++					ac.ac_b_len);
++	ext3_unlock_group(sb, ac.ac_b_group);
++	percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++	if (err)
++		goto out_err;
++	err = ext3_journal_dirty_metadata(handle, gdp_bh);
++	if (err)
++		goto out_err;
++
++	sb->s_dirt = 1;
++	*errp = 0;
++	brelse(bitmap_bh);
++
++	/* give back the quota charged for blocks we did not allocate */
++	J_ASSERT(*len >= ac.ac_b_len);
++	DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++	*len = ac.ac_b_len;
++	J_ASSERT(block != 0);
++	goto out;
++
++out_err:
++	/* if we've already allocated something, roll it back */
++	if (ac.ac_status == AC_STATUS_FOUND) {
++		/* FIXME: free blocks here */
++	}
++
++	DQUOT_FREE_BLOCK(inode, *len);
++	brelse(bitmap_bh);
++	*errp = err;
++	block = 0;
++out:
++	if (!(flags & 2)) {
++		/* block wasn't reserved before and we reserved it
++		 * at the beginning of allocation. it doesn't matter
++		 * whether we allocated anything or we failed: time
++		 * to release reservation. NOTE: because I expect
++		 * any multiblock request from delayed allocation
++		 * path only, here is single block always */
++		ext3_mb_release_blocks(sb, 1);
++	}
++	return block;
++}
++
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++	struct buffer_head *bitmap_bh;
++	struct ext3_buddy e3b;
++	int bit, err;
++
++	err = ext3_mb_load_desc(sb, group, &e3b);
++	if (err)
++		return err;
++
++	/* start from an empty buddy: wipe both of its blocks */
++	memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++	memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++
++	bitmap_bh = read_block_bitmap(sb, group);
++	if (bitmap_bh == NULL) {
++		err = -EIO;
++		goto release;
++	}
++
++	/* feed every block that is clear in the block bitmap to the buddy */
++	for (bit = 0; bit < sb->s_blocksize * 8; bit++) {
++		if (test_bit(bit, (void *) bitmap_bh->b_data))
++			continue;
++		mb_free_blocks(&e3b, bit, 1);
++	}
++	brelse(bitmap_bh);
++	mb_check_buddy(&e3b);
++	ext3_mb_dirty_buddy(&e3b);
++
++release:
++	ext3_mb_release_desc(&e3b);
++	return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ 2 * EXT3_QUOTA_INIT_BLOCKS)
++
++/*
++ * Find or create the buddy file in the root directory and map two of its
++ * blocks (bitmap and buddy) for every group.  Returns 0 or a negative errno.
++ */
++int ext3_mb_init_backend(struct super_block *sb)
++{
++	struct inode *root = sb->s_root->d_inode;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct dentry *db;
++	tid_t target;
++	int err = 0, i;
++
++	sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++					sbi->s_groups_count, GFP_KERNEL);
++	if (sbi->s_buddy_blocks == NULL) {
++		printk("can't allocate mem for buddy maps\n");
++		return -ENOMEM;
++	}
++	memset(sbi->s_buddy_blocks, 0,
++		sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++	sbi->s_buddy = NULL;
++
++	down(&root->i_sem);
++	db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++				strlen(EXT3_BUDDY_FILE));
++	if (IS_ERR(db)) {
++		err = PTR_ERR(db);
++		printk("can't lookup buddy file: %d\n", err);
++		goto out;
++	}
++
++	if (db->d_inode == NULL) {
++		err = ext3_create(root, db, S_IFREG, NULL);
++		if (err) {
++			printk("error while creation buddy file: %d\n", err);
++			goto out2;	/* there is no inode to map blocks of */
++		}
++	}
++	sbi->s_buddy = igrab(db->d_inode);
++
++	for (i = 0; i < sbi->s_groups_count; i++) {
++		struct buffer_head *bh = NULL;
++		handle_t *handle;
++
++		handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++		if (IS_ERR(handle)) {
++			err = PTR_ERR(handle);
++			goto out2;
++		}
++		/* allocate block for bitmap */
++		bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++		if (bh == NULL) {
++			printk("can't get block for buddy bitmap: %d\n", err);
++			ext3_journal_stop(handle);	/* don't leak the handle */
++			goto out2;
++		}
++		sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++		brelse(bh);
++		/* allocate block for buddy */
++		bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++		if (bh == NULL) {
++			printk("can't get block for buddy: %d\n", err);
++			ext3_journal_stop(handle);	/* don't leak the handle */
++			goto out2;
++		}
++		sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++		brelse(bh);
++		ext3_journal_stop(handle);
++		spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++		sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++		sbi->s_buddy_blocks[i].bb_tid = 0;
++	}
++
++	if (journal_start_commit(sbi->s_journal, &target))
++		log_wait_commit(sbi->s_journal, target);
++
++out2:
++	dput(db);
++out:
++	up(&root->i_sem);
++	return err;
++}
++
++int ext3_mb_release(struct super_block *sb)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct list_head *queue = &sbi->s_committed_transaction;
++
++	if (!test_opt(sb, MBALLOC))
++		return 0;
++
++	/* hand blocks freed in not yet committed transactions to the buddy */
++	spin_lock(&sbi->s_md_lock);
++	list_splice_init(&sbi->s_closed_transaction, queue);
++	list_splice_init(&sbi->s_active_transaction, queue);
++	spin_unlock(&sbi->s_md_lock);
++	ext3_mb_free_committed_blocks(sb);
++
++	if (sbi->s_buddy_blocks != NULL)
++		kfree(sbi->s_buddy_blocks);
++	if (sbi->s_buddy != NULL)
++		iput(sbi->s_buddy);
++	/* reservations still outstanding here are only reported */
++	if (sbi->s_blocks_reserved != 0)
++		printk("ext3-fs: %ld blocks being reserved at umount!\n",
++			sbi->s_blocks_reserved);
++	return 0;
++}
++
++int ext3_mb_init(struct super_block *sb)
++{
++	int i, err;
++
++	if (!test_opt(sb, MBALLOC))
++		return 0;
++	clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);	/* off during setup */
++	spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++	spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++	INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++	err = ext3_mb_init_backend(sb);	/* buddy file, via the old allocator */
++	for (i = 0; !err && i < EXT3_SB(sb)->s_groups_count; i++)
++		err = ext3_mb_generate_buddy(sb, i);
++	if (err) {	/* unusable buddy data: stay on the old allocator */
++		kfree(EXT3_SB(sb)->s_buddy_blocks);
++		iput(EXT3_SB(sb)->s_buddy);
++		return 0;
++	}
++	set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++	printk("EXT3-fs: mballoc enabled\n");
++	return 0;
++}
++
++void ext3_mb_free_committed_blocks(struct super_block *sb)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct list_head *queue = &sbi->s_committed_transaction;
++	int nblocks = 0, nstructs = 0;
++	struct ext3_free_metadata *md;
++	struct ext3_buddy e3b;
++	int err, i;
++
++	/* unlocked peek: nothing to do when no container is queued */
++	if (list_empty(queue))
++		return;
++
++	for (;;) {
++		/* detach the next container of committed blocks, if any */
++		spin_lock(&sbi->s_md_lock);
++		if (list_empty(queue)) {
++			spin_unlock(&sbi->s_md_lock);
++			break;
++		}
++		md = list_entry(queue->next, struct ext3_free_metadata, list);
++		list_del(&md->list);
++		spin_unlock(&sbi->s_md_lock);
++
++		mb_debug("gonna free %u blocks in group %u (0x%p):",
++			md->num, md->group, md);
++
++		err = ext3_mb_load_desc(sb, md->group, &e3b);
++		BUG_ON(err != 0);
++
++		/* put the blocks into the buddy: only now are they really free */
++		nblocks += md->num;
++		nstructs++;
++		ext3_lock_group(sb, md->group);
++		for (i = 0; i < md->num; i++) {
++			mb_debug(" %u", md->blocks[i]);
++			mb_free_blocks(&e3b, md->blocks[i], 1);
++		}
++		mb_debug("\n");
++		ext3_unlock_group(sb, md->group);
++
++		/* the container itself is not needed any more */
++		kfree(md);
++		ext3_mb_dirty_buddy(&e3b);
++		ext3_mb_release_desc(&e3b);
++	}
++
++	mb_debug("freed %u blocks in %u structures\n", nblocks, nstructs);
++}
++
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	tid_t tid = handle->h_transaction->t_tid;
++
++	if (tid == sbi->s_last_transaction)
++		return;
++
++	/* new transaction! time to close the last one and free blocks of
++	 * the committed one. we know that only one transaction can be
++	 * active, so the previous transaction may still be being logged,
++	 * while the one before it is known to be already logged. this
++	 * means that now we may free blocks freed in all transactions
++	 * before the previous one. */
++	spin_lock(&sbi->s_md_lock);
++	if (tid != sbi->s_last_transaction) {
++		mb_debug("new transaction %lu, old %lu\n",
++			(unsigned long) tid,
++			(unsigned long) sbi->s_last_transaction);
++		list_splice_init(&sbi->s_closed_transaction,
++				&sbi->s_committed_transaction);
++		list_splice_init(&sbi->s_active_transaction,
++				&sbi->s_closed_transaction);
++		sbi->s_last_transaction = tid;
++	}
++	spin_unlock(&sbi->s_md_lock);
++
++	ext3_mb_free_committed_blocks(sb);
++}
++
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++				int group, int block, int count)
++{
++	struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++	struct super_block *sb = e3b->bd_sb;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct ext3_free_metadata *md;
++	int i;
++
++	ext3_lock_group(sb, group);
++	for (i = 0; i < count; i++) {	/* record blocks one at a time */
++		md = db->bb_md_cur;
++		if (md && db->bb_tid != handle->h_transaction->t_tid) {
++			db->bb_md_cur = NULL;	/* container of another transaction */
++			md = NULL;
++		}
++		/* no usable container: allocate one with the group lock dropped */
++		if (md == NULL) {
++			ext3_unlock_group(sb, group);
++			md = kmalloc(sizeof(*md), GFP_KERNEL);
++			if (md == NULL)
++				return -ENOMEM;	/* blocks recorded so far stay queued */
++			md->num = 0;
++			md->group = group;
++			/* someone may have installed a container meanwhile */
++			ext3_lock_group(sb, group);
++			if (db->bb_md_cur == NULL) {
++				spin_lock(&sbi->s_md_lock);
++				list_add(&md->list, &sbi->s_active_transaction);
++				spin_unlock(&sbi->s_md_lock);
++				db->bb_md_cur = md;
++				db->bb_tid = handle->h_transaction->t_tid;
++				mb_debug("new md 0x%p for group %u\n",
++					md, md->group);
++			} else {
++				kfree(md);	/* lost the race: use the installed one */
++				md = db->bb_md_cur;
++			}
++		}
++
++		BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++		md->blocks[md->num] = block + i;
++		md->num++;
++		if (md->num == EXT3_BB_MAX_BLOCKS) {
++			/* container is full (it is on the sb's list already) */
++			db->bb_md_cur = NULL;
++		}
++	}
++	ext3_unlock_group(sb, group);
++	return 0;
++}
++
++/*
++ * Free @count blocks starting at @block.  With @metadata set the blocks
++ * stay out of the buddy until the freeing transaction has committed.
++ */
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++			unsigned long block, unsigned long count, int metadata)
++{
++	struct buffer_head *bitmap_bh = NULL;
++	struct ext3_group_desc *gdp;
++	struct ext3_super_block *es;
++	unsigned long bit, overflow;
++	struct buffer_head *gd_bh;
++	unsigned long block_group;
++	struct ext3_sb_info *sbi;
++	struct super_block *sb;
++	struct ext3_buddy e3b;
++	int err = 0, ret;
++
++	sb = inode->i_sb;
++	if (!sb) {
++		printk ("ext3_free_blocks: nonexistent device");
++		return;
++	}
++
++	ext3_mb_poll_new_transaction(sb, handle);
++
++	sbi = EXT3_SB(sb);
++	es = EXT3_SB(sb)->s_es;
++	if (block < le32_to_cpu(es->s_first_data_block) ||
++	    block + count < block ||
++	    block + count > le32_to_cpu(es->s_blocks_count)) {
++		ext3_error (sb, "ext3_free_blocks",
++			    "Freeing blocks not in datazone - "
++			    "block = %lu, count = %lu", block, count);
++		goto error_return;
++	}
++
++	ext3_debug("freeing block %lu\n", block);
++
++do_more:
++	overflow = 0;
++	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++		      EXT3_BLOCKS_PER_GROUP(sb);
++	bit = (block - le32_to_cpu(es->s_first_data_block)) %
++		      EXT3_BLOCKS_PER_GROUP(sb);
++	/* the part crossing the group boundary is handled on the next pass */
++	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++		count -= overflow;
++	}
++	brelse(bitmap_bh);
++	bitmap_bh = read_block_bitmap(sb, block_group);
++	if (!bitmap_bh)
++		goto error_return;
++	gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++	if (!gdp)
++		goto error_return;
++
++	if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++	    in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++	    in_range (block, le32_to_cpu(gdp->bg_inode_table),
++		      EXT3_SB(sb)->s_itb_per_group) ||
++	    in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++		      EXT3_SB(sb)->s_itb_per_group))
++		ext3_error (sb, "ext3_free_blocks",
++			    "Freeing blocks in system zones - "
++			    "Block = %lu, count = %lu",
++			    block, count);
++
++	BUFFER_TRACE(bitmap_bh, "getting write access");
++	err = ext3_journal_get_write_access(handle, bitmap_bh);
++	if (err)
++		goto error_return;
++
++	/*
++	 * We are about to modify some metadata.  Call the journal APIs
++	 * to unshare ->b_data if a currently-committing transaction is
++	 * using it
++	 */
++	BUFFER_TRACE(gd_bh, "get_write_access");
++	err = ext3_journal_get_write_access(handle, gd_bh);
++	if (err)
++		goto error_return;
++
++	err = ext3_mb_load_desc(sb, block_group, &e3b);
++	if (err)
++		goto error_return;
++
++	/* the bitmap bits are cleared below for data and metadata alike,
++	 * so the free counters have to follow in both cases */
++	ext3_lock_group(sb, block_group);
++	if (!metadata)
++		mb_free_blocks(&e3b, bit, count);
++	gdp->bg_free_blocks_count =
++		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++	ext3_unlock_group(sb, block_group);
++	percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++	/* metadata must not be reused before this transaction commits */
++	if (metadata)
++		ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
++
++	ext3_mb_dirty_buddy(&e3b);
++	ext3_mb_release_desc(&e3b);
++
++	/* FIXME: undo logic will be implemented later and another way */
++	mb_clear_bits(bitmap_bh->b_data, bit, count);
++	DQUOT_FREE_BLOCK(inode, count);
++
++	/* We dirtied the bitmap block */
++	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++	/* And the group descriptor block */
++	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++	ret = ext3_journal_dirty_metadata(handle, gd_bh);
++	if (!err) err = ret;
++
++	if (overflow && !err) {
++		block += count;
++		count = overflow;
++		goto do_more;
++	}
++	sb->s_dirt = 1;
++error_return:
++	brelse(bitmap_bh);
++	ext3_std_error(sb, err);
++}
++
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int nfree, rc = 0;
++
++	BUG_ON(blocks < 0);
++	spin_lock(&sbi->s_reserve_lock);
++	nfree = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++	if (nfree - sbi->s_blocks_reserved < blocks)
++		rc = -ENOSPC;	/* the rest is reserved already */
++	else
++		sbi->s_blocks_reserved += blocks;
++	spin_unlock(&sbi->s_reserve_lock);
++	return rc;
++}
++
++void ext3_mb_release_blocks(struct super_block *sb, int blocks)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	long left;
++
++	BUG_ON(blocks < 0);
++	spin_lock(&sbi->s_reserve_lock);
++	left = sbi->s_blocks_reserved - blocks;
++	WARN_ON(left < 0);	/* more released than is reserved */
++	sbi->s_blocks_reserved = left < 0 ? 0 : left;
++	spin_unlock(&sbi->s_reserve_lock);
++}
++
++/* Allocate one block for @inode: the multiblock allocator serves the
++ * request when the mballoc option is set, the original one otherwise. */
++int ext3_new_block(handle_t *handle, struct inode *inode,
++			unsigned long goal, u32 *pc, u32 *pb, int *errp)
++{
++	int len = 1;
++
++	/* @pc and @pb are passed on to the original allocator only */
++	if (!test_opt(inode->i_sb, MBALLOC))
++		return ext3_new_block_old(handle, inode, goal, pc, pb, errp);
++
++	/* flags == 0: a single block that has not been reserved yet */
++	return ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++}
++
++
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++			unsigned long block, unsigned long count, int metadata)
++{
++	/* @metadata is not passed to the original allocator */
++	if (test_opt(inode->i_sb, MBALLOC))
++		ext3_mb_free_blocks(handle, inode, block, count, metadata);
++	else
++		ext3_free_blocks_old(handle, inode, block, count);
++}
++
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -543,7 +544,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err, Opt_extents, Opt_extdebug
++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc
+ };
+
+ static match_table_t tokens = {
+@@ -588,6 +589,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -803,6 +805,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1444,7 +1449,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-stage/fs/ext3/Makefile
+===================================================================
+--- linux-stage.orig/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/fs/ext3/balloc.c 2004-10-13 17:06:53.000000000 -0400
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -98,8 +98,8 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks (handle_t *handle, struct inode * inode,
+- unsigned long block, unsigned long count)
++void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gd_bh;
+@@ -528,8 +528,8 @@
+ * This function also updates quota and i_blocks field.
+ */
+ int
+-ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
+- u32 *prealloc_count, u32 *prealloc_block, int *errp)
++ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal,
++ u32 *prealloc_count, u32 *prealloc_block, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL; /* bh */
+ struct buffer_head *gdp_bh; /* bh2 */
+Index: linux-stage/fs/ext3/namei.c
+===================================================================
+--- linux-stage.orig/fs/ext3/namei.c 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/fs/ext3/namei.c 2004-10-13 17:06:53.000000000 -0400
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-stage/fs/ext3/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400
+@@ -256,7 +256,7 @@
+ ei->i_prealloc_count = 0;
+ ei->i_prealloc_block = 0;
+ /* Writer: end */
+- ext3_free_blocks (inode, block, total);
++ ext3_free_blocks (inode, block, total, 1);
+ }
+ #endif
+ }
+@@ -635,7 +635,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -736,7 +736,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1924,7 +1924,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2095,7 +2095,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-stage/fs/ext3/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1388,7 +1388,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1876,10 +1876,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1891,7 +1893,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-stage/fs/ext3/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1504,7 +1504,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
+ /*
+ * Special inodes numbers
+ */
+@@ -336,6 +338,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -696,7 +699,7 @@
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
+ __u32 *, __u32 *, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-stage/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/include/linux/ext3_fs_sb.h 2004-10-13 17:06:53.000000000 -0400
+@@ -23,9 +23,29 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+
++#define EXT3_BB_MAX_BLOCKS 30	/* capacity of one ext3_free_metadata */
++struct ext3_free_metadata {
++	unsigned short group;	/* group the recorded blocks belong to */
++	unsigned short num;	/* number of used entries in blocks[] */
++	unsigned short blocks[EXT3_BB_MAX_BLOCKS];	/* offsets within the group */
++	struct list_head list;	/* link in one of the sb's transaction lists */
++};
++
++#define EXT3_BB_MAX_ORDER 14	/* size of bb_counters[] */
++
++struct ext3_buddy_group_blocks {
++	sector_t bb_bitmap;	/* block nr of the group's bitmap in the buddy file */
++	sector_t bb_buddy;	/* block nr of the group's buddy in the buddy file */
++	spinlock_t bb_lock;	/* presumably taken by ext3_lock_group() - confirm */
++	unsigned bb_counters[EXT3_BB_MAX_ORDER];	/* free chunks per order */
++	struct ext3_free_metadata *bb_md_cur;	/* container being filled, or NULL */
++	unsigned long bb_tid;	/* tid of the transaction bb_md_cur was set up in */
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -72,6 +92,17 @@
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
+Index: linux-stage/include/linux/ext3_jbd.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_jbd.h 2004-10-13 17:06:52.000000000 -0400
++++ linux-stage/include/linux/ext3_jbd.h 2004-10-13 19:12:30.000000000 -0400
+@@ -72,6 +72,23 @@
+
+ #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
+
++#ifdef CONFIG_QUOTA
++/* Maximal numbers of writes for quota operation (insert/delete/update)
++ * (over all formats) - info block, 4 pointer blocks, data block */
++#define DQUOT_MAX_WRITES 6
++
++/* Amount of blocks needed for quota update - we know that the structure was
++ * allocated so we need to update only inode+data */
++#define EXT3_QUOTA_TRANS_BLOCKS 2
++/* Amount of blocks needed for quota insert/delete - we do some block writes
++ * but inode, sb and group updates are done only once */
++#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\
++ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3)
++#else
++#define EXT3_QUOTA_TRANS_BLOCKS 0
++#define EXT3_QUOTA_INIT_BLOCKS 0
++#endif
++
+ int
+ ext3_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
export-ext3-2.6-suse.patch
ext3-include-fixes-2.6-suse.patch
ext3-extents-2.6.5.patch
-ext3-mballoc2-2.6.7.patch
+ext3-mballoc2-2.6-suse.patch
ext3-nlinks-2.6.7.patch