Index: linux-2.6.9/include/linux/ext3_fs_i.h =================================================================== --- linux-2.6.9.orig/include/linux/ext3_fs_i.h +++ linux-2.6.9/include/linux/ext3_fs_i.h @@ -130,6 +130,10 @@ struct ext3_inode_info { struct inode vfs_inode; __u32 i_cached_extent[4]; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; }; #endif /* _LINUX_EXT3_FS_I */ Index: linux-2.6.9/include/linux/ext3_fs_sb.h =================================================================== --- linux-2.6.9.orig/include/linux/ext3_fs_sb.h +++ linux-2.6.9/include/linux/ext3_fs_sb.h @@ -23,9 +23,16 @@ #define EXT_INCLUDE #include #include +#include #endif #endif #include +#include + +struct ext3_buddy_group_blocks; +struct ext3_locality_group; +struct ext3_mb_history; +#define EXT3_BB_MAX_BLOCKS /* * third extended-fs super-block data in memory Index: linux-2.6.9/include/linux/ext3_fs.h =================================================================== --- linux-2.6.9.orig/include/linux/ext3_fs.h +++ linux-2.6.9/include/linux/ext3_fs.h @@ -389,6 +389,7 @@ struct ext3_inode { #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ +#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt @@ -757,8 +758,9 @@ struct dir_private_info { extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); +extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, - unsigned long); + unsigned long, int); extern void ext3_free_blocks_sb (handle_t *, struct super_block *, unsigned long, unsigned long, int *); extern unsigned long ext3_count_free_blocks (struct super_block *); Index: linux-2.6.9/fs/ext3/super.c =================================================================== --- linux-2.6.9.orig/fs/ext3/super.c +++ linux-2.6.9/fs/ext3/super.c @@ -642,6 +642,7 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, + Opt_mballoc, Opt_nomballoc, Opt_stripe, }; static match_table_t tokens = { @@ -695,6 +696,9 @@ static match_table_t tokens = { {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, {Opt_barrier, "barrier=%u"}, + {Opt_mballoc, "mballoc"}, + {Opt_nomballoc, "nomballoc"}, + {Opt_stripe, "stripe=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1007,6 +1011,19 @@ clear_qf_name: case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; + case Opt_mballoc: + set_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_nomballoc: + clear_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_stripe: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_stripe = option; + break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " @@ -1696,6 +1713,7 @@ static int ext3_fill_super (struct super ext3_count_dirs(sb)); ext3_ext_init(sb); + ext3_mb_init(sb, needs_recovery); return 0; Index: linux-2.6.9/fs/ext3/extents.c =================================================================== --- linux-2.6.9.orig/fs/ext3/extents.c +++ linux-2.6.9/fs/ext3/extents.c @@ -820,7 +820,7 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext3_free_blocks(handle, tree->inode, ablocks[i], 1); + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); } } kfree(ablocks); @@ -1630,7 +1630,7 @@ int ext3_ext_rm_idx(handle_t *handle, st path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); - ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); return err; } @@ -2129,10 +2129,12 @@ ext3_remove_blocks(struct ext3_extents_t int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; - int i; + int i, metadata = 0; if (IS_ERR(handle)) return PTR_ERR(handle); + if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) + metadata = 1; if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; @@ -2144,7 +2146,7 @@ ext3_remove_blocks(struct ext3_extents_t bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } - ext3_free_blocks(handle, tree->inode, start, num); + ext3_free_blocks(handle, tree->inode, start, num, metadata); } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); @@ -2235,11 +2237,8 @@ int ext3_ext_get_block(handle_t *handle, struct ext3_extent *ex; int goal, newblock, err = 0, depth; struct ext3_extents_tree tree; - unsigned long next; - int allocated = 0; - - /* until we have multiblock allocation */ - max_blocks = 1; + unsigned long allocated = 0; + struct ext3_allocation_request ar; clear_buffer_new(bh_result); ext3_init_tree_desc(&tree, inode); @@ -2311,18 +2310,36 @@ int ext3_ext_get_block(handle_t *handle, goto out2; } + /* find neighbour allocated blocks */ + ar.lleft = iblock; + err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); + if (err) + goto out2; + ar.lright = iblock; + err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); + if (err) + goto out2; + /* find next allocated block so that we know how many * blocks we can allocate without ovelapping next extent */ - EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); - next = ext3_ext_next_allocated_block(path); - EXT_ASSERT(next > iblock); - allocated = next - iblock; + EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); + if (ar.pright == 0) + allocated = EXT_MAX_BLOCK - iblock; + else + allocated = ar.lright - iblock; if (allocated > max_blocks) allocated = max_blocks; /* allocate new block */ - goal = ext3_ext_find_goal(inode, path, iblock); - newblock = ext3_new_block(handle, inode, goal, &err); + ar.inode = inode; + ar.goal = ext3_ext_find_goal(inode, path, iblock); + ar.logical = iblock; + ar.len = allocated; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT3_MB_HINT_DATA; + else + ar.flags = 0; + newblock = ext3_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; ext_debug(&tree, "allocate new block: goal %d, found %d\n", @@ -2332,11 +2349,14 @@ int ext3_ext_get_block(handle_t *handle, newex.ee_block = iblock; newex.ee_start = newblock; newex.ee_start_hi = 0; - newex.ee_len = 1; + newex.ee_len = ar.len; err = ext3_ext_insert_extent(handle, &tree, path, &newex); if (err) { /* free data blocks we just allocated */ - ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); + /* not a good idea to call discard here directly, + * but otherwise we'd need to call it every free() */ + ext3_mb_discard_inode_preallocations(inode); + ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); goto out2; } @@ -2345,6 +2365,7 @@ int ext3_ext_get_block(handle_t *handle, /* previous routine could use block we allocated */ newblock = newex.ee_start; + allocated = newex.ee_len; set_buffer_new(bh_result); ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, @@ -2397,6 +2418,9 @@ void ext3_ext_truncate(struct inode * in down(&EXT3_I(inode)->truncate_sem); ext3_ext_invalidate_cache(&tree); + /* it's important to discard preallocations under truncate_sem */ + ext3_mb_discard_inode_preallocations(inode); + /* * TODO: optimization is possible here * probably we need not scaning at all, Index: linux-2.6.9/fs/ext3/Makefile =================================================================== --- linux-2.6.9.orig/fs/ext3/Makefile +++ linux-2.6.9/fs/ext3/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o \ - extents.o + extents.o mballoc.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o Index: linux-2.6.9/fs/ext3/xattr.c =================================================================== --- linux-2.6.9.orig/fs/ext3/xattr.c +++ linux-2.6.9/fs/ext3/xattr.c @@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle, new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext3_free_blocks(handle, inode, block, 1); + ext3_free_blocks(handle, inode, block, 1, 1); error = -EIO; goto cleanup; } @@ -1328,7 +1328,7 @@ getblk_failed: if (ce) mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); - ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to @@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle if (HDR(bh)->h_refcount == cpu_to_le32(1)) { if (ce) mb_cache_entry_free(ce); - ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); + ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { Index: linux-2.6.9/fs/ext3/balloc.c =================================================================== --- linux-2.6.9.orig/fs/ext3/balloc.c +++ linux-2.6.9/fs/ext3/balloc.c @@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ * * Return buffer_head on success or NULL in case of failure. */ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; @@ -267,6 +267,8 @@ void ext3_discard_reservation(struct ino struct reserve_window_node *rsv = &ei->i_rsv_window; spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ext3_mb_discard_inode_preallocations(inode); + if (!rsv_is_empty(&rsv->rsv_window)) { spin_lock(rsv_lock); if (!rsv_is_empty(&rsv->rsv_window)) @@ -451,21 +453,25 @@ error_return: return; } -/* Free given blocks, update quota and i_blocks field */ -void ext3_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count) +void ext3_free_blocks(handle_t *handle, struct inode * inode, + unsigned long block, unsigned long count, int metadata) { - struct super_block * sb; - int dquot_freed_blocks; + struct super_block *sb; + int freed; + + /* this isn't the right place to decide whether block is metadata + * inode.c/extents.c knows better, but for safety ... */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || + ext3_should_journal_data(inode)) + metadata = 1; sb = inode->i_sb; - if (!sb) { - printk ("ext3_free_blocks: nonexistent device"); - return; - } - ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) - DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); + if (freed) + DQUOT_FREE_BLOCK(inode, freed); return; } @@ -1131,7 +1137,7 @@ int ext3_should_retry_alloc(struct super * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ -int ext3_new_block(handle_t *handle, struct inode *inode, +int ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, int *errp) { struct buffer_head *bitmap_bh = NULL; Index: linux-2.6.9/fs/ext3/inode.c =================================================================== --- linux-2.6.9.orig/fs/ext3/inode.c +++ linux-2.6.9/fs/ext3/inode.c @@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); return err; } @@ -673,7 +673,7 @@ err_out: if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, - le32_to_cpu(where[i].key), 1); + le32_to_cpu(where[i].key), 1, 0); return err; } @@ -1834,7 +1834,7 @@ ext3_clear_blocks(handle_t *handle, stru } } - ext3_free_blocks(handle, inode, block_to_free, count); + ext3_free_blocks(handle, inode, block_to_free, count, 0); } /** @@ -2007,7 +2007,7 @@ static void ext3_free_branches(handle_t ext3_journal_test_restart(handle, inode); } - ext3_free_blocks(handle, inode, nr, 1); + ext3_free_blocks(handle, inode, nr, 1, 1); if (parent_bh) { /*