Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h =================================================================== --- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_i.h 2007-03-28 02:13:37.000000000 +0400 +++ linux-2.6.5-7.283-full/include/linux/ext3_fs_i.h 2007-03-28 15:46:02.000000000 +0400 @@ -131,6 +131,10 @@ struct ext3_inode_info { struct inode vfs_inode; struct ext3_ext_cache i_cached_extent; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; }; #endif /* _LINUX_EXT3_FS_I */ Index: linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h =================================================================== --- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs_sb.h 2007-03-28 15:46:00.000000000 +0400 +++ linux-2.6.5-7.283-full/include/linux/ext3_fs_sb.h 2007-03-28 15:46:02.000000000 +0400 @@ -23,9 +23,16 @@ #define EXT_INCLUDE #include #include +#include #endif #endif #include +#include + +struct ext3_buddy_group_blocks; +struct ext3_locality_group; +struct ext3_mb_history; +#define EXT3_BB_MAX_BLOCKS /* * third extended-fs super-block data in memory Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h =================================================================== --- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 15:46:00.000000000 +0400 +++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h 2007-03-28 15:46:02.000000000 +0400 @@ -363,6 +363,7 @@ struct ext3_inode { #define EXT3_MOUNT_IOPEN_NOPRIV 0x100000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x200000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x400000/* Extents debug */ +#define EXT3_MOUNT_MBALLOC 0x800000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt @@ -723,8 +724,9 @@ struct dir_private_info { extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); +extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, - unsigned long); + unsigned long, int); extern unsigned long ext3_count_free_blocks (struct super_block *); extern void ext3_check_blocks_bitmap (struct super_block *); extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, Index: linux-2.6.5-7.283-full/fs/ext3/super.c =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 15:46:00.000000000 +0400 +++ linux-2.6.5-7.283-full/fs/ext3/super.c 2007-03-28 15:46:02.000000000 +0400 @@ -622,6 +622,7 @@ enum { Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_extents, Opt_noextents, Opt_extdebug, + Opt_mballoc, Opt_nomballoc, Opt_stripe, }; static match_table_t tokens = { @@ -669,6 +670,9 @@ static match_table_t tokens = { {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, {Opt_barrier, "barrier=%u"}, + {Opt_mballoc, "mballoc"}, + {Opt_nomballoc, "nomballoc"}, + {Opt_stripe, "stripe=%u"}, {Opt_err, NULL} }; @@ -893,6 +897,19 @@ static int parse_options (char * options case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; + case Opt_mballoc: + set_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_nomballoc: + clear_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_stripe: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_stripe = option; + break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " @@ -1548,6 +1565,7 @@ static int ext3_fill_super (struct super ext3_count_dirs(sb)); ext3_ext_init(sb); + ext3_mb_init(sb, needs_recovery); return 0; Index: linux-2.6.5-7.283-full/fs/ext3/extents.c =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/extents.c 2007-03-28 03:18:19.000000000 +0400 +++ linux-2.6.5-7.283-full/fs/ext3/extents.c 2007-03-28 15:46:02.000000000 +0400 @@ -779,7 +779,7 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext3_free_blocks(handle, tree->inode, ablocks[i], 1); + ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); } } kfree(ablocks); @@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); - ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); return err; } @@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; - int i; + int i, metadata = 0; if (IS_ERR(handle)) return PTR_ERR(handle); + if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode)) + metadata = 1; if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; @@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } - ext3_free_blocks(handle, tree->inode, start, num); + ext3_free_blocks(handle, tree->inode, start, num, metadata); } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); @@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle, struct ext3_extent *ex; int goal, newblock, err = 0, depth; struct ext3_extents_tree tree; - unsigned long next; - int allocated = 0; - - /* until we have multiblock allocation */ - max_blocks = 1; + unsigned long allocated = 0; + struct ext3_allocation_request ar; __clear_bit(BH_New, &bh_result->b_state); ext3_init_tree_desc(&tree, inode); @@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle, goto out2; } + /* find neighbour allocated blocks */ + ar.lleft = iblock; + err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft); + if (err) + goto out2; + ar.lright = iblock; + err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright); + if (err) + goto out2; + /* find next allocated block so that we know how many * blocks we can allocate without ovelapping next extent */ - EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len); - next = ext3_ext_next_allocated_block(path); - EXT_ASSERT(next > iblock); - allocated = next - iblock; + EXT_ASSERT(ar.pright == 0 || ar.lright > iblock); + if (ar.pright == 0) + allocated = EXT_MAX_BLOCK - iblock; + else + allocated = ar.lright - iblock; if (allocated > max_blocks) allocated = max_blocks; /* allocate new block */ - goal = ext3_ext_find_goal(inode, path, iblock); - newblock = ext3_new_block(handle, inode, goal, &err); + ar.inode = inode; + ar.goal = ext3_ext_find_goal(inode, path, iblock); + ar.logical = iblock; + ar.len = allocated; + ar.flags = EXT3_MB_HINT_DATA; + newblock = ext3_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; ext_debug(&tree, "allocate new block: goal %d, found %d\n", @@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle, newex.ee_block = iblock; newex.ee_start = newblock; newex.ee_start_hi = 0; - newex.ee_len = 1; + newex.ee_len = ar.len; err = ext3_ext_insert_extent(handle, &tree, path, &newex); if (err) { /* free data blocks we just allocated */ - ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len); + /* not a good idea to call discard here directly, + * but otherwise we'd need to call it every free() */ + ext3_mb_discard_inode_preallocations(inode); + ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0); goto out2; } @@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle, /* previous routine could use block we allocated */ newblock = newex.ee_start; + allocated = newex.ee_len; __set_bit(BH_New, &bh_result->b_state); ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, @@ -2341,6 +2359,9 @@ void ext3_ext_truncate(struct inode * in down(&EXT3_I(inode)->truncate_sem); ext3_ext_invalidate_cache(&tree); + /* it's important to discard preallocations under truncate_sem */ + ext3_mb_discard_inode_preallocations(inode); + /* * TODO: optimization is possible here * probably we need not scaning at all, Index: linux-2.6.5-7.283-full/fs/ext3/Makefile =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/Makefile 2007-03-28 15:27:39.000000000 +0400 +++ linux-2.6.5-7.283-full/fs/ext3/Makefile 2007-03-28 15:46:02.000000000 +0400 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ ioctl.o namei.o super.o symlink.o hash.o \ - extents.o + extents.o mballoc.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o Index: linux-2.6.5-7.283-full/fs/ext3/xattr.c =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/xattr.c 2007-03-28 02:13:37.000000000 +0400 +++ linux-2.6.5-7.283-full/fs/ext3/xattr.c 2007-03-28 15:46:02.000000000 +0400 @@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle, new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext3_free_blocks(handle, inode, block, 1); + ext3_free_blocks(handle, inode, block, 1, 1); error = -EIO; goto cleanup; } @@ -1411,7 +1411,7 @@ getblk_failed: if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { /* Free the old block. */ ea_bdebug(old_bh, "freeing"); - ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); + ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); /* ext3_forget() calls bforget() for us, but we let our caller release old_bh, so we need to @@ -1519,7 +1519,7 @@ ext3_xattr_delete_inode(handle_t *handle mb_cache_entry_free(ce); ce = NULL; } - ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); + ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c 2006-12-01 18:39:48.000000000 +0300 +++ linux-2.6.5-7.283-full/fs/ext3/balloc.c 2007-03-28 15:46:02.000000000 +0400 @@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_ * * Return buffer_head on success or NULL in case of failure. */ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; @@ -266,6 +266,8 @@ void ext3_discard_reservation(struct ino struct reserve_window_node *rsv = &ei->i_rsv_window; spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ext3_mb_discard_inode_preallocations(inode); + if (!rsv_is_empty(&rsv->rsv_window)) { spin_lock(rsv_lock); rsv_window_remove(inode->i_sb, rsv); @@ -274,7 +276,7 @@ void ext3_discard_reservation(struct ino } /* Free given blocks, update quota and i_blocks field */ -void ext3_free_blocks(handle_t *handle, struct inode *inode, +void ext3_free_blocks_old(handle_t *handle, struct inode *inode, unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; @@ -456,6 +458,29 @@ error_return: return; } +void ext3_free_blocks(handle_t *handle, struct inode * inode, + unsigned long block, unsigned long count, int metadata) +{ + struct super_block *sb; + int freed; + + /* this isn't the right place to decide whether block is metadata + * inode.c/extents.c knows better, but for safety ... */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || + ext3_should_journal_data(inode)) + metadata = 1; + + sb = inode->i_sb; + if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_old(handle, inode, block, count); + else { + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); + if (freed) + DQUOT_FREE_BLOCK(inode, freed); + } + return; +} + /* * For ext3 allocations, we must not reuse any blocks which are * allocated in the bitmap buffer's "last committed data" copy. This @@ -1142,7 +1167,7 @@ int ext3_should_retry_alloc(struct super * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ -int ext3_new_block(handle_t *handle, struct inode *inode, +int ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, int *errp) { struct buffer_head *bitmap_bh = NULL; Index: linux-2.6.5-7.283-full/fs/ext3/inode.c =================================================================== --- linux-2.6.5-7.283-full.orig/fs/ext3/inode.c 2007-03-28 02:50:19.000000000 +0400 +++ linux-2.6.5-7.283-full/fs/ext3/inode.c 2007-03-28 15:46:02.000000000 +0400 @@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) - ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0); return err; } @@ -675,7 +675,7 @@ err_out: if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, - le32_to_cpu(where[i].key), 1); + le32_to_cpu(where[i].key), 1, 0); return err; } @@ -1839,7 +1839,7 @@ ext3_clear_blocks(handle_t *handle, stru } } - ext3_free_blocks(handle, inode, block_to_free, count); + ext3_free_blocks(handle, inode, block_to_free, count, 0); } /** @@ -2010,7 +2010,7 @@ static void ext3_free_branches(handle_t ext3_journal_test_restart(handle, inode); } - ext3_free_blocks(handle, inode, nr, 1); + ext3_free_blocks(handle, inode, nr, 1, 1); if (parent_bh) { /*