Index: linux-2.6.18.8/include/linux/ext3_fs_i.h =================================================================== --- linux-2.6.18.8.orig/include/linux/ext3_fs_i.h 2007-06-21 14:20:14.000000000 +0200 +++ linux-2.6.18.8/include/linux/ext3_fs_i.h 2007-06-21 14:20:15.000000000 +0200 @@ -154,6 +154,10 @@ struct ext3_inode_info { struct inode vfs_inode; struct ext3_ext_cache i_cached_extent; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; }; #endif /* _LINUX_EXT3_FS_I */ Index: linux-2.6.18.8/include/linux/ext3_fs_sb.h =================================================================== --- linux-2.6.18.8.orig/include/linux/ext3_fs_sb.h 2007-06-21 14:20:15.000000000 +0200 +++ linux-2.6.18.8/include/linux/ext3_fs_sb.h 2007-06-21 14:20:15.000000000 +0200 @@ -21,8 +21,15 @@ #include #include #include +#include #endif #include +#include + +struct ext3_buddy_group_blocks; +struct ext3_locality_group; +struct ext3_mb_history; +#define EXT3_BB_MAX_BLOCKS /* * third extended-fs super-block data in memory Index: linux-2.6.18.8/include/linux/ext3_fs.h =================================================================== --- linux-2.6.18.8.orig/include/linux/ext3_fs.h 2007-06-21 14:20:15.000000000 +0200 +++ linux-2.6.18.8/include/linux/ext3_fs.h 2007-06-21 14:20:15.000000000 +0200 @@ -17,6 +17,7 @@ #define _LINUX_EXT3_FS_H #include +#include /* * The second extended filesystem constants/structures @@ -67,12 +68,12 @@ struct ext3_allocation_request { struct inode *inode; /* target inode for block we're allocating */ - unsigned long logical; /* logical block in target inode */ - unsigned long goal; /* phys. target (a hint) */ - unsigned long lleft; /* the closest logical allocated block to the left */ - unsigned long pleft; /* phys. block for ^^^ */ - unsigned long lright; /* the closest logical allocated block to the right */ - unsigned long pright; /* phys. 
block for ^^^ */ + ext3_fsblk_t logical; /* logical block in target inode */ + ext3_fsblk_t goal; /* phys. target (a hint) */ + ext3_fsblk_t lleft; /* the closest logical allocated block to the left */ + ext3_fsblk_t pleft; /* phys. block for ^^^ */ + ext3_fsblk_t lright; /* the closest logical allocated block to the right */ + ext3_fsblk_t pright; /* phys. block for ^^^ */ unsigned long len; /* how many blocks we want to allocate */ unsigned long flags; /* flags. see above EXT3_MB_HINT_* */ }; @@ -400,6 +401,7 @@ struct ext3_inode { #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x2000000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x4000000/* Extents debug */ +#define EXT3_MOUNT_MBALLOC 0x8000000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt @@ -787,12 +789,12 @@ ext3_group_first_block_no(struct super_b /* balloc.c */ extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); -extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, +extern ext3_fsblk_t ext3_new_block_old (handle_t *handle, struct inode *inode, ext3_fsblk_t goal, int *errp); -extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, +extern ext3_fsblk_t ext3_new_blocks_old (handle_t *handle, struct inode *inode, ext3_fsblk_t goal, unsigned long *count, int *errp); extern void ext3_free_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count); + ext3_fsblk_t block, unsigned long count, int metadata); extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, ext3_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); @@ -836,15 +838,45 @@ extern long ext3_mb_stats; extern long ext3_mb_max_to_scan; extern int ext3_mb_init(struct super_block *, int); extern int ext3_mb_release(struct 
super_block *); -extern unsigned long ext3_mb_new_blocks(handle_t *, struct ext3_allocation_request *, int *); +extern ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp); +extern ext3_fsblk_t ext3_mb_new_blocks(handle_t *, + struct ext3_allocation_request *, int *); extern int ext3_mb_reserve_blocks(struct super_block *, int); extern void ext3_mb_release_blocks(struct super_block *, int); extern void ext3_mb_release_blocks(struct super_block *, int); extern void ext3_mb_discard_inode_preallocations(struct inode *); extern int __init init_ext3_proc(void); extern void exit_ext3_proc(void); -extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, unsigned long, int, int *); +extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, + unsigned long, int, unsigned long *); +/* Allocate up to *count blocks near goal: ext3_new_blocks_old() unless the MBALLOC mount option is set, else ext3_mb_new_blocks(); on the mballoc path *count is overwritten with ar.len. */ +static inline ext3_fsblk_t ext3_new_blocks(handle_t *handle, + struct inode *inode, + ext3_fsblk_t goal, + unsigned long *count, int *errp) +{ + struct ext3_allocation_request ar; + ext3_fsblk_t ret; + if (!test_opt(inode->i_sb, MBALLOC)) { + ret = ext3_new_blocks_old(handle, inode, goal, count, errp); + return ret; + } + /* only inode, goal and len come from the caller; every other request field is zeroed */ + ar.inode = inode; + ar.goal = goal; + ar.len = *count; + ar.logical = 0; + ar.lleft = 0; + ar.pleft = 0; + ar.lright = 0; + ar.pright = 0; + ar.flags = 0; + ret = ext3_mb_new_blocks(handle, &ar, errp); + *count = ar.len; + return ret; +} /* inode.c */ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, Index: linux-2.6.18.8/fs/ext3/super.c =================================================================== --- linux-2.6.18.8.orig/fs/ext3/super.c 2007-06-21 14:20:15.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/super.c 2007-06-21 14:20:15.000000000 +0200 @@ -688,6 +688,7 @@ enum { Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_grpquota, Opt_extents, Opt_noextents, Opt_extdebug, + Opt_mballoc, Opt_nomballoc, Opt_stripe }; static match_table_t tokens = { @@ -743,6 +744,9 @@ static 
match_table_t tokens = { {Opt_extents, "extents"}, {Opt_noextents, "noextents"}, {Opt_extdebug, "extdebug"}, + {Opt_mballoc, "mballoc"}, + {Opt_nomballoc, "nomballoc"}, + {Opt_stripe, "stripe=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1096,6 +1100,19 @@ clear_qf_name: case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; + case Opt_mballoc: + set_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_nomballoc: + clear_opt(sbi->s_mount_opt, MBALLOC); + break; + case Opt_stripe: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_stripe = option; + break; default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " @@ -1826,6 +1843,7 @@ static int ext3_fill_super (struct super "writeback"); ext3_ext_init(sb); + ext3_mb_init(sb, needs_recovery); lock_kernel(); return 0; Index: linux-2.6.18.8/fs/ext3/extents.c =================================================================== --- linux-2.6.18.8.orig/fs/ext3/extents.c 2007-06-21 14:20:14.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/extents.c 2007-06-21 14:20:15.000000000 +0200 @@ -795,7 +795,7 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext3_free_blocks(handle, inode, ablocks[i], 1); + ext3_free_blocks(handle, inode, ablocks[i], 1, 1); } } kfree(ablocks); @@ -1613,7 +1613,7 @@ int ext3_ext_rm_idx(handle_t *handle, st ext_debug(inode, "index is empty, remove it, free block %lu\n", leaf); bh = sb_find_get_block(inode->i_sb, leaf); ext3_forget(handle, 1, inode, bh, leaf); - ext3_free_blocks(handle, inode, leaf, 1); + ext3_free_blocks(handle, inode, leaf, 1, 1); return err; } @@ -1672,7 +1672,7 @@ static int ext3_remove_blocks(handle_t * unsigned long from, unsigned long to) { struct buffer_head *bh; - int i; + int i, metadata = 0; #ifdef EXTENTS_STATS { @@ -1690,6 +1690,8 @@ static int ext3_remove_blocks(handle_t * spin_unlock(&sbi->s_ext_stats_lock); } #endif + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + metadata = 1; if 
(from >= le32_to_cpu(ex->ee_block) && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { /* tail removal */ @@ -1701,7 +1703,7 @@ static int ext3_remove_blocks(handle_t * bh = sb_find_get_block(inode->i_sb, start + i); ext3_forget(handle, 0, inode, bh, start + i); } - ext3_free_blocks(handle, inode, start, num); + ext3_free_blocks(handle, inode, start, num, metadata); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", @@ -2034,7 +2036,7 @@ int ext3_ext_get_blocks(handle_t *handle struct ext3_extent newex, *ex; int goal, newblock, err = 0, depth; unsigned long allocated = 0; - unsigned long next; + struct ext3_allocation_request ar; __clear_bit(BH_New, &bh_result->b_state); ext_debug(inode, "blocks %d/%lu requested for inode %u\n", (int) iblock, @@ -2116,18 +2118,33 @@ int ext3_ext_get_blocks(handle_t *handle if (S_ISREG(inode->i_mode) && (!EXT3_I(inode)->i_block_alloc_info)) ext3_init_block_alloc_info(inode); + /* find neighbour allocated blocks */ + ar.lleft = iblock; + err = ext3_ext_search_left(inode, path, &ar.lleft, &ar.pleft); + if (err) + goto out2; + ar.lright = iblock; + err = ext3_ext_search_right(inode, path, &ar.lright, &ar.pright); + if (err) + goto out2; + /* find next allocated block so that we know how many * blocks we can allocate without ovelapping next extent */ - BUG_ON(iblock < le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)); - next = ext3_ext_next_allocated_block(path); - BUG_ON(next <= iblock); - allocated = next - iblock; + BUG_ON(ar.pright != 0 && ar.lright <= iblock); + if (ar.pright == 0) + allocated = EXT_MAX_BLOCK - iblock; + else + allocated = ar.lright - iblock; if (allocated > max_blocks) allocated = max_blocks; /* allocate new block */ - goal = ext3_ext_find_goal(inode, path, iblock); - newblock = ext3_new_blocks(handle, inode, goal, &allocated, &err); + ar.inode = inode; + ar.goal = 
ext3_ext_find_goal(inode, path, iblock); + ar.logical = iblock; + ar.len = allocated; + ar.flags = EXT3_MB_HINT_DATA; + newblock = ext3_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; ext_debug(inode, "allocate new block: goal %d, found %d/%lu\n", @@ -2137,12 +2154,16 @@ int ext3_ext_get_blocks(handle_t *handle newex.ee_block = cpu_to_le32(iblock); newex.ee_start = cpu_to_le32(newblock); newex.ee_start_hi = 0; - newex.ee_len = cpu_to_le16(allocated); + newex.ee_len = cpu_to_le16(ar.len); err = ext3_ext_insert_extent(handle, inode, path, &newex); if (err) { /* free data blocks we just allocated */ - ext3_free_blocks(handle, inode, le32_to_cpu(newex.ee_start), - le16_to_cpu(newex.ee_len)); + /* discarding here is not ideal, but otherwise every free() would + * need it; newex fields are little-endian, so convert them back */ + ext3_mb_discard_inode_preallocations(inode); + ext3_free_blocks(handle, inode, le32_to_cpu(newex.ee_start), + le16_to_cpu(newex.ee_len), 0); + goto out2; } @@ -2151,6 +2172,7 @@ int ext3_ext_get_blocks(handle_t *handle /* previous routine could use block we allocated */ newblock = le32_to_cpu(newex.ee_start); + allocated = le16_to_cpu(newex.ee_len); __set_bit(BH_New, &bh_result->b_state); ext3_ext_put_in_cache(inode, iblock, allocated, newblock, @@ -2202,6 +2224,9 @@ void ext3_ext_truncate(struct inode * in mutex_lock(&EXT3_I(inode)->truncate_mutex); ext3_ext_invalidate_cache(inode); + /* it's important to discard preallocations under truncate_mutex */ + ext3_mb_discard_inode_preallocations(inode); + /* * TODO: optimization is possible here * probably we need not scaning at all, Index: linux-2.6.18.8/fs/ext3/Makefile =================================================================== --- linux-2.6.18.8.orig/fs/ext3/Makefile 2007-06-21 14:20:14.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/Makefile 2007-06-21 14:20:15.000000000 +0200 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o 
hash.o resize.o extents.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o mballoc.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o Index: linux-2.6.18.8/fs/ext3/xattr.c =================================================================== --- linux-2.6.18.8.orig/fs/ext3/xattr.c 2007-06-21 10:31:07.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/xattr.c 2007-06-21 14:20:15.000000000 +0200 @@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce); - ext3_free_blocks(handle, inode, bh->b_blocknr, 1); + ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); get_bh(bh); ext3_forget(handle, 1, inode, bh, bh->b_blocknr); } else { @@ -805,7 +805,7 @@ inserted: new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext3_free_blocks(handle, inode, block, 1); + ext3_free_blocks(handle, inode, block, 1, 1); error = -EIO; goto cleanup; } Index: linux-2.6.18.8/fs/ext3/balloc.c =================================================================== --- linux-2.6.18.8.orig/fs/ext3/balloc.c 2007-06-21 10:31:07.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/balloc.c 2007-06-21 14:20:15.000000000 +0200 @@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ * * Return buffer_head on success or NULL in case of failure. 
*/ -static struct buffer_head * +struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; @@ -294,6 +294,8 @@ void ext3_discard_reservation(struct ino struct ext3_reserve_window_node *rsv; spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ext3_mb_discard_inode_preallocations(inode); + if (!block_i) return; @@ -490,21 +492,25 @@ error_return: return; } -/* Free given blocks, update quota and i_blocks field */ void ext3_free_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count) + ext3_fsblk_t block, unsigned long count, int metadata) { - struct super_block * sb; - unsigned long dquot_freed_blocks; + struct super_block *sb; + unsigned long freed; + + /* callers (inode.c/extents.c) know best whether a block is metadata; + * for safety, force it here for dirs, symlinks and journalled data */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || + ext3_should_journal_data(inode)) + metadata = 1; sb = inode->i_sb; - if (!sb) { - printk ("ext3_free_blocks: nonexistent device"); - return; - } - ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) - DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); + if (freed) + DQUOT_FREE_BLOCK(inode, freed); return; } @@ -1199,7 +1205,7 @@ int ext3_should_retry_alloc(struct super * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ -ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, +ext3_fsblk_t ext3_new_blocks_old(handle_t *handle, struct inode *inode, ext3_fsblk_t goal, unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; @@ -1463,7 +1469,7 @@ out: return 0; } -ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, +ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode, ext3_fsblk_t goal, int *errp) { unsigned long count = 1; Index: linux-2.6.18.8/fs/ext3/inode.c =================================================================== --- linux-2.6.18.8.orig/fs/ext3/inode.c 2007-06-21 14:20:14.000000000 +0200 +++ linux-2.6.18.8/fs/ext3/inode.c 2007-06-21 14:20:15.000000000 +0200 @@ -560,7 +560,7 @@ static int ext3_alloc_blocks(handle_t *h return ret; failed_out: for (i = 0; i bb_state); } -unsigned long ext3_grp_offs_to_block(struct super_block *sb, +ext3_fsblk_t ext3_grp_offs_to_block(struct super_block *sb, struct ext3_free_extent *fex) { - unsigned long block; + ext3_fsblk_t block; - block = (unsigned long) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) + block = (ext3_fsblk_t) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) + fex->fe_start + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); return block; @@ -3188,7 +3188,7 @@ void ext3_mb_collect_stats(struct ext3_a void ext3_mb_use_inode_pa(struct ext3_allocation_context *ac, struct ext3_prealloc_space *pa) { - unsigned long start, len; + ext3_fsblk_t start, len; /* found preallocated blocks, use them */ start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); @@ -4041,13 +4041,13 @@ int ext3_mb_discard_preallocations(struc * it tries to use preallocation first, then falls back * to usual allocation */ -unsigned long ext3_mb_new_blocks(handle_t *handle, +ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, struct ext3_allocation_request *ar, int *errp) { struct ext3_allocation_context ac; struct ext3_sb_info *sbi; struct super_block *sb; - unsigned long block; + ext3_fsblk_t 
block; int err, freed; sb = ar->inode->i_sb; @@ -4058,9 +4058,8 @@ unsigned long ext3_mb_new_blocks(handle_ if (ext3_mballoc_warning++ == 0) printk(KERN_ERR "EXT3-fs: multiblock request with " "mballoc disabled!\n"); - ar->len = 1; - err = ext3_new_block_old(handle, ar->inode, ar->goal, errp); - return err; + return ext3_new_blocks_old(handle, ar->inode, ar->goal, + &ar->len, errp); } ext3_mb_poll_new_transaction(sb, handle); @@ -4107,11 +4106,11 @@ repeat: } EXPORT_SYMBOL(ext3_mb_new_blocks); -int ext3_new_block(handle_t *handle, struct inode *inode, - unsigned long goal, int *errp) +ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp) { struct ext3_allocation_request ar; - unsigned long ret; + ext3_fsblk_t ret; if (!test_opt(inode->i_sb, MBALLOC)) { ret = ext3_new_block_old(handle, inode, goal, errp); @@ -4226,8 +4225,8 @@ int ext3_mb_free_metadata(handle_t *hand * Main entry point into mballoc to free blocks */ void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, - unsigned long block, unsigned long count, - int metadata, int *freed) + ext3_fsblk_t block, unsigned long count, + int metadata, unsigned long *freed) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb;