From 4784d1d291ff6d2fff570a4ad9dd6f3f401ea776 Mon Sep 17 00:00:00 2001 From: johann Date: Tue, 17 Jul 2007 06:56:36 +0000 Subject: [PATCH 1/1] Branch b1_6 b=11039 i=alex i=adilger attachment 11236 / mballoc3 support for 2.6.18. --- .../patches/ext3-mballoc3-2.6.18.patch | 606 +++++++++++++++++++++ 1 file changed, 606 insertions(+) create mode 100644 ldiskfs/kernel_patches/patches/ext3-mballoc3-2.6.18.patch diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc3-2.6.18.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc3-2.6.18.patch new file mode 100644 index 0000000..04eb2a2 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc3-2.6.18.patch @@ -0,0 +1,606 @@ +Index: linux-2.6.18.8/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.18.8.orig/include/linux/ext3_fs_i.h 2007-06-21 14:20:14.000000000 +0200 ++++ linux-2.6.18.8/include/linux/ext3_fs_i.h 2007-06-21 14:20:15.000000000 +0200 +@@ -154,6 +154,10 @@ struct ext3_inode_info { + struct inode vfs_inode; + + struct ext3_ext_cache i_cached_extent; ++ ++ /* mballoc */ ++ struct list_head i_prealloc_list; ++ spinlock_t i_prealloc_lock; + }; + + #endif /* _LINUX_EXT3_FS_I */ +Index: linux-2.6.18.8/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.18.8.orig/include/linux/ext3_fs_sb.h 2007-06-21 14:20:15.000000000 +0200 ++++ linux-2.6.18.8/include/linux/ext3_fs_sb.h 2007-06-21 14:20:15.000000000 +0200 +@@ -21,8 +21,15 @@ + #include + #include + #include ++#include + #endif + #include ++#include ++ ++struct ext3_buddy_group_blocks; ++struct ext3_locality_group; ++struct ext3_mb_history; ++#define EXT3_BB_MAX_BLOCKS + + /* + * third extended-fs super-block data in memory +Index: linux-2.6.18.8/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.18.8.orig/include/linux/ext3_fs.h 2007-06-21 14:20:15.000000000 +0200 ++++ linux-2.6.18.8/include/linux/ext3_fs.h 2007-06-21 14:20:15.000000000 +0200 +@@ -17,6 +17,7 @@ + #define _LINUX_EXT3_FS_H + + #include ++#include + + /* + * The second extended filesystem constants/structures +@@ -67,12 +68,12 @@ + + struct ext3_allocation_request { + struct inode *inode; /* target inode for block we're allocating */ +- unsigned long logical; /* logical block in target inode */ +- unsigned long goal; /* phys. target (a hint) */ +- unsigned long lleft; /* the closest logical allocated block to the left */ +- unsigned long pleft; /* phys. block for ^^^ */ +- unsigned long lright; /* the closest logical allocated block to the right */ +- unsigned long pright; /* phys. block for ^^^ */ ++ ext3_fsblk_t logical; /* logical block in target inode */ ++ ext3_fsblk_t goal; /* phys. target (a hint) */ ++ ext3_fsblk_t lleft; /* the closest logical allocated block to the left */ ++ ext3_fsblk_t pleft; /* phys. block for ^^^ */ ++ ext3_fsblk_t lright; /* the closest logical allocated block to the right */ ++ ext3_fsblk_t pright; /* phys. block for ^^^ */ + unsigned long len; /* how many blocks we want to allocate */ + unsigned long flags; /* flags. see above EXT3_MB_HINT_* */ + }; +@@ -400,6 +401,7 @@ struct ext3_inode { + #define EXT3_MOUNT_IOPEN_NOPRIV 0x800000/* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x2000000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x4000000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x8000000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -787,12 +789,12 @@ ext3_group_first_block_no(struct super_b + /* balloc.c */ + extern int ext3_bg_has_super(struct super_block *sb, int group); + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); +-extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, ++extern ext3_fsblk_t ext3_new_block_old (handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp); +-extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, ++extern ext3_fsblk_t ext3_new_blocks_old (handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, unsigned long *count, int *errp); + extern void ext3_free_blocks (handle_t *handle, struct inode *inode, +- ext3_fsblk_t block, unsigned long count); ++ ext3_fsblk_t block, unsigned long count, int metadata); + extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext3_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +@@ -836,15 +838,45 @@ extern long ext3_mb_stats; + extern long ext3_mb_max_to_scan; + extern int ext3_mb_init(struct super_block *, int); + extern int ext3_mb_release(struct super_block *); +-extern unsigned long ext3_mb_new_blocks(handle_t *, struct ext3_allocation_request *, int *); ++extern ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, ++ ext3_fsblk_t goal, int *errp); ++extern ext3_fsblk_t ext3_mb_new_blocks(handle_t *, ++ struct ext3_allocation_request *, int *); + extern int ext3_mb_reserve_blocks(struct super_block *, int); + extern void ext3_mb_release_blocks(struct super_block *, int); + extern void ext3_mb_release_blocks(struct super_block *, int); + extern void ext3_mb_discard_inode_preallocations(struct inode *); + extern int __init init_ext3_proc(void); + extern void exit_ext3_proc(void); +-extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, unsigned long, int, int *); ++extern void ext3_mb_free_blocks(handle_t *, struct inode *, unsigned long, ++ unsigned long, int, unsigned long *); ++ ++static inline ext3_fsblk_t ext3_new_blocks(handle_t *handle, ++ struct inode *inode, ++ ext3_fsblk_t goal, ++ unsigned long *count, int *errp) ++{ ++ struct ext3_allocation_request ar; ++ ext3_fsblk_t ret; + ++ if (!test_opt(inode->i_sb, MBALLOC)) { ++ ret = ext3_new_blocks_old(handle, inode, goal, count, errp); ++ return ret; ++ } ++ ++ ar.inode = inode; ++ ar.goal = goal; ++ ar.len = *count; ++ ar.logical = 0; ++ ar.lleft = 0; ++ ar.pleft = 0; ++ ar.lright = 0; ++ ar.pright = 0; ++ ar.flags = 0; ++ ret = ext3_mb_new_blocks(handle, &ar, errp); ++ *count = ar.len; ++ return ret; ++} + + /* inode.c */ + int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, +Index: linux-2.6.18.8/fs/ext3/super.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/super.c 2007-06-21 14:20:15.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/super.c 2007-06-21 14:20:15.000000000 +0200 +@@ -688,6 +688,7 @@ enum { + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_grpquota, + Opt_extents, Opt_noextents, Opt_extdebug, ++ Opt_mballoc, Opt_nomballoc, Opt_stripe + }; + + static match_table_t tokens = { +@@ -743,6 +744,9 @@ static match_table_t tokens = { + {Opt_extents, "extents"}, + {Opt_noextents, "noextents"}, + {Opt_extdebug, "extdebug"}, ++ {Opt_mballoc, "mballoc"}, ++ {Opt_nomballoc, "nomballoc"}, ++ {Opt_stripe, "stripe=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, + }; +@@ -1096,6 +1100,19 @@ clear_qf_name: + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_nomballoc: ++ clear_opt(sbi->s_mount_opt, MBALLOC); ++ break; ++ case Opt_stripe: ++ if (match_int(&args[0], &option)) ++ return 0; ++ if (option < 0) ++ return 0; ++ sbi->s_stripe = option; ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1826,6 +1843,7 @@ static int ext3_fill_super (struct super + "writeback"); + + ext3_ext_init(sb); ++ ext3_mb_init(sb, needs_recovery); + + lock_kernel(); + return 0; +Index: linux-2.6.18.8/fs/ext3/extents.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/extents.c 2007-06-21 14:20:14.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/extents.c 2007-06-21 14:20:15.000000000 +0200 +@@ -795,7 +795,7 @@ cleanup: + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, inode, ablocks[i], 1); ++ ext3_free_blocks(handle, inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1613,7 +1613,7 @@ int ext3_ext_rm_idx(handle_t *handle, st + ext_debug(inode, "index is empty, remove it, free block %lu\n", leaf); + bh = sb_find_get_block(inode->i_sb, leaf); + ext3_forget(handle, 1, inode, bh, leaf); +- ext3_free_blocks(handle, inode, leaf, 1); ++ ext3_free_blocks(handle, inode, leaf, 1, 1); + return err; + } + +@@ -1672,7 +1672,7 @@ static int ext3_remove_blocks(handle_t * + unsigned long from, unsigned long to) + { + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + #ifdef EXTENTS_STATS + { +@@ -1690,6 +1690,8 @@ static int ext3_remove_blocks(handle_t * + spin_unlock(&sbi->s_ext_stats_lock); + } + #endif ++ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) ++ metadata = 1; + if (from >= le32_to_cpu(ex->ee_block) + && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + /* tail removal */ +@@ -1701,7 +1703,7 @@ static int ext3_remove_blocks(handle_t * + bh = sb_find_get_block(inode->i_sb, start + i); + ext3_forget(handle, 0, inode, bh, start + i); + } +- ext3_free_blocks(handle, inode, start, num); ++ ext3_free_blocks(handle, inode, start, num, metadata); + } else if (from == le32_to_cpu(ex->ee_block) + && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", +@@ -2034,7 +2036,7 @@ int ext3_ext_get_blocks(handle_t *handle + struct ext3_extent newex, *ex; + int goal, newblock, err = 0, depth; + unsigned long allocated = 0; +- unsigned long next; ++ struct ext3_allocation_request ar; + + __clear_bit(BH_New, &bh_result->b_state); + ext_debug(inode, "blocks %d/%lu requested for inode %u\n", (int) iblock, +@@ -2116,18 +2118,33 @@ int ext3_ext_get_blocks(handle_t *handle + if (S_ISREG(inode->i_mode) && (!EXT3_I(inode)->i_block_alloc_info)) + ext3_init_block_alloc_info(inode); + ++ /* find neighbour allocated blocks */ ++ ar.lleft = iblock; ++ err = ext3_ext_search_left(inode, path, &ar.lleft, &ar.pleft); ++ if (err) ++ goto out2; ++ ar.lright = iblock; ++ err = ext3_ext_search_right(inode, path, &ar.lright, &ar.pright); ++ if (err) ++ goto out2; ++ + /* find next allocated block so that we know how many + * blocks we can allocate without ovelapping next extent */ +- BUG_ON(iblock < le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)); +- next = ext3_ext_next_allocated_block(path); +- BUG_ON(next <= iblock); +- allocated = next - iblock; ++ BUG_ON(ar.pright != 0 && ar.lright <= iblock); ++ if (ar.pright == 0) ++ allocated = EXT_MAX_BLOCK - iblock; ++ else ++ allocated = ar.lright - iblock; + if (allocated > max_blocks) + allocated = max_blocks; + + /* allocate new block */ +- goal = ext3_ext_find_goal(inode, path, iblock); +- newblock = ext3_new_blocks(handle, inode, goal, &allocated, &err); ++ ar.inode = inode; ++ ar.goal = ext3_ext_find_goal(inode, path, iblock); ++ ar.logical = iblock; ++ ar.len = allocated; ++ ar.flags = EXT3_MB_HINT_DATA; ++ newblock = ext3_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; + ext_debug(inode, "allocate new block: goal %d, found %d/%lu\n", +@@ -2137,12 +2154,16 @@ int ext3_ext_get_blocks(handle_t *handle + newex.ee_block = cpu_to_le32(iblock); + newex.ee_start = cpu_to_le32(newblock); + newex.ee_start_hi = 0; +- newex.ee_len = cpu_to_le16(allocated); ++ newex.ee_len = cpu_to_le16(ar.len); + err = ext3_ext_insert_extent(handle, inode, path, &newex); + if (err) { + /* free data blocks we just allocated */ +- ext3_free_blocks(handle, inode, le32_to_cpu(newex.ee_start), +- le16_to_cpu(newex.ee_len)); ++ /* not a good idea to call discard here directly, ++ * but otherwise we'd need to call it every free() */ ++ ext3_mb_discard_inode_preallocations(inode); ++ ext3_free_blocks(handle, inode, newex.ee_start, ++ newex.ee_len, 0); ++ + goto out2; + } + +@@ -2151,6 +2172,7 @@ int ext3_ext_get_blocks(handle_t *handle + + /* previous routine could use block we allocated */ + newblock = le32_to_cpu(newex.ee_start); ++ allocated = le16_to_cpu(newex.ee_len); + __set_bit(BH_New, &bh_result->b_state); + + ext3_ext_put_in_cache(inode, iblock, allocated, newblock, +@@ -2202,6 +2224,9 @@ void ext3_ext_truncate(struct inode * in + mutex_lock(&EXT3_I(inode)->truncate_mutex); + ext3_ext_invalidate_cache(inode); + ++ /* it's important to discard preallocations under truncate_mutex */ ++ ext3_mb_discard_inode_preallocations(inode); ++ + /* + * TODO: optimization is possible here + * probably we need not scaning at all, +Index: linux-2.6.18.8/fs/ext3/Makefile +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/Makefile 2007-06-21 14:20:14.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/Makefile 2007-06-21 14:20:15.000000000 +0200 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o ++ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.18.8/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/xattr.c 2007-06-21 10:31:07.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/xattr.c 2007-06-21 14:20:15.000000000 +0200 +@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl + ea_bdebug(bh, "refcount now=0; freeing"); + if (ce) + mb_cache_entry_free(ce); +- ext3_free_blocks(handle, inode, bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + } else { +@@ -805,7 +805,7 @@ inserted: + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +Index: linux-2.6.18.8/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/balloc.c 2007-06-21 10:31:07.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/balloc.c 2007-06-21 14:20:15.000000000 +0200 +@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -294,6 +294,8 @@ void ext3_discard_reservation(struct ino + struct ext3_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + ++ ext3_mb_discard_inode_preallocations(inode); ++ + if (!block_i) + return; + +@@ -490,21 +492,25 @@ error_return: + return; + } + +-/* Free given blocks, update quota and i_blocks field */ + void ext3_free_blocks(handle_t *handle, struct inode *inode, +- ext3_fsblk_t block, unsigned long count) ++ ext3_fsblk_t block, unsigned long count, int metadata) + { +- struct super_block * sb; +- unsigned long dquot_freed_blocks; ++ struct super_block *sb; ++ unsigned long freed; ++ ++ /* this isn't the right place to decide whether block is metadata ++ * inode.c/extents.c knows better, but for safety ... */ ++ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || ++ ext3_should_journal_data(inode)) ++ metadata = 1; + + sb = inode->i_sb; +- if (!sb) { +- printk ("ext3_free_blocks: nonexistent device"); +- return; +- } +- ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); +- if (dquot_freed_blocks) +- DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) ++ ext3_free_blocks_sb(handle, sb, block, count, &freed); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); ++ if (freed) ++ DQUOT_FREE_BLOCK(inode, freed); + return; + } + +@@ -1199,7 +1205,7 @@ int ext3_should_retry_alloc(struct super + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. + */ +-ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, ++ext3_fsblk_t ext3_new_blocks_old(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, unsigned long *count, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +@@ -1463,7 +1469,7 @@ out: + return 0; + } + +-ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, ++ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp) + { + unsigned long count = 1; +Index: linux-2.6.18.8/fs/ext3/inode.c +=================================================================== +--- linux-2.6.18.8.orig/fs/ext3/inode.c 2007-06-21 14:20:14.000000000 +0200 ++++ linux-2.6.18.8/fs/ext3/inode.c 2007-06-21 14:20:15.000000000 +0200 +@@ -560,7 +560,7 @@ static int ext3_alloc_blocks(handle_t *h + return ret; + failed_out: + for (i = 0; i bb_state); + } + +-unsigned long ext3_grp_offs_to_block(struct super_block *sb, ++ext3_fsblk_t ext3_grp_offs_to_block(struct super_block *sb, + struct ext3_free_extent *fex) + { +- unsigned long block; ++ ext3_fsblk_t block; + +- block = (unsigned long) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) ++ block = (ext3_fsblk_t) fex->fe_group * EXT3_BLOCKS_PER_GROUP(sb) + + fex->fe_start + + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); + return block; +@@ -3188,7 +3188,7 @@ void ext3_mb_collect_stats(struct ext3_a + void ext3_mb_use_inode_pa(struct ext3_allocation_context *ac, + struct ext3_prealloc_space *pa) + { +- unsigned long start, len; ++ ext3_fsblk_t start, len; + + /* found preallocated blocks, use them */ + start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart); +@@ -4041,13 +4041,13 @@ int ext3_mb_discard_preallocations(struc + * it tries to use preallocation first, then falls back + * to usual allocation + */ +-unsigned long ext3_mb_new_blocks(handle_t *handle, ++ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, + struct ext3_allocation_request *ar, int *errp) + { + struct ext3_allocation_context ac; + struct ext3_sb_info *sbi; + struct super_block *sb; +- unsigned long block; ++ ext3_fsblk_t block; + int err, freed; + + sb = ar->inode->i_sb; +@@ -4058,9 +4058,8 @@ unsigned long ext3_mb_new_blocks(handle_ + if (ext3_mballoc_warning++ == 0) + printk(KERN_ERR "EXT3-fs: multiblock request with " + "mballoc disabled!\n"); +- ar->len = 1; +- err = ext3_new_block_old(handle, ar->inode, ar->goal, errp); +- return err; ++ return ext3_new_blocks_old(handle, ar->inode, ar->goal, ++ &ar->len, errp); + } + + ext3_mb_poll_new_transaction(sb, handle); +@@ -4107,11 +4106,11 @@ repeat: + } + EXPORT_SYMBOL(ext3_mb_new_blocks); + +-int ext3_new_block(handle_t *handle, struct inode *inode, +- unsigned long goal, int *errp) ++ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, ++ ext3_fsblk_t goal, int *errp) + { + struct ext3_allocation_request ar; +- unsigned long ret; ++ ext3_fsblk_t ret; + + if (!test_opt(inode->i_sb, MBALLOC)) { + ret = ext3_new_block_old(handle, inode, goal, errp); +@@ -4226,8 +4225,8 @@ int ext3_mb_free_metadata(handle_t *hand + * Main entry point into mballoc to free blocks + */ + void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, +- unsigned long block, unsigned long count, +- int metadata, int *freed) ++ ext3_fsblk_t block, unsigned long count, ++ int metadata, unsigned long *freed) + { + struct buffer_head *bitmap_bh = NULL; + struct super_block *sb = inode->i_sb; -- 1.8.3.1