X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=ldiskfs%2Fkernel_patches%2Fpatches%2Fext4-prealloc-rhel5.patch;h=dda25ed27fdfa0f527776a41a5133f9e53d29c79;hb=898d16e4a9265be32af41bea56f96ddf6e83877d;hp=34d0472fa01e01d0fb8c51b333d726b1ac2b2e8b;hpb=b175e2441b0cd9fae60341ba92b0f7f192e71446;p=fs%2Flustre-release.git diff --git a/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch index 34d0472..dda25ed 100644 --- a/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch +++ b/ldiskfs/kernel_patches/patches/ext4-prealloc-rhel5.patch @@ -1,38 +1,53 @@ -Index: linux-2.6.18-128.1.6/fs/ext4/ext4_sb.h +Index: linux-2.6.18-128.1.6/fs/ext4/super.c =================================================================== ---- linux-2.6.18-128.1.6.orig/fs/ext4/ext4_sb.h 2009-05-28 17:16:51.000000000 +0530 -+++ linux-2.6.18-128.1.6/fs/ext4/ext4_sb.h 2009-05-28 17:16:52.000000000 +0530 +--- linux-2.6.18-128.1.6.orig/fs/ext4/super.c ++++ linux-2.6.18-128.1.6/fs/ext4/super.c +@@ -108,7 +108,8 @@ + EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); +-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); ++EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req); ++EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req); + EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); + EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size); + +@@ -108,7 +108,8 @@ + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), + ATTR_LIST(mb_order2_req), +- ATTR_LIST(mb_stream_req), ++ ATTR_LIST(mb_small_req), ++ ATTR_LIST(mb_large_req), + ATTR_LIST(mb_group_prealloc), + ATTR_LIST(max_dir_size), + NULL, +Index: linux-2.6.18-128.1.6/fs/ext4/ext4.h +=================================================================== +--- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h 2009-05-28 17:16:51.000000000 +0530 ++++ linux-2.6.18-128.1.6/fs/ext4/ext4.h 2009-05-28 17:16:52.000000000 +0530 @@ -108,11 +108,14 @@ /* tunables */ unsigned long s_stripe; -- unsigned long s_mb_stream_request; +- unsigned int s_mb_stream_request; + unsigned long s_mb_small_req; + unsigned long s_mb_large_req; - unsigned long s_mb_max_to_scan; - unsigned long s_mb_min_to_scan; - unsigned long s_mb_stats; - unsigned long s_mb_order2_reqs; + unsigned int s_mb_max_to_scan; + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; + unsigned long *s_mb_prealloc_table; + unsigned long s_mb_prealloc_table_size; - unsigned long s_mb_group_prealloc; + unsigned int s_mb_group_prealloc; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c =================================================================== --- linux-2.6.18-128.1.6.orig/fs/ext4/mballoc.c 2009-05-28 17:16:51.000000000 +0530 +++ linux-2.6.18-128.1.6/fs/ext4/mballoc.c 2009-05-28 17:19:57.000000000 +0530 -@@ -1744,7 +1744,7 @@ - if (size < isize) - size = isize; - -- if (size < sbi->s_mb_stream_request && -+ if ((ac->ac_g_ex.fe_len < sbi->s_mb_large_req) && - (ac->ac_flags & EXT4_MB_HINT_DATA)) { - /* TBD: may be hot point */ - spin_lock(&sbi->s_md_lock); -@@ -2484,6 +2484,26 @@ - return -ENOMEM; +@@ -2284,6 +2284,26 @@ + } } +static void ext4_mb_prealloc_table_add(struct ext4_sb_info *sbi, int value) @@ -55,81 +70,16 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c +} + + - int ext4_mb_init(struct super_block *sb, int needs_recovery) + static int ext4_mb_good_group(struct ext4_allocation_context *ac, + ext4_group_t group, int cr) { - struct ext4_sb_info *sbi = EXT4_SB(sb); -@@ -2542,15 +2562,59 @@ - sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; - sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; - sbi->s_mb_stats = MB_DEFAULT_STATS; -- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; - sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; - sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; -- sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; -+ -+ if (sbi->s_stripe == 0) { -+ sbi->s_mb_prealloc_table_size = 8; -+ i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long); -+ sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS); -+ if (sbi->s_mb_prealloc_table == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ memset(sbi->s_mb_prealloc_table, 0, i); +@@ -2325,6 +2389,80 @@ + .llseek = seq_lseek, + .release = seq_release, + }; + -+ ext4_mb_prealloc_table_add(sbi, 4); -+ ext4_mb_prealloc_table_add(sbi, 8); -+ ext4_mb_prealloc_table_add(sbi, 16); -+ ext4_mb_prealloc_table_add(sbi, 32); -+ ext4_mb_prealloc_table_add(sbi, 64); -+ ext4_mb_prealloc_table_add(sbi, 128); -+ ext4_mb_prealloc_table_add(sbi, 256); -+ ext4_mb_prealloc_table_add(sbi, 512); -+ -+ sbi->s_mb_small_req = 256; -+ sbi->s_mb_large_req = 1024; -+ sbi->s_mb_group_prealloc = 512; -+ } else { -+ sbi->s_mb_prealloc_table_size = 3; -+ i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long); -+ sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS); -+ if (sbi->s_mb_prealloc_table == NULL) { -+ clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_offsets); -+ kfree(sbi->s_mb_maxs); -+ return -ENOMEM; -+ } -+ memset(sbi->s_mb_prealloc_table, 0, i); -+ -+ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe); -+ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 2); -+ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 4); -+ -+ sbi->s_mb_small_req = sbi->s_stripe; -+ sbi->s_mb_large_req = sbi->s_stripe * 8; -+ sbi->s_mb_group_prealloc = sbi->s_stripe * 4; -+ } - - i = sizeof(struct ext4_locality_group) * num_possible_cpus(); - sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); - if (sbi->s_locality_groups == NULL) { - clear_opt(sbi->s_mount_opt, MBALLOC); -+ kfree(sbi->s_mb_prealloc_table); - kfree(sbi->s_mb_offsets); - kfree(sbi->s_mb_maxs); - return -ENOMEM; -@@ -2725,10 +2789,82 @@ - #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" - #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" - #define EXT4_MB_ORDER2_REQ "order2_req" --#define EXT4_MB_STREAM_REQ "stream_req" -+#define EXT4_MB_SMALL_REQ "small_req" -+#define EXT4_MB_LARGE_REQ "large_req" +#define EXT4_MB_PREALLOC_TABLE "prealloc_table" - #define EXT4_MB_GROUP_PREALLOC "group_prealloc" - ++ +static int ext4_mb_prealloc_table_proc_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ @@ -182,7 +132,7 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c + prev = value; + num++; + } - ++ + new_table = kmalloc(num * sizeof(*new_table), GFP_KERNEL); + if (new_table == NULL) + return -ENOMEM; @@ -202,69 +152,90 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c + return cnt; +} - #define MB_PROC_FOPS(name) \ - static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ -@@ -2774,7 +2910,8 @@ - MB_PROC_FOPS(max_to_scan); - MB_PROC_FOPS(min_to_scan); - MB_PROC_FOPS(order2_reqs); --MB_PROC_FOPS(stream_request); -+MB_PROC_FOPS(small_req); -+MB_PROC_FOPS(large_req); - MB_PROC_FOPS(group_prealloc); - - #define MB_PROC_HANDLER(name, var) \ -@@ -2795,6 +2932,7 @@ - mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct proc_dir_entry *proc; -+ struct proc_dir_entry *proc_entry; - char devname[64]; - - if (proc_root_ext4 == NULL) { -@@ -2808,15 +2946,29 @@ - MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); - MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); - MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); -- MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); -+ MB_PROC_HANDLER(EXT4_MB_SMALL_REQ, small_req); -+ MB_PROC_HANDLER(EXT4_MB_LARGE_REQ, large_req); - MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); - -+ proc_entry = create_proc_entry(EXT4_MB_PREALLOC_TABLE, S_IFREG | -+ S_IRUGO | S_IWUSR, sbi->s_mb_proc); -+ if (proc_entry == NULL) { -+ printk(KERN_ERR "EXT4-fs: unable to create %s\n", -+ EXT4_MB_PREALLOC_TABLE); -+ goto err_out; -+ } -+ proc_entry->data = sbi; -+ proc_entry->read_proc = ext4_mb_prealloc_table_proc_read; -+ proc_entry->write_proc = ext4_mb_prealloc_table_proc_write; + static void ext4_mb_history_release(struct super_block *sb) + { +@@ -2400,6 +2400,7 @@ + remove_proc_entry("mb_groups", sbi->s_proc); + if (sbi->s_mb_history_max) + remove_proc_entry("mb_history", sbi->s_proc); ++ remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc); + } + kfree(sbi->s_mb_history); + } +@@ -2408,6 +2446,13 @@ + p->proc_fops = &ext4_mb_seq_groups_fops; + p->data = sb; + } ++ p = create_proc_entry(EXT4_MB_PREALLOC_TABLE, S_IFREG | ++ S_IRUGO | S_IWUSR, sbi->s_proc); ++ if (p) { ++ p->data = sbi; ++ p->read_proc = ext4_mb_prealloc_table_proc_read; ++ p->write_proc = ext4_mb_prealloc_table_proc_write; ++ } + } + + sbi->s_mb_history_cur = 0; +@@ -2542,13 +2562,57 @@ + sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; + sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; + sbi->s_mb_stats = MB_DEFAULT_STATS; +- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; + sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; + sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; +- sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; + - return 0; - - err_out: - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); -- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); -@@ -2838,7 +2990,9 @@ ++ if (sbi->s_stripe == 0) { ++ sbi->s_mb_prealloc_table_size = 10; ++ i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long); ++ sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS); ++ if (sbi->s_mb_prealloc_table == NULL) { ++ kfree(sbi->s_mb_offsets); ++ kfree(sbi->s_mb_maxs); ++ return -ENOMEM; ++ } ++ memset(sbi->s_mb_prealloc_table, 0, i); ++ ++ ext4_mb_prealloc_table_add(sbi, 4); ++ ext4_mb_prealloc_table_add(sbi, 8); ++ ext4_mb_prealloc_table_add(sbi, 16); ++ ext4_mb_prealloc_table_add(sbi, 32); ++ ext4_mb_prealloc_table_add(sbi, 64); ++ ext4_mb_prealloc_table_add(sbi, 128); ++ ext4_mb_prealloc_table_add(sbi, 256); ++ ext4_mb_prealloc_table_add(sbi, 512); ++ ext4_mb_prealloc_table_add(sbi, 1024); ++ ext4_mb_prealloc_table_add(sbi, 2048); ++ ++ sbi->s_mb_small_req = 256; ++ sbi->s_mb_large_req = 1024; ++ sbi->s_mb_group_prealloc = 512; ++ } else { ++ sbi->s_mb_prealloc_table_size = 3; ++ i = sbi->s_mb_prealloc_table_size * sizeof(unsigned long); ++ sbi->s_mb_prealloc_table = kmalloc(i, GFP_NOFS); ++ if (sbi->s_mb_prealloc_table == NULL) { ++ kfree(sbi->s_mb_offsets); ++ kfree(sbi->s_mb_maxs); ++ return -ENOMEM; ++ } ++ memset(sbi->s_mb_prealloc_table, 0, i); ++ ++ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe); ++ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 2); ++ ext4_mb_prealloc_table_add(sbi, sbi->s_stripe * 4); ++ ++ sbi->s_mb_small_req = sbi->s_stripe; ++ sbi->s_mb_large_req = sbi->s_stripe * 8; ++ sbi->s_mb_group_prealloc = sbi->s_stripe * 4; ++ } - bdevname(sb->s_bdev, devname); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); -- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_LARGE_REQ, sbi->s_mb_proc); -+ remove_proc_entry(EXT4_MB_SMALL_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); + sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); + if (sbi->s_locality_groups == NULL) { ++ kfree(sbi->s_mb_prealloc_table); + kfree(sbi->s_mb_offsets); + kfree(sbi->s_mb_maxs); + return -ENOMEM; @@ -3032,11 +3186,12 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) @@ -358,48 +329,50 @@ Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.c @@ -3185,7 +3326,6 @@ } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && - start > ac->ac_o_ex.fe_logical); -- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + start > ac->ac_o_ex.fe_logical); +- BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ -@@ -4077,22 +4217,32 @@ - { - struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - int bsbits = ac->ac_sb->s_blocksize_bits; -- loff_t size, isize; -+ loff_t size; - - if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) - return; - -- size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; -- isize = i_size_read(ac->ac_inode) >> bsbits; -- size = max(size, isize); -- -- /* don't use group allocation for large files */ -- if (size >= sbi->s_mb_stream_request) -+ if (ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) - return; +@@ -4077,11 +4217,17 @@ - if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) - return; + /* don't use group allocation for large files */ + size = max(size, isize); +- if (size >= sbi->s_mb_stream_request) { ++ if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) || ++ (size >= sbi->s_mb_large_req)) { + ac->ac_flags |= EXT4_MB_STREAM_ALLOC; + return; + } + /* request is so large that we don't care about + * streaming - it overweights any possible seek */ + if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req) + return; + -+ size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; -+ size = size << bsbits; -+ if (size < i_size_read(ac->ac_inode)) -+ size = i_size_read(ac->ac_inode); -+ size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits; -+ -+ /* don't use group allocation for large files */ -+ if (size >= sbi->s_mb_large_req) -+ return; -+ BUG_ON(ac->ac_lg != NULL); /* * locality group prealloc space are per cpu. The reason for having +Index: linux-2.6.27.21-0.1/fs/ext4/inode.c +=================================================================== +--- linux-2.6.27.21-0.1.orig/fs/ext4/inode.c 2009-05-28 11:12:42.000000000 +0530 ++++ linux-2.6.27.21-0.1/fs/ext4/inode.c 2009-05-28 11:16:48.000000000 +0530 +@@ -2442,14 +2442,14 @@ + return -EROFS; + + /* +- * Make sure nr_to_write is >= sbi->s_mb_stream_request ++ * Make sure nr_to_write is >= sbi->s_mb_small_req + * This make sure small files blocks are allocated in + * single attempt. This ensure that small files + * get less fragmented. + */ +- if (wbc->nr_to_write < sbi->s_mb_stream_request) { +- nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; +- wbc->nr_to_write = sbi->s_mb_stream_request; ++ if (wbc->nr_to_write < sbi->s_mb_small_req) { ++ nr_to_writebump = sbi->s_mb_small_req - wbc->nr_to_write; ++ wbc->nr_to_write = sbi->s_mb_small_req; + } + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1;