From: Wang Shilong Date: Wed, 4 Mar 2020 02:34:46 +0000 (+0800) Subject: EX-852 ldiskfs: adjust default block allocation threshold X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=95cff643c1ef19969a7db8b388d3b36848b69745;p=fs%2Flustre-release.git EX-852 ldiskfs: adjust default block allocation threshold The default thresholds of 25% and 15% might be too aggressive, especially since DDN OSTs will reach 1PB soon. Adjust them to 15% and 10%, and also limit the maximum space to 15TB and 10TB. Change-Id: I472160fc37fa8d119f13084f00bb22ea55c5ca18 Signed-off-by: Wang Shilong Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/41835 Tested-by: jenkins Tested-by: Maloo --- diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-simple-blockalloc.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-simple-blockalloc.patch index f7a11a6..b44d58b 100644 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-simple-blockalloc.patch +++ b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-simple-blockalloc.patch @@ -1,8 +1,32 @@ -Index: linux-stage/fs/ext4/mballoc.c -=================================================================== ---- linux-stage.orig/fs/ext4/mballoc.c -+++ linux-stage/fs/ext4/mballoc.c -@@ -2078,6 +2078,21 @@ static int ext4_mb_good_group(struct ext +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index caf007ba..5a61cec7 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1407,6 +1407,9 @@ struct ext4_sb_info { + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; ++ ext4_fsblk_t s_mb_c1_blocks; ++ ext4_fsblk_t s_mb_c2_blocks; ++ ext4_fsblk_t s_mb_c3_blocks; + unsigned long *s_mb_prealloc_table; + unsigned long s_mb_prealloc_table_size; + unsigned int s_mb_group_prealloc; +@@ -1424,6 +1427,9 @@ struct ext4_sb_info { + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ ++ /* cX loop didn't find blocks */ ++ atomic64_t 
s_bal_cX_failed[3]; ++ atomic64_t s_bal_cX_skipped[3]; + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index e4a00c78..14a4fb25 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2098,6 +2098,21 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, return 0; } @@ -24,7 +48,7 @@ Index: linux-stage/fs/ext4/mballoc.c static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { -@@ -2087,6 +2102,7 @@ ext4_mb_regular_allocator(struct ext4_al +@@ -2107,6 +2122,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; @@ -32,7 +56,7 @@ Index: linux-stage/fs/ext4/mballoc.c sb = ac->ac_sb; sbi = EXT4_SB(sb); -@@ -2136,6 +2152,21 @@ ext4_mb_regular_allocator(struct ext4_al +@@ -2156,6 +2172,21 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) /* Let's just scan groups to find more-less suitable blocks */ cr = ac->ac_2order ? 
0 : 1; @@ -54,7 +78,7 @@ Index: linux-stage/fs/ext4/mballoc.c /* * cr == 0 try to get exact allocation, * cr == 3 try to get anything -@@ -2193,6 +2224,9 @@ repeat: +@@ -2213,6 +2244,9 @@ repeat: if (ac->ac_status != AC_STATUS_CONTINUE) break; } @@ -64,7 +88,7 @@ Index: linux-stage/fs/ext4/mballoc.c } if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND && -@@ -2316,6 +2350,93 @@ static const struct seq_operations ext4_ +@@ -2336,6 +2370,93 @@ static const struct seq_operations ext4_mb_seq_groups_ops = { .show = ext4_mb_seq_groups_show, }; @@ -157,8 +181,8 @@ Index: linux-stage/fs/ext4/mballoc.c + #define EXT4_MB_PREALLOC_TABLE "prealloc_table" - static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi, -@@ -2730,6 +2851,7 @@ static int ext4_groupinfo_create_slab(si + static ssize_t ext4_mb_prealloc_table_proc_write(struct file *file, +@@ -2738,6 +2859,7 @@ static int ext4_groupinfo_create_slab(size_t size) return 0; } @@ -166,17 +190,23 @@ Index: linux-stage/fs/ext4/mballoc.c int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); -@@ -2781,6 +2903,9 @@ int ext4_mb_init(struct super_block *sb) +@@ -2789,6 +2911,15 @@ int ext4_mb_init(struct super_block *sb) sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; sbi->s_mb_stats = MB_DEFAULT_STATS; sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; -+ sbi->s_mb_c1_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD); -+ sbi->s_mb_c2_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD); -+ sbi->s_mb_c3_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD); ++ sbi->s_mb_c1_blocks = ++ min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD), ++ MB_DEFAULT_MAX_C1_BYTES >> sb->s_blocksize_bits); ++ sbi->s_mb_c2_blocks = ++ min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD), ++ MB_DEFAULT_MAX_C2_BYTES >> sb->s_blocksize_bits); ++ sbi->s_mb_c3_blocks = ++ min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD), ++ MB_DEFAULT_MAX_C3_BYTES >> sb->s_blocksize_bits); /* * The default group 
preallocation is 512, which for 4k block * sizes translates to 2 megabytes. However for bigalloc file -@@ -2853,6 +2978,8 @@ int ext4_mb_init(struct super_block *sb) +@@ -2879,6 +3010,8 @@ int ext4_mb_init(struct super_block *sb) proc_create_data(EXT4_MB_PREALLOC_TABLE, S_IFREG | S_IRUGO | S_IWUSR, sbi->s_proc, &ext4_mb_prealloc_seq_fops, sb); @@ -185,7 +215,7 @@ Index: linux-stage/fs/ext4/mballoc.c proc_create_data("mb_last_group", S_IFREG | S_IRUGO | S_IWUSR, sbi->s_proc, &ext4_mb_seq_last_group_fops, sb); -@@ -2906,6 +3033,7 @@ int ext4_mb_release(struct super_block * +@@ -2932,6 +3065,7 @@ int ext4_mb_release(struct super_block *sb) remove_proc_entry("mb_last_group", sbi->s_proc); remove_proc_entry("mb_last_start", sbi->s_proc); remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc); @@ -193,10 +223,11 @@ Index: linux-stage/fs/ext4/mballoc.c } if (sbi->s_group_info) { -@@ -2936,6 +3064,16 @@ int ext4_mb_release(struct super_block * +@@ -2961,6 +3095,16 @@ int ext4_mb_release(struct super_block *sb) + atomic_read(&sbi->s_bal_allocated), atomic_read(&sbi->s_bal_reqs), atomic_read(&sbi->s_bal_success)); - ext4_msg(sb, KERN_INFO, ++ ext4_msg(sb, KERN_INFO, + "mballoc: (%lu, %lu, %lu) useless c(0,1,2) loops", + atomic64_read(&sbi->s_bal_cX_failed[0]), + atomic64_read(&sbi->s_bal_cX_failed[1]), @@ -206,39 +237,31 @@ Index: linux-stage/fs/ext4/mballoc.c + atomic64_read(&sbi->s_bal_cX_skipped[0]), + atomic64_read(&sbi->s_bal_cX_skipped[1]), + atomic64_read(&sbi->s_bal_cX_skipped[2])); -+ ext4_msg(sb, KERN_INFO, + ext4_msg(sb, KERN_INFO, "mballoc: %u extents scanned, %u goal hits, " "%u 2^N hits, %u breaks, %u lost", - atomic_read(&sbi->s_bal_ex_scanned), -Index: linux-stage/fs/ext4/ext4.h -=================================================================== ---- linux-stage.orig/fs/ext4/ext4.h -+++ linux-stage/fs/ext4/ext4.h -@@ -1409,6 +1409,9 @@ struct ext4_sb_info { - unsigned int s_mb_min_to_scan; - unsigned int s_mb_stats; - unsigned int s_mb_order2_reqs; -+ 
ext4_fsblk_t s_mb_c1_blocks; -+ ext4_fsblk_t s_mb_c2_blocks; -+ ext4_fsblk_t s_mb_c3_blocks; - unsigned long *s_mb_prealloc_table; - unsigned int s_mb_group_prealloc; - unsigned int s_max_dir_size_kb; -@@ -1425,6 +1428,9 @@ struct ext4_sb_info { - atomic_t s_bal_goals; /* goal hits */ - atomic_t s_bal_breaks; /* too long searches */ - atomic_t s_bal_2orders; /* 2^order hits */ -+ /* cX loop didn't find blocks */ -+ atomic64_t s_bal_cX_failed[3]; -+ atomic64_t s_bal_cX_skipped[3]; - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; -Index: linux-stage/fs/ext4/super.c -=================================================================== ---- linux-stage.orig/fs/ext4/super.c -+++ linux-stage/fs/ext4/super.c -@@ -2734,6 +2734,73 @@ static ssize_t sbi_deprecated_show(struc +diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h +index 81d56d0b..3ee74eac 100644 +--- a/fs/ext4/mballoc.h ++++ b/fs/ext4/mballoc.h +@@ -84,6 +84,12 @@ extern ushort ext4_mballoc_debug; + * for which requests use 2^N search using buddies + */ + #define MB_DEFAULT_ORDER2_REQS 8 ++#define MB_DEFAULT_C1_THRESHOLD 15 ++#define MB_DEFAULT_C2_THRESHOLD 10 ++#define MB_DEFAULT_C3_THRESHOLD 5 ++#define MB_DEFAULT_MAX_C1_BYTES (15ULL << 40) /* 15TB */ ++#define MB_DEFAULT_MAX_C2_BYTES (10ULL << 40) ++#define MB_DEFAULT_MAX_C3_BYTES (5ULL << 40) + + /* + * default group prealloc size 512 blocks +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index ca8b50c8..b492245b 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2746,6 +2746,73 @@ static ssize_t sbi_deprecated_show(struct ext4_attr *a, return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); } @@ -312,7 +335,7 @@ Index: linux-stage/fs/ext4/super.c #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ -@@ -2790,6 +2857,9 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats +@@ -2802,6 
+2869,9 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); @@ -322,7 +345,7 @@ Index: linux-stage/fs/ext4/super.c EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req); EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -@@ -2820,6 +2890,9 @@ static struct attribute *ext4_attrs[] = +@@ -2833,6 +2903,9 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_max_to_scan), ATTR_LIST(mb_min_to_scan), ATTR_LIST(mb_order2_req), @@ -332,17 +355,3 @@ Index: linux-stage/fs/ext4/super.c ATTR_LIST(mb_small_req), ATTR_LIST(mb_large_req), ATTR_LIST(mb_group_prealloc), -Index: linux-stage/fs/ext4/mballoc.h -=================================================================== ---- linux-stage.orig/fs/ext4/mballoc.h -+++ linux-stage/fs/ext4/mballoc.h -@@ -84,6 +84,9 @@ extern ushort ext4_mballoc_debug; - * for which requests use 2^N search using buddies - */ - #define MB_DEFAULT_ORDER2_REQS 8 -+#define MB_DEFAULT_C1_THRESHOLD 25 -+#define MB_DEFAULT_C2_THRESHOLD 15 -+#define MB_DEFAULT_C3_THRESHOLD 5 - - /* - * default group prealloc size 512 blocks