From 1363670d0125d1962a40643f784fccc943cf47d8 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Wed, 15 May 2024 10:22:33 -0400 Subject: [PATCH] LU-15781 ldiskfs: support 5.15.0-106+ ubuntu kernels Starting with 5.15.0-106 kernels the ext4-prealloc patch no longer applies. Update ext4-prealloc.patch so it can build again. Test-Parameters: trivial Change-Id: I958c64842c5e1dc8b974e8a188fa18541d458ab5 Signed-off-by: James Simmons Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55078 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Shaun Tancheff Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- config/lustre-build-ldiskfs.m4 | 12 +- .../patches/ubuntu22.04.4/ext4-prealloc.patch | 426 +++++++++++++++++++++ .../series/ldiskfs-5.15.0-106-ubuntu20.series | 38 ++ 3 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 ldiskfs/kernel_patches/patches/ubuntu22.04.4/ext4-prealloc.patch create mode 100644 ldiskfs/kernel_patches/series/ldiskfs-5.15.0-106-ubuntu20.series diff --git a/config/lustre-build-ldiskfs.m4 b/config/lustre-build-ldiskfs.m4 index 3f7d370..d35c936 100644 --- a/config/lustre-build-ldiskfs.m4 +++ b/config/lustre-build-ldiskfs.m4 @@ -150,7 +150,17 @@ AS_IF([test x$RHEL_KERNEL = xyes], [ [LDISKFS_SERIES="5.8.0-ml.series"])], [LDISKFS_SERIES="5.11.0-40-ubuntu20.series"], [LDISKFS_SERIES="5.11.0-40-ubuntu20.series"])], - [LDISKFS_SERIES="5.15.0-83-ubuntu20.series"], + [ + KPLEV=$(echo $LINUXRELEASE | cut -d'-' -f2) + AS_IF( + [test -z "$KPLEV"], [ + AC_MSG_WARN([Failed to determine Kernel patch level. 
Assume latest.]) + LDISKFS_SERIES="5.15.0-106-ubuntu20.series" + ], + [test $KPLEV -ge 106], [LDISKFS_SERIES="5.15.0-106-ubuntu20.series"], + [LDISKFS_SERIES="5.15.0-83-ubuntu20.series"] + ) + ], [LDISKFS_SERIES="5.15.0-83-ubuntu20.series"])], [LDISKFS_SERIES="5.19.0-35-ubuntu.series"], [LDISKFS_SERIES="5.19.0-35-ubuntu.series"]) diff --git a/ldiskfs/kernel_patches/patches/ubuntu22.04.4/ext4-prealloc.patch b/ldiskfs/kernel_patches/patches/ubuntu22.04.4/ext4-prealloc.patch new file mode 100644 index 0000000..12926ba --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ubuntu22.04.4/ext4-prealloc.patch @@ -0,0 +1,426 @@ +commit d8d8fd9192a54c7b8caef8cca9b7a1eb5e5e3298 +Author: Alex Zhuravlev +AuthorDate: Thu Oct 23 10:02:19 2008 +0000 + +Subject: ext4: support for tunable preallocation window +Add support for tunable preallocation window and new tunables +for large/small requests. + +Bugzilla-ID: b=12800 +Signed-off-by: Alex Zhuravlev +Reviewed-by: Kalpak Shah +Reviewed-by: Andreas Dilger +--- + fs/ext4/ext4.h | 7 +- + fs/ext4/inode.c | 3 + + fs/ext4/mballoc.c | 220 +++++++++++++++++++++++++++++++++++----------- + fs/ext4/sysfs.c | 8 +- + 4 files changed, 182 insertions(+), 56 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index bdd72d46..7168e4e4 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1290,6 +1290,8 @@ extern void ext4_set_bits(void *bm, int cur, int len); + #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ + #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + ++#define EXT4_MAX_PREALLOC_TABLE 64 ++ + /* + * Behaviour when detecting errors + */ +@@ -1594,11 +1596,13 @@ struct ext4_sb_info { + /* tunables */ + unsigned long s_stripe; + unsigned int s_mb_max_linear_groups; +- unsigned int s_mb_stream_request; ++ unsigned long s_mb_small_req; ++ unsigned long s_mb_large_req; + unsigned int s_mb_max_to_scan; + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; ++ unsigned long 
*s_mb_prealloc_table; + unsigned int s_mb_group_prealloc; + unsigned int s_mb_max_inode_prealloc; + unsigned int s_max_dir_size_kb; +@@ -2939,6 +2943,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, + int len, int replay); + + /* mballoc.c */ ++extern const struct proc_ops ext4_seq_prealloc_table_fops; + extern const struct seq_operations ext4_mb_seq_groups_ops; + extern const struct seq_operations ext4_mb_seq_structs_summary_ops; + extern long ext4_mb_stats; +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 8fa8757e..bc7bcbc0 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2743,6 +2743,9 @@ static int ext4_writepages(struct address_space *mapping, + PAGE_SIZE >> inode->i_blkbits); + } + ++ if (wbc->nr_to_write < sbi->s_mb_small_req) ++ wbc->nr_to_write = sbi->s_mb_small_req; ++ + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index e8f5f05b..e1e3da73 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3091,6 +3091,99 @@ const struct seq_operations ext4_mb_seq_structs_summary_ops = { + .show = ext4_mb_seq_structs_summary_show, + }; + ++static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi, ++ char *str, size_t cnt, ++ int update) ++{ ++ unsigned long value; ++ unsigned long prev = 0; ++ char *cur; ++ char *next; ++ char *end; ++ int num = 0; ++ ++ cur = str; ++ end = str + cnt; ++ while (cur < end) { ++ while ((cur < end) && (*cur == ' ')) cur++; ++ value = simple_strtol(cur, &next, 0); ++ if (value == 0) ++ break; ++ if (cur == next) ++ return -EINVAL; ++ ++ cur = next; ++ ++ if (value > (sbi->s_blocks_per_group - 1 - 1 - sbi->s_itb_per_group)) ++ return -EINVAL; ++ ++ /* they should add values in order */ ++ if (value <= prev) ++ return -EINVAL; ++ ++ if (update) ++ sbi->s_mb_prealloc_table[num] = value; ++ ++ prev = value; ++ num++; ++ } ++ ++ if (num > EXT4_MAX_PREALLOC_TABLE - 1) ++ return -EOVERFLOW; ++ ++ if (update) 
++ sbi->s_mb_prealloc_table[num] = 0; ++ ++ return 0; ++} ++ ++static ssize_t ext4_mb_prealloc_table_proc_write(struct file *file, ++ const char __user *buf, ++ size_t cnt, loff_t *pos) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(pde_data(file_inode(file))); ++ char str[128]; ++ int rc; ++ ++ if (cnt >= sizeof(str)) ++ return -EINVAL; ++ if (copy_from_user(str, buf, cnt)) ++ return -EFAULT; ++ ++ rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 0); ++ if (rc) ++ return rc; ++ ++ rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 1); ++ return rc ? rc : cnt; ++} ++ ++static int mb_prealloc_table_seq_show(struct seq_file *m, void *v) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(m->private); ++ int i; ++ ++ for (i = 0; i < EXT4_MAX_PREALLOC_TABLE && ++ sbi->s_mb_prealloc_table[i] != 0; i++) ++ seq_printf(m, "%ld ", sbi->s_mb_prealloc_table[i]); ++ seq_printf(m, "\n"); ++ ++ return 0; ++} ++ ++static int mb_prealloc_table_seq_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, mb_prealloc_table_seq_show, pde_data(inode)); ++} ++ ++const struct proc_ops ext4_seq_prealloc_table_fops = { ++ .proc_open = mb_prealloc_table_seq_open, ++ .proc_read = seq_read, ++ .proc_lseek = seq_lseek, ++ .proc_release = single_release, ++ .proc_write = ext4_mb_prealloc_table_proc_write, ++}; ++ + static struct kmem_cache *get_groupinfo_cache(int blocksize_bits) + { + int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE; +@@ -3407,7 +3500,7 @@ static void ext4_discard_work(struct work_struct *work) + int ext4_mb_init(struct super_block *sb) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +- unsigned i, j; ++ unsigned i, j, k, l; + unsigned offset, offset_incr; + unsigned max; + int ret; +@@ -3479,7 +3572,6 @@ int ext4_mb_init(struct super_block *sb) + sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; + sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; + sbi->s_mb_stats = MB_DEFAULT_STATS; +- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; + 
sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; + sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC; + /* +@@ -3504,9 +3596,29 @@ int ext4_mb_init(struct super_block *sb) + * RAID stripe size so that preallocations don't fragment + * the stripes. + */ +- if (sbi->s_stripe > 1) { +- sbi->s_mb_group_prealloc = roundup( +- sbi->s_mb_group_prealloc, sbi->s_stripe); ++ ++ /* Allocate table once */ ++ sbi->s_mb_prealloc_table = kzalloc( ++ EXT4_MAX_PREALLOC_TABLE * sizeof(unsigned long), GFP_NOFS); ++ if (sbi->s_mb_prealloc_table == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ if (sbi->s_stripe == 0) { ++ for (k = 0, l = 4; k <= 9; ++k, l *= 2) ++ sbi->s_mb_prealloc_table[k] = l; ++ ++ sbi->s_mb_small_req = 256; ++ sbi->s_mb_large_req = 1024; ++ sbi->s_mb_group_prealloc = 512; ++ } else { ++ for (k = 0, l = sbi->s_stripe; k <= 2; ++k, l *= 2) ++ sbi->s_mb_prealloc_table[k] = l; ++ ++ sbi->s_mb_small_req = sbi->s_stripe; ++ sbi->s_mb_large_req = sbi->s_stripe * 8; ++ sbi->s_mb_group_prealloc = sbi->s_stripe * 4; + } + + sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); +@@ -3540,6 +3652,7 @@ out_free_locality_groups: + out: + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); ++ kfree(sbi->s_mb_prealloc_table); + kfree(sbi->s_mb_offsets); + sbi->s_mb_offsets = NULL; + kfree(sbi->s_mb_maxs); +@@ -3608,6 +3721,7 @@ + } + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); ++ kfree(sbi->s_mb_prealloc_table); + kfree(sbi->s_mb_offsets); + kfree(sbi->s_mb_maxs); + iput(sbi->s_buddy_cache); +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index e8f5f05b..e1e3da73 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3810,7 +3923,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + int err, len; + + BUG_ON(ac->ac_status != AC_STATUS_FOUND); +- BUG_ON(ac->ac_b_ex.fe_len <= 0); + + sb = ac->ac_sb; + sbi = EXT4_SB(sb); +@@ -4046,13 +4158,14 @@ 
ext4_mb_normalize_request(struct ext4_allocation_context *ac, + { + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + struct ext4_super_block *es = sbi->s_es; +- int bsbits, max; ++ int bsbits, i, wind; + ext4_lblk_t end; +- loff_t size, start_off; ++ loff_t size; + loff_t orig_size __maybe_unused; + ext4_lblk_t start; + struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); + struct ext4_prealloc_space *pa; ++ unsigned long value, last_non_zero; + + /* do normalize only data requests, metadata requests + do not need preallocation */ +@@ -4081,51 +4194,46 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, + size = size << bsbits; + if (size < i_size_read(ac->ac_inode)) + size = i_size_read(ac->ac_inode); +- orig_size = size; ++ size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits; ++ ++ start = wind = 0; ++ value = last_non_zero = 0; + +- /* max size of free chunks */ +- max = 2 << bsbits; +- +-#define NRL_CHECK_SIZE(req, size, max, chunk_size) \ +- (req <= (size) || max <= (chunk_size)) +- +- /* first, try to predict filesize */ +- /* XXX: should this table be tunable? 
*/ +- start_off = 0; +- if (size <= 16 * 1024) { +- size = 16 * 1024; +- } else if (size <= 32 * 1024) { +- size = 32 * 1024; +- } else if (size <= 64 * 1024) { +- size = 64 * 1024; +- } else if (size <= 128 * 1024) { +- size = 128 * 1024; +- } else if (size <= 256 * 1024) { +- size = 256 * 1024; +- } else if (size <= 512 * 1024) { +- size = 512 * 1024; +- } else if (size <= 1024 * 1024) { +- size = 1024 * 1024; +- } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { +- start_off = ((loff_t)ac->ac_o_ex.fe_logical >> +- (21 - bsbits)) << 21; +- size = 2 * 1024 * 1024; +- } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) { +- start_off = ((loff_t)ac->ac_o_ex.fe_logical >> +- (22 - bsbits)) << 22; +- size = 4 * 1024 * 1024; +- } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, +- (8<<20)>>bsbits, max, 8 * 1024)) { +- start_off = ((loff_t)ac->ac_o_ex.fe_logical >> +- (23 - bsbits)) << 23; +- size = 8 * 1024 * 1024; ++ /* let's choose preallocation window depending on file size */ ++ for (i = 0; i < EXT4_MAX_PREALLOC_TABLE; i++) { ++ value = sbi->s_mb_prealloc_table[i]; ++ if (value == 0) ++ break; ++ else ++ last_non_zero = value; ++ ++ if (size <= value) { ++ wind = value; ++ break; ++ } ++ } ++ ++ if (wind == 0) { ++ if (last_non_zero != 0) { ++ __u64 tstart, tend; ++ /* file is quite large, we now preallocate with ++ * the biggest configured window with regard to ++ * logical offset */ ++ wind = last_non_zero; ++ tstart = ac->ac_o_ex.fe_logical; ++ do_div(tstart, wind); ++ start = tstart * wind; ++ tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1; ++ do_div(tend, wind); ++ tend = tend * wind + wind; ++ size = tend - start; ++ } + } else { +- start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits; +- size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb), +- ac->ac_o_ex.fe_len) << bsbits; ++ size = wind; + } +- size = size >> bsbits; +- start = start_off >> bsbits; ++ ++ ++ orig_size = size; + + /* + * For tiny groups (smaller than 8MB) the chosen 
allocation +@@ -4216,7 +4324,6 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, + (unsigned long) ac->ac_o_ex.fe_logical); + BUG(); + } +- BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + + /* now prepare goal request */ + +@@ -5249,8 +5356,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) + inode_pa_eligible = false; + + size = max(size, isize); +- /* Don't use group allocation for large files */ +- if (size > sbi->s_mb_stream_request) ++ if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) || ++ (size >= sbi->s_mb_large_req)) + group_pa_eligible = false; + + if (!group_pa_eligible) { +@@ -5261,6 +5368,13 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) + return; + } + ++ /* ++ * request is so large that we don't care about ++ * streaming - it overweights any possible seek ++ */ ++ if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req) ++ return; ++ + BUG_ON(ac->ac_lg != NULL); + /* + * locality group prealloc space are per cpu. 
The reason for having +diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c +index aa07b78b..eef2fadb 100644 +--- a/fs/ext4/sysfs.c ++++ b/fs/ext4/sysfs.c +@@ -212,7 +212,8 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); + EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); + EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); +-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); ++EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req); ++EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req); + EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); + EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc); + EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); +@@ -261,7 +262,8 @@ static struct attribute *ext4_attrs[] = { + ATTR_LIST(mb_max_to_scan), + ATTR_LIST(mb_min_to_scan), + ATTR_LIST(mb_order2_req), +- ATTR_LIST(mb_stream_req), ++ ATTR_LIST(mb_small_req), ++ ATTR_LIST(mb_large_req), + ATTR_LIST(mb_group_prealloc), + ATTR_LIST(mb_max_inode_prealloc), + ATTR_LIST(mb_max_linear_groups), +@@ -546,6 +548,8 @@ int ext4_register_sysfs(struct super_block *sb) + ext4_fc_info_show, sb); + proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_groups_ops, sb); ++ proc_create_data("prealloc_table", S_IRUGO, sbi->s_proc, ++ &ext4_seq_prealloc_table_fops, sb); + proc_create_single_data("mb_stats", 0444, sbi->s_proc, + ext4_seq_mb_stats_show, sb); + proc_create_seq_data("mb_structs_summary", 0444, sbi->s_proc, +-- +2.25.1 + diff --git a/ldiskfs/kernel_patches/series/ldiskfs-5.15.0-106-ubuntu20.series b/ldiskfs/kernel_patches/series/ldiskfs-5.15.0-106-ubuntu20.series new file mode 100644 index 0000000..3a1201a --- /dev/null +++ b/ldiskfs/kernel_patches/series/ldiskfs-5.15.0-106-ubuntu20.series @@ -0,0 +1,38 @@ +ubuntu20.04.5/ext4-inode-version.patch +linux-5.4/ext4-lookup-dotdot.patch +linux-5.14/ext4-print-inum-in-htree-warning.patch +ubuntu22.04.4/ext4-prealloc.patch 
+linux-5.16/ext4-osd-iop-common.patch +linux-5.16/ext4-misc.patch +ubuntu20.04.5/ext4-mballoc-extra-checks.patch +sles15sp4/ext4-hash-indexed-dir-dotdot-update.patch +linux-5.14/ext4-kill-dx-root.patch +linux-5.18/ext4-mballoc-pa-free-mismatch.patch +ubuntu20.04.5/ext4-data-in-dirent.patch +rhel8/ext4-nocmtime.patch +base/ext4-htree-lock.patch +rhel9.2/ext4-pdirop.patch +linux-5.8/ext4-max-dir-size.patch +ubuntu20.04.5/ext4-corrupted-inode-block-bitmaps-handling-patches.patch +linux-5.10/ext4-give-warning-with-dir-htree-growing.patch +ubuntu18/ext4-jcb-optimization.patch +linux-5.10/ext4-attach-jinode-in-writepages.patch +ubuntu20.04.5/ext4-dont-check-before-replay.patch +rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch +rhel7.6/ext4-export-orphan-add.patch +linux-5.18/ext4-export-mb-stream-allocator-variables.patch +ubuntu19/ext4-iget-with-flags.patch +linux-5.14/export-ext4fs-dirhash-helper.patch +linux-5.8/ext4-no-max-dir-size-limit-for-iam-objects.patch +rhel9/ext4-dquot-commit-speedup.patch +linux-5.14/ext4-ialloc-uid-gid-and-pass-owner-down.patch +linux-5.14/ext4-projid-xattrs.patch +rhel9.1/ext4-delayed-iput.patch +rhel8/ext4-ext-merge.patch +linux-5.14/ext4-xattr-disable-credits-check.patch +rhel9.2/ext4-fiemap-kernel-data.patch +rhel8/ext4-old_ea_inodes_handling_fix.patch +ubuntu20.04.5/ext4-filename-encode.patch +rhel9.1/ext4-enc-flag.patch +rhel9.2/ext4-encdata.patch +rhel9/ext4-add-periodic-superblock-update.patch -- 1.8.3.1