From 0027b134c4db40eb5804dd759d8f0a9a04771c3e Mon Sep 17 00:00:00 2001 From: Jian Yu Date: Mon, 18 Nov 2024 11:42:18 -0800 Subject: [PATCH] LU-18414 kernel: update RHEL 9.4 [5.14.0-427.42.1.el9_4] Update RHEL 9.4 kernel to 5.14.0-427.42.1.el9_4. Test-Parameters: trivial fstype=ldiskfs mdtcount=4 mdscount=2 \ clientdistro=el9.4 serverdistro=el9.3 testlist=sanity Test-Parameters: trivial fstype=zfs mdtcount=4 mdscount=2 \ clientdistro=el9.4 serverdistro=el9.3 testlist=sanity Test-Parameters: trivial fstype=ldiskfs mdtcount=4 mdscount=2 \ clientdistro=el9.3 serverdistro=el9.4 testlist=sanity Test-Parameters: trivial fstype=zfs mdtcount=4 mdscount=2 \ clientdistro=el9.3 serverdistro=el9.4 testlist=sanity Test-Parameters: optional clientdistro=el9.4 serverdistro=el9.4 \ mdscount=2 mdtcount=4 ostcount=8 testgroup=full-dne-part-1 Test-Parameters: optional clientdistro=el9.4 serverdistro=el9.4 \ mdscount=2 mdtcount=4 ostcount=8 testgroup=full-dne-part-2 Test-Parameters: optional clientdistro=el9.4 serverdistro=el9.4 \ mdscount=2 mdtcount=4 ostcount=8 testgroup=full-dne-part-3 Change-Id: Ib1b95bcaf35a9f8ed80fe7a33b51127086dd412c Signed-off-by: Jian Yu Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56845 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alex Deiter Reviewed-by: Yang Sheng Reviewed-by: Oleg Drokin --- .../patches/rhel9.4/ext4-delayed-iput.patch | 187 +++++++++++++++++++++ .../series/ldiskfs-5.14-rhel9.4.series | 2 +- lustre/ChangeLog | 4 +- .../kernel-5.14.0-5.14-rhel9.4-x86_64.config | 2 +- .../kernel_patches/targets/5.14-rhel9.4.target.in | 2 +- lustre/kernel_patches/which_patch | 2 +- 6 files changed, 193 insertions(+), 6 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/rhel9.4/ext4-delayed-iput.patch diff --git a/ldiskfs/kernel_patches/patches/rhel9.4/ext4-delayed-iput.patch b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-delayed-iput.patch new file mode 100644 index 0000000..aa8412c --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-delayed-iput.patch @@ -0,0 +1,187 @@ +commit e239a14001b62d96c186ae2c9f58402f73e63dcc +Author: Andrew Perepechko +AuthorDate: Mon Jan 31 19:55:31 2022 +0300 +LU-15404 ldiskfs: truncate during setxattr leads to kernel panic + +When changing a large xattr value to a different large xattr value, +the old xattr inode is freed. Truncate during the final iput causes +current transaction restart. Eventually, parent inode bh is marked +dirty and kernel panic happens when jbd2 figures out that this bh +belongs to the committed transaction. + +A possible fix is to call this final iput in a separate thread. +This way, setxattr transactions will never be split into two. +Since the setxattr code adds xattr inodes with nlink=0 into the +orphan list, old xattr inodes will be properly cleaned up in +any case. + +Change-Id: Idd70befa6a83818ece06daccf9bb6256812674b9 +Signed-off-by: Andrew Perepechko +HPE-bug-id: LUS-10534 + +Changes since v1: +- fixed a bug added during the porting +- fixed a workqueue related deadlock reported by Tetsuo Handa + +Reviewed-on: https://review.whamcloud.com/46358 +Reviewed-by: Andreas Dilger +Reviewed-by: Alexander Zarochentsev +--- + fs/ext4/ext4.h | 7 +++++-- + fs/ext4/page-io.c | 2 +- + fs/ext4/super.c | 15 ++++++++------- + fs/ext4/xattr.c | 39 +++++++++++++++++++++++++++++++++++++-- + 4 files changed, 51 insertions(+), 12 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 0e8a936..b6a5e52 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1664,8 +1664,11 @@ struct ext4_sb_info { + struct flex_groups * __rcu *s_flex_groups; + ext4_group_t s_flex_groups_allocated; + +- /* workqueue for reserved extent conversions (buffered io) */ +- struct workqueue_struct *rsv_conversion_wq; ++ /* ++ * workqueue for reserved extent conversions (buffered io) ++ * and large ea inodes reclaim ++ */ ++ struct workqueue_struct *s_misc_wq; + + /* timer for periodic error stats printing */ + struct timer_list s_err_report; +diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c +index 417476c..1dbfc16 100644 +--- a/fs/ext4/page-io.c ++++ b/fs/ext4/page-io.c +@@ -230,7 +230,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end) + WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); + WARN_ON(!io_end->handle && sbi->s_journal); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); +- wq = sbi->rsv_conversion_wq; ++ wq = sbi->s_misc_wq; + if (list_empty(&ei->i_rsv_conversion_list)) + queue_work(wq, &ei->i_rsv_conversion_work); + list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 82a8d57..64b7306 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -1205,10 +1205,11 @@ static void ext4_put_super(struct super_block *sb) + ext4_unregister_sysfs(sb); + + ext4_unregister_li_request(sb); ++ flush_workqueue(sbi->s_misc_wq); + ext4_quota_off_umount(sb); + + flush_work(&sbi->s_error_work); +- destroy_workqueue(sbi->rsv_conversion_wq); ++ destroy_workqueue(sbi->s_misc_wq); + ext4_release_orphan_info(sb); + + if (sbi->s_journal) { +@@ -5337,9 +5338,9 @@ no_journal: + * The maximum number of concurrent works can be high and + * concurrency isn't really necessary. Limit it to 1. + */ +- EXT4_SB(sb)->rsv_conversion_wq = +- alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); +- if (!EXT4_SB(sb)->rsv_conversion_wq) { ++ EXT4_SB(sb)->s_misc_wq = ++ alloc_workqueue("ext4-misc", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); ++ if (!EXT4_SB(sb)->s_misc_wq) { + printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); + ret = -ENOMEM; + goto failed_mount4; +@@ -5553,8 +5554,8 @@ failed_mount4a: + sb->s_root = NULL; + failed_mount4: + ext4_msg(sb, KERN_ERR, "mount failed"); +- if (EXT4_SB(sb)->rsv_conversion_wq) +- destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); ++ if (EXT4_SB(sb)->s_misc_wq) ++ destroy_workqueue(EXT4_SB(sb)->s_misc_wq); + failed_mount_wq: + ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); + sbi->s_ea_inode_cache = NULL; +@@ -6170,7 +6171,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) + return 0; + + trace_ext4_sync_fs(sb, wait); +- flush_workqueue(sbi->rsv_conversion_wq); ++ flush_workqueue(sbi->s_misc_wq); + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 038124c..def6dc7 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1659,6 +1659,36 @@ out_err: + return ERR_PTR(err); + } + ++struct delayed_iput_work { ++ struct work_struct work; ++ struct inode *inode; ++}; ++ ++static void delayed_iput_fn(struct work_struct *work) ++{ ++ struct delayed_iput_work *diwork; ++ ++ diwork = container_of(work, struct delayed_iput_work, work); ++ iput(diwork->inode); ++ kfree(diwork); ++} ++ ++static void delayed_iput(struct inode *inode, struct delayed_iput_work *work) ++{ ++ if (!inode) { ++ kfree(work); ++ return; ++ } ++ ++ if (!work) { ++ iput(inode); ++ } else { ++ INIT_WORK(&work->work, delayed_iput_fn); ++ work->inode = inode; ++ queue_work(EXT4_SB(inode->i_sb)->s_misc_wq, &work->work); ++ } ++} ++ + /* + * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode + * feature is enabled. +@@ -1676,6 +1706,7 @@ static int ext4_xattr_set_entry(struct e + size_t min_offs = s->end - s->base, name_len = strlen(i->name); + int in_inode = i->in_inode; + struct inode *old_ea_inode = NULL; ++ struct delayed_iput_work *diwork = NULL; + size_t old_size, new_size; + int ret; + +@@ -1752,7 +1783,11 @@ static int ext4_xattr_set_entry(struct e + * Finish that work before doing any modifications to the xattr data. + */ + if (!s->not_found && here->e_value_inum) { +- ret = ext4_xattr_inode_iget(inode, ++ diwork = kmalloc(sizeof(*diwork), GFP_NOFS); ++ if (!diwork) ++ ret = -ENOMEM; ++ else ++ ret = ext4_xattr_inode_iget(inode, + le32_to_cpu(here->e_value_inum), + le32_to_cpu(here->e_hash), + &old_ea_inode); +@@ -1873,7 +1908,7 @@ update_hash: + + ret = 0; + out: +- iput(old_ea_inode); ++ delayed_iput(old_ea_inode, diwork); + return ret; + } + diff --git a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series index ce45357..a9d2003 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series @@ -28,7 +28,7 @@ rhel9/ext4-dquot-commit-speedup.patch rhel9.4/ext4-introduce-EXT4_BG_TRIMMED-to-optimize-fstrim.patch linux-5.14/ext4-ialloc-uid-gid-and-pass-owner-down.patch linux-5.14/ext4-projid-xattrs.patch -rhel9.1/ext4-delayed-iput.patch +rhel9.4/ext4-delayed-iput.patch rhel8/ext4-ext-merge.patch linux-5.14/ext4-xattr-disable-credits-check.patch rhel9.2/ext4-fiemap-kernel-data.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 5ffacb3..03c69e1 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -3,7 +3,7 @@ TBD Whamcloud * See https://wiki.whamcloud.com/display/PUB/Lustre+Support+Matrix for currently supported client and server kernel versions. * Server primary kernels built and tested during release cycle: - 5.14.0-427.31.1.el9 (RHEL9.4) + 5.14.0-427.42.1.el9 (RHEL9.4) 5.14.0-362.24.1.el9 (RHEL9.3) 4.18.0-553.16.1.el8 (RHEL8.10) 4.18.0-513.24.1.el8 (RHEL8.9) @@ -43,7 +43,7 @@ TBD Whamcloud * ldiskfs needs an ldiskfs patch series for that kernel, ZFS does not * Client primary kernels built and tested during release cycle: 5.14.0-503.14.1.el9 (RHEL9.5) - 5.14.0-427.31.1.el9 (RHEL9.4) + 5.14.0-427.42.1.el9 (RHEL9.4) 5.14.0-362.24.1.el9 (RHEL9.3) 4.18.0-553.16.1.el8 (RHEL8.10) 4.18.0-513.24.1.el8 (RHEL8.9) diff --git a/lustre/kernel_patches/kernel_configs/kernel-5.14.0-5.14-rhel9.4-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-5.14.0-5.14-rhel9.4-x86_64.config index 89c7b37..a87e1ec 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-5.14.0-5.14-rhel9.4-x86_64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-5.14.0-5.14-rhel9.4-x86_64.config @@ -32,7 +32,7 @@ CONFIG_WERROR=y CONFIG_UAPI_HEADER_TEST=y CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_BUILD_SALT="5.14.0-427.31.1.el9_4.x86_64" +CONFIG_BUILD_SALT="5.14.0-427.42.1.el9_4.x86_64" CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y CONFIG_HAVE_KERNEL_LZMA=y diff --git a/lustre/kernel_patches/targets/5.14-rhel9.4.target.in b/lustre/kernel_patches/targets/5.14-rhel9.4.target.in index c509ac6..b320031 100644 --- a/lustre/kernel_patches/targets/5.14-rhel9.4.target.in +++ b/lustre/kernel_patches/targets/5.14-rhel9.4.target.in @@ -1,5 +1,5 @@ lnxmaj="5.14.0" -lnxrel="427.31.1.el9_4" +lnxrel="427.42.1.el9_4" KERNEL_SRPM=kernel-${lnxmaj}-${lnxrel}.src.rpm SERIES=5.14-rhel9.4.series diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index 83354d5..037db16 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -24,6 +24,6 @@ PATCH SERIES FOR SERVER KERNELS: 5.14-rhel9.1.series 5.14.0-162.23.1.el9 (RHEL 9.1) 5.14-rhel9.2.series 5.14.0-284.30.1.el9 (RHEL 9.2) 5.14-rhel9.3.series 5.14.0-362.24.1.el9 (RHEL 9.3) -5.14-rhel9.4.series 5.14.0-427.31.1.el9 (RHEL 9.4) +5.14-rhel9.4.series 5.14.0-427.42.1.el9 (RHEL 9.4) See lustre/ChangeLog for supported client kernel versions. -- 1.8.3.1