--- /dev/null
+Subject: [PATCH] ext4: delayed iput
+
+When changing a large xattr value to a different large xattr
+value, the old xattr inode is freed. Truncate during the final iput causes
+current transaction restart. Eventually, parent inode bh is marked dirty and
+kernel panic happens when jbd2 figures out that this bh belongs to the
+committed transaction.
+
+A possible fix is to call this final iput in a separate thread.
+This way, setxattr transactions will never be split into two.
+Since the setxattr code adds xattr inodes with nlink=0 into the
+orphan list, old xattr inodes will be properly cleaned up in
+any case.
+
+Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
+HPE-bug-id: LUS-10534
+---
+ fs/ext4/ext4.h | 7 +++++--
+ fs/ext4/page-io.c | 2 +-
+ fs/ext4/super.c | 15 ++++++++-------
+ fs/ext4/xattr.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 4 files changed, 51 insertions(+), 12 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 0e8a936..b6a5e52 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1664,8 +1664,11 @@ struct ext4_sb_info {
+ struct flex_groups * __rcu *s_flex_groups;
+ ext4_group_t s_flex_groups_allocated;
+
+- /* workqueue for reserved extent conversions (buffered io) */
+- struct workqueue_struct *rsv_conversion_wq;
++ /*
++ * workqueue for reserved extent conversions (buffered io)
++ * and large ea inodes reclaim
++ */
++ struct workqueue_struct *s_misc_wq;
+
+ /* timer for periodic error stats printing */
+ struct timer_list s_err_report;
+diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
+index 417476c..1dbfc16 100644
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -230,7 +230,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
+ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+ WARN_ON(!io_end->handle && sbi->s_journal);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+- wq = sbi->rsv_conversion_wq;
++ wq = sbi->s_misc_wq;
+ if (list_empty(&ei->i_rsv_conversion_list))
+ queue_work(wq, &ei->i_rsv_conversion_work);
+ list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 82a8d57..64b7306 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1205,10 +1205,11 @@ static void ext4_put_super(struct super_block *sb)
+ ext4_unregister_sysfs(sb);
+
+ ext4_unregister_li_request(sb);
++ flush_workqueue(sbi->s_misc_wq);
+ ext4_quota_off_umount(sb);
+
+ flush_work(&sbi->s_error_work);
+- destroy_workqueue(sbi->rsv_conversion_wq);
++ destroy_workqueue(sbi->s_misc_wq);
+ ext4_release_orphan_info(sb);
+
+ if (sbi->s_journal) {
+@@ -5337,9 +5338,9 @@ no_journal:
+ * The maximum number of concurrent works can be high and
+ * concurrency isn't really necessary. Limit it to 1.
+ */
+- EXT4_SB(sb)->rsv_conversion_wq =
+- alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+- if (!EXT4_SB(sb)->rsv_conversion_wq) {
++ EXT4_SB(sb)->s_misc_wq =
++ alloc_workqueue("ext4-misc", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
++ if (!EXT4_SB(sb)->s_misc_wq) {
+ printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+ ret = -ENOMEM;
+ goto failed_mount4;
+@@ -5553,8 +5554,8 @@ failed_mount4a:
+ sb->s_root = NULL;
+ failed_mount4:
+ ext4_msg(sb, KERN_ERR, "mount failed");
+- if (EXT4_SB(sb)->rsv_conversion_wq)
+- destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
++ if (EXT4_SB(sb)->s_misc_wq)
++ destroy_workqueue(EXT4_SB(sb)->s_misc_wq);
+ failed_mount_wq:
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+@@ -6170,7 +6171,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
+ return 0;
+
+ trace_ext4_sync_fs(sb, wait);
+- flush_workqueue(sbi->rsv_conversion_wq);
++ flush_workqueue(sbi->s_misc_wq);
+ /*
+ * Writeback quota in non-journalled quota case - journalled quota has
+ * no dirty dquots
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 038124c..def6dc7 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1592,6 +1592,36 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+ return 0;
+ }
+
++struct delayed_iput_work {
++ struct work_struct work;
++ struct inode *inode;
++};
++
++static void delayed_iput_fn(struct work_struct *work)
++{
++ struct delayed_iput_work *diwork;
++
++ diwork = container_of(work, struct delayed_iput_work, work);
++ iput(diwork->inode);
++ kfree(diwork);
++}
++
++static void delayed_iput(struct inode *inode, struct delayed_iput_work *work)
++{
++ if (!inode) {
++ kfree(work);
++ return;
++ }
++
++ if (!work) {
++ iput(inode);
++ } else {
++ INIT_WORK(&work->work, delayed_iput_fn);
++ work->inode = inode;
++ queue_work(EXT4_SB(inode->i_sb)->s_misc_wq, &work->work);
++ }
++}
++
+ /*
+ * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
+ * feature is enabled.
+@@ -1609,6 +1639,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ int in_inode = i->in_inode;
+ struct inode *old_ea_inode = NULL;
+ struct inode *new_ea_inode = NULL;
++ struct delayed_iput_work *diwork = NULL;
+ size_t old_size, new_size;
+ int ret;
+
+@@ -1685,7 +1716,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+ * Finish that work before doing any modifications to the xattr data.
+ */
+ if (!s->not_found && here->e_value_inum) {
+- ret = ext4_xattr_inode_iget(inode,
++ diwork = kmalloc(sizeof(*diwork), GFP_NOFS);
++ if (!diwork)
++ ret = -ENOMEM;
++ else
++ ret = ext4_xattr_inode_iget(inode,
+ le32_to_cpu(here->e_value_inum),
+ le32_to_cpu(here->e_hash),
+ &old_ea_inode);
+@@ -1838,7 +1873,7 @@ update_hash:
+
+ ret = 0;
+ out:
+- iput(old_ea_inode);
++ delayed_iput(old_ea_inode, diwork);
+ iput(new_ea_inode);
+ return ret;
+ }
+--
+2.34.1
+