+When changing a large xattr value to a different large xattr value,
+the old xattr inode is freed. Truncate during the final iput causes
+current transaction restart. Eventually, parent inode bh is marked
+dirty and kernel panic happens when jbd2 figures out that this bh
+belongs to the committed transaction.
+
+A possible fix is to call this final iput in a separate thread.
+This way, setxattr transactions will never be split into two.
+Since the setxattr code adds xattr inodes with nlink=0 into the
+orphan list, old xattr inodes will be properly cleaned up in
+any case.
+
+Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
+HPE-bug-id: LUS-10534
+
+Changes since v1:
+- fixed a bug added during the porting
+- fixed a workqueue related deadlock reported by Tetsuo Handa
+---
+ fs/ext4/ext4.h | 7 +++++--
+ fs/ext4/page-io.c | 2 +-
+ fs/ext4/super.c | 15 ++++++++-------
+ fs/ext4/xattr.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 4 files changed, 51 insertions(+), 12 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 3f87cca49f0c..52db5d6bae7f 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1650,8 +1650,11 @@ struct ext4_sb_info {
+ struct flex_groups * __rcu *s_flex_groups;
+ ext4_group_t s_flex_groups_allocated;
+
+- /* workqueue for reserved extent conversions (buffered io) */
+- struct workqueue_struct *rsv_conversion_wq;
++ /*
++ * workqueue for reserved extent conversions (buffered io)
++ * and large ea inodes reclaim
++ */
++ struct workqueue_struct *s_misc_wq;
+
+ /* timer for periodic error stats printing */
+ struct timer_list s_err_report;
+diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
+index 495ce59fb4ad..0142b88471ff 100644
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -228,7 +228,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
+ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+ WARN_ON(!io_end->handle && sbi->s_journal);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+- wq = sbi->rsv_conversion_wq;
++ wq = sbi->s_misc_wq;
+ if (list_empty(&ei->i_rsv_conversion_list))
+ queue_work(wq, &ei->i_rsv_conversion_work);
+ list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index 1ac2f2cdc..197dd546d 100644
+index 81749eaddf4c..ee03f593b264 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
-@@ -972,6 +972,8 @@ static void ext4_put_super(struct super_block *sb)
- int aborted = 0;
+@@ -1200,8 +1200,9 @@ static void ext4_put_super(struct super_block *sb)
int i, err;
-+ flush_scheduled_work();
-+
ext4_unregister_li_request(sb);
++ flush_workqueue(sbi->s_misc_wq);
ext4_quota_off_umount(sb);
+- destroy_workqueue(sbi->rsv_conversion_wq);
++ destroy_workqueue(sbi->s_misc_wq);
+
+ /*
+@@ -5294,9 +5295,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+ * The maximum number of concurrent works can be high and
+ * concurrency isn't really necessary. Limit it to 1.
+ */
+- EXT4_SB(sb)->rsv_conversion_wq =
+- alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+- if (!EXT4_SB(sb)->rsv_conversion_wq) {
++ EXT4_SB(sb)->s_misc_wq =
++ alloc_workqueue("ext4-misc", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
++ if (!EXT4_SB(sb)->s_misc_wq) {
+ printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+ ret = -ENOMEM;
+ goto failed_mount4;
+@@ -5514,8 +5515,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+ sb->s_root = NULL;
+ failed_mount4:
+ ext4_msg(sb, KERN_ERR, "mount failed");
+- if (EXT4_SB(sb)->rsv_conversion_wq)
+- destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
++ if (EXT4_SB(sb)->s_misc_wq)
++ destroy_workqueue(EXT4_SB(sb)->s_misc_wq);
+ failed_mount_wq:
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+@@ -6129,7 +6130,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
+ return 0;
+
+ trace_ext4_sync_fs(sb, wait);
+- flush_workqueue(sbi->rsv_conversion_wq);
++ flush_workqueue(sbi->s_misc_wq);
+ /*
+ * Writeback quota in non-journalled quota case - journalled quota has
+ * no dirty dquots
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
-index 491f9ee40..6a77f12d1 100644
+index 042325349098..ee13675fbead 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
-@@ -1551,6 +1551,31 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
+@@ -1544,6 +1544,36 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
return 0;
}
+
+static void delayed_iput(struct inode *inode, struct delayed_iput_work *work)
+{
++ if (!inode) {
++ kfree(work);
++ return;
++ }
++
+ if (!work) {
+ iput(inode);
+ } else {
+ INIT_WORK(&work->work, delayed_iput_fn);
+ work->inode = inode;
-+ schedule_work(&work->work);
++ queue_work(EXT4_SB(inode->i_sb)->s_misc_wq, &work->work);
+ }
+}
+
/*
* Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
* feature is enabled.
-@@ -1568,6 +1593,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+@@ -1561,6 +1591,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;
-@@ -1644,7 +1670,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+@@ -1637,7 +1668,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
* Finish that work before doing any modifications to the xattr data.
*/
if (!s->not_found && here->e_value_inum) {
le32_to_cpu(here->e_value_inum),
le32_to_cpu(here->e_hash),
&old_ea_inode);
-@@ -1797,7 +1827,7 @@ update_hash:
+@@ -1790,7 +1825,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
ret = 0;
out:
iput(new_ea_inode);
return ret;
}
+--
+2.25.1