1 commit e239a14001b62d96c186ae2c9f58402f73e63dcc
2 Author: Andrew Perepechko <andrew.perepechko@hpe.com>
3 AuthorDate: Mon Jan 31 19:55:31 2022 +0300
4 LU-15404 ldiskfs: truncate during setxattr leads to kernel panic
6 When changing a large xattr value to a different large xattr value,
7 the old xattr inode is freed. The truncate during the final iput causes
8 the current transaction to restart. Eventually, the parent inode bh is
9 marked dirty and a kernel panic happens when jbd2 figures out that this
10 bh belongs to the committed transaction.
12 A possible fix is to call this final iput in a separate thread.
13 This way, setxattr transactions will never be split into two.
14 Since the setxattr code adds xattr inodes with nlink=0 into the
15 orphan list, old xattr inodes will be properly cleaned up in any case.
18 Change-Id: Idd70befa6a83818ece06daccf9bb6256812674b9
19 Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
23 - fixed a bug added during the porting
24 - fixed a workqueue related deadlock reported by Tetsuo Handa
26 Reviewed-on: https://review.whamcloud.com/46358
27 Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
28 Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
30 fs/ext4/ext4.h | 7 +++++--
31 fs/ext4/page-io.c | 2 +-
32 fs/ext4/super.c | 15 ++++++++-------
33 fs/ext4/xattr.c | 39 +++++++++++++++++++++++++++++++++++++--
34 4 files changed, 51 insertions(+), 12 deletions(-)
36 Index: linux-stage/fs/ext4/ext4.h
37 ===================================================================
38 --- linux-stage.orig/fs/ext4/ext4.h
39 +++ linux-stage/fs/ext4/ext4.h
40 @@ -1464,8 +1464,11 @@ struct ext4_sb_info {
41 struct flex_groups *s_flex_groups;
42 ext4_group_t s_flex_groups_allocated;
44 - /* workqueue for reserved extent conversions (buffered io) */
45 - struct workqueue_struct *rsv_conversion_wq;
47 + * workqueue for reserved extent conversions (buffered io)
48 + * and large ea inodes reclaim
50 + struct workqueue_struct *s_misc_wq;
52 /* timer for periodic error stats printing */
53 struct timer_list s_err_report;
54 Index: linux-stage/fs/ext4/page-io.c
55 ===================================================================
56 --- linux-stage.orig/fs/ext4/page-io.c
57 +++ linux-stage/fs/ext4/page-io.c
58 @@ -191,7 +191,7 @@ static void ext4_add_complete_io(ext4_io
59 WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
60 WARN_ON(!io_end->handle);
61 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
62 - wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq;
63 + wq = EXT4_SB(io_end->inode->i_sb)->s_misc_wq;
64 if (list_empty(&ei->i_rsv_conversion_list))
65 queue_work(wq, &ei->i_rsv_conversion_work);
66 list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
67 Index: linux-stage/fs/ext4/super.c
68 ===================================================================
69 --- linux-stage.orig/fs/ext4/super.c
70 +++ linux-stage/fs/ext4/super.c
71 @@ -849,10 +849,10 @@ static void ext4_put_super(struct super_
74 ext4_unregister_li_request(sb);
75 + flush_workqueue(sbi->s_misc_wq);
76 ext4_quota_off_umount(sb);
78 - flush_workqueue(sbi->rsv_conversion_wq);
79 - destroy_workqueue(sbi->rsv_conversion_wq);
80 + destroy_workqueue(sbi->s_misc_wq);
83 aborted = is_journal_aborted(sbi->s_journal);
84 @@ -4479,9 +4479,9 @@ no_journal:
85 * The maximum number of concurrent works can be high and
86 * concurrency isn't really necessary. Limit it to 1.
88 - EXT4_SB(sb)->rsv_conversion_wq =
89 - alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
90 - if (!EXT4_SB(sb)->rsv_conversion_wq) {
91 + EXT4_SB(sb)->s_misc_wq =
92 + alloc_workqueue("ext4-misc", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
93 + if (!EXT4_SB(sb)->s_misc_wq) {
94 printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
97 @@ -4665,8 +4665,8 @@ failed_mount4a:
100 ext4_msg(sb, KERN_ERR, "mount failed");
101 - if (EXT4_SB(sb)->rsv_conversion_wq)
102 - destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
103 + if (EXT4_SB(sb)->s_misc_wq)
104 + destroy_workqueue(EXT4_SB(sb)->s_misc_wq);
106 if (sbi->s_journal) {
107 jbd2_journal_destroy(sbi->s_journal);
108 @@ -5130,7 +5130,7 @@ static int ext4_sync_fs(struct super_blo
109 struct ext4_sb_info *sbi = EXT4_SB(sb);
111 trace_ext4_sync_fs(sb, wait);
112 - flush_workqueue(sbi->rsv_conversion_wq);
113 + flush_workqueue(sbi->s_misc_wq);
115 * Writeback quota in non-journalled quota case - journalled quota has
117 @@ -5165,7 +5165,7 @@ static int ext4_sync_fs_nojournal(struct
120 trace_ext4_sync_fs(sb, wait);
121 - flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
122 + flush_workqueue(EXT4_SB(sb)->s_misc_wq);
123 dquot_writeback_dquots(sb, -1);
124 if (wait && test_opt(sb, BARRIER))
125 ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
126 Index: linux-stage/fs/ext4/xattr.c
127 ===================================================================
128 --- linux-stage.orig/fs/ext4/xattr.c
129 +++ linux-stage/fs/ext4/xattr.c
130 @@ -929,6 +929,36 @@ ext4_xattr_inode_create(handle_t *handle
134 +struct delayed_iput_work {
135 + struct work_struct work;
136 + struct inode *inode;
139 +static void delayed_iput_fn(struct work_struct *work)
141 + struct delayed_iput_work *diwork;
143 + diwork = container_of(work, struct delayed_iput_work, work);
144 + iput(diwork->inode);
148 +static void delayed_iput(struct inode *inode, struct delayed_iput_work *work)
158 + INIT_WORK(&work->work, delayed_iput_fn);
159 + work->inode = inode;
160 + queue_work(EXT4_SB(inode->i_sb)->s_misc_wq, &work->work);
165 * Unlink the inode storing the value of the EA.
167 @@ -936,14 +966,16 @@ int
168 ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
170 struct inode *ea_inode = NULL;
171 + struct delayed_iput_work *diwork = NULL;
174 + diwork = kmalloc(sizeof(*diwork), GFP_NOFS);
175 ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
179 clear_nlink(ea_inode);
181 + delayed_iput(ea_inode, diwork);