Whamcloud - gitweb
LU-9146 ldiskfs: backport a few patches to resolve deadlock
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel7.2 / ext4-remove-i_data_sem-from-xattr.patch
1 From a521100231f816f8cdd9c8e77da14ff1e42c2b17 Mon Sep 17 00:00:00 2001
2 From: Theodore Ts'o <tytso@mit.edu>
3 Date: Thu, 4 Sep 2014 18:06:25 -0400
4 Subject: [PATCH] ext4: pass allocation_request struct to
5  ext4_(alloc,splice)_branch
6
7 Instead of initializing the allocation_request structure in
8 ext4_alloc_branch(), set it up in ext4_ind_map_blocks(), and then pass
9 it to ext4_alloc_branch() and ext4_splice_branch().
10
11 This allows ext4_ind_map_blocks to pass flags in the allocation
12 request structure without having to add Yet Another argument to
13 ext4_alloc_branch().
14
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 Reviewed-by: Jan Kara <jack@suse.cz>
17 ---
18  fs/ext4/indirect.c | 82 +++++++++++++++++++++++++-----------------------------
19  1 file changed, 38 insertions(+), 44 deletions(-)
20
21 diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
22 index e75f840..69af0cd 100644
23 --- a/fs/ext4/indirect.c
24 +++ b/fs/ext4/indirect.c
25 @@ -318,34 +318,22 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
26   *     ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
27   *     as described above and return 0.
28   */
29 -static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
30 -                            ext4_lblk_t iblock, int indirect_blks,
31 -                            int *blks, ext4_fsblk_t goal,
32 -                            ext4_lblk_t *offsets, Indirect *branch)
33 +static int ext4_alloc_branch(handle_t *handle,
34 +                            struct ext4_allocation_request *ar,
35 +                            int indirect_blks, ext4_lblk_t *offsets,
36 +                            Indirect *branch)
37  {
38 -       struct ext4_allocation_request  ar;
39         struct buffer_head *            bh;
40         ext4_fsblk_t                    b, new_blocks[4];
41         __le32                          *p;
42         int                             i, j, err, len = 1;
43  
44 -       /*
45 -        * Set up for the direct block allocation
46 -        */
47 -       memset(&ar, 0, sizeof(ar));
48 -       ar.inode = inode;
49 -       ar.len = *blks;
50 -       ar.logical = iblock;
51 -       if (S_ISREG(inode->i_mode))
52 -               ar.flags = EXT4_MB_HINT_DATA;
53 -
54         for (i = 0; i <= indirect_blks; i++) {
55                 if (i == indirect_blks) {
56 -                       ar.goal = goal;
57 -                       new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
58 +                       new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
59                 } else
60 -                       goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
61 -                                                       goal, 0, NULL, &err);
62 +                       ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
63 +                                   ar->inode, ar->goal, 0, NULL, &err);
64                 if (err) {
65                         i--;
66                         goto failed;
67 @@ -354,7 +342,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
68                 if (i == 0)
69                         continue;
70  
71 -               bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
72 +               bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
73                 if (unlikely(!bh)) {
74                         err = -ENOMEM;
75                         goto failed;
76 @@ -372,7 +360,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
77                 b = new_blocks[i];
78  
79                 if (i == indirect_blks)
80 -                       len = ar.len;
81 +                       len = ar->len;
82                 for (j = 0; j < len; j++)
83                         *p++ = cpu_to_le32(b++);
84  
85 @@ -381,11 +369,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
86                 unlock_buffer(bh);
87  
88                 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
89 -               err = ext4_handle_dirty_metadata(handle, inode, bh);
90 +               err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
91                 if (err)
92                         goto failed;
93         }
94 -       *blks = ar.len;
95         return 0;
96  failed:
97         for (; i >= 0; i--) {
98 @@ -396,10 +383,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
99                  * existing before ext4_alloc_branch() was called.
100                  */
101                 if (i > 0 && i != indirect_blks && branch[i].bh)
102 -                       ext4_forget(handle, 1, inode, branch[i].bh,
103 +                       ext4_forget(handle, 1, ar->inode, branch[i].bh,
104                                     branch[i].bh->b_blocknr);
105 -               ext4_free_blocks(handle, inode, NULL, new_blocks[i],
106 -                                (i == indirect_blks) ? ar.len : 1, 0);
107 +               ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
108 +                                (i == indirect_blks) ? ar->len : 1, 0);
109         }
110         return err;
111  }
112 @@ -419,9 +406,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
113   * inode (->i_blocks, etc.). In case of success we end up with the full
114   * chain to new block and return 0.
115   */
116 -static int ext4_splice_branch(handle_t *handle, struct inode *inode,
117 -                             ext4_lblk_t block, Indirect *where, int num,
118 -                             int blks)
119 +static int ext4_splice_branch(handle_t *handle,
120 +                             struct ext4_allocation_request *ar,
121 +                             Indirect *where, int num)
122  {
123         int i;
124         int err = 0;
125 @@ -446,9 +433,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
126          * Update the host buffer_head or inode to point to more just allocated
127          * direct blocks blocks
128          */
129 -       if (num == 0 && blks > 1) {
130 +       if (num == 0 && ar->len > 1) {
131                 current_block = le32_to_cpu(where->key) + 1;
132 -               for (i = 1; i < blks; i++)
133 +               for (i = 1; i < ar->len; i++)
134                         *(where->p + i) = cpu_to_le32(current_block++);
135         }
136  
137 @@ -465,14 +452,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
138                  */
139                 jbd_debug(5, "splicing indirect only\n");
140                 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
141 -               err = ext4_handle_dirty_metadata(handle, inode, where->bh);
142 +               err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
143                 if (err)
144                         goto err_out;
145         } else {
146                 /*
147                  * OK, we spliced it into the inode itself on a direct block.
148                  */
149 -               ext4_mark_inode_dirty(handle, inode);
150 +               ext4_mark_inode_dirty(handle, ar->inode);
151                 jbd_debug(5, "splicing direct\n");
152         }
153         return err;
154 @@ -484,11 +471,11 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
155                  * need to revoke the block, which is why we don't
156                  * need to set EXT4_FREE_BLOCKS_METADATA.
157                  */
158 -               ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
159 +               ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
160                                  EXT4_FREE_BLOCKS_FORGET);
161         }
162 -       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
163 -                        blks, 0);
164 +       ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
165 +                        ar->len, 0);
166  
167         return err;
168  }
169 @@ -525,11 +512,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
170                         struct ext4_map_blocks *map,
171                         int flags)
172  {
173 +       struct ext4_allocation_request ar;
174         int err = -EIO;
175         ext4_lblk_t offsets[4];
176         Indirect chain[4];
177         Indirect *partial;
178 -       ext4_fsblk_t goal;
179         int indirect_blks;
180         int blocks_to_boundary = 0;
181         int depth;
182 @@ -579,7 +566,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
183                 return -ENOSPC;
184         }
185  
186 -       goal = ext4_find_goal(inode, map->m_lblk, partial);
187 +       /* Set up for the direct block allocation */
188 +       memset(&ar, 0, sizeof(ar));
189 +       ar.inode = inode;
190 +       ar.logical = map->m_lblk;
191 +       if (S_ISREG(inode->i_mode))
192 +               ar.flags = EXT4_MB_HINT_DATA;
193 +
194 +       ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
195  
196         /* the number of blocks need to allocate for [d,t]indirect blocks */
197         indirect_blks = (chain + depth) - partial - 1;
198 @@ -588,13 +582,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
199          * Next look up the indirect map to count the totoal number of
200          * direct blocks to allocate for this branch.
201          */
202 -       count = ext4_blks_to_allocate(partial, indirect_blks,
203 -                                     map->m_len, blocks_to_boundary);
204 +       ar.len = ext4_blks_to_allocate(partial, indirect_blks,
205 +                                      map->m_len, blocks_to_boundary);
206 +
207         /*
208          * Block out ext4_truncate while we alter the tree
209          */
210 -       err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
211 -                               &count, goal,
212 +       err = ext4_alloc_branch(handle, &ar, indirect_blks,
213                                 offsets + (partial - chain), partial);
214  
215         /*
216 @@ -605,14 +599,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
217          * may need to return -EAGAIN upwards in the worst case.  --sct
218          */
219         if (!err)
220 -               err = ext4_splice_branch(handle, inode, map->m_lblk,
221 -                                        partial, indirect_blks, count);
222 +               err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
223         if (err)
224                 goto cleanup;
225  
226         map->m_flags |= EXT4_MAP_NEW;
227  
228         ext4_update_inode_fsync_trans(handle, inode, 1);
229 +       count = ar.len;
230  got_it:
231         map->m_flags |= EXT4_MAP_MAPPED;
232         map->m_pblk = le32_to_cpu(chain[depth-1].key);
233 -- 
234 2.7.4
235
236 From e3cf5d5d9a86df1c5e413bdd3725c25a16ff854c Mon Sep 17 00:00:00 2001
237 From: Theodore Ts'o <tytso@mit.edu>
238 Date: Thu, 4 Sep 2014 18:07:25 -0400
239 Subject: [PATCH] ext4: prepare to drop EXT4_STATE_DELALLOC_RESERVED
240
241 The EXT4_STATE_DELALLOC_RESERVED flag was originally implemented
242 because it was too hard to make sure the mballoc and get_block flags
243 could be reliably passed down through all of the codepaths that end up
244 calling ext4_mb_new_blocks().
245
246 Since then, we have mb_flags passed down through most of the code
247 paths, so getting rid of EXT4_STATE_DELALLOC_RESERVED isn't as tricky
248 as it used to be.
249
250 This commit plumbs in the last of what is required, and then adds a
251 WARN_ON check to make sure we haven't missed anything.  If this passes
252 a full regression test run, we can then drop
253 EXT4_STATE_DELALLOC_RESERVED.
254
255 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
256 Reviewed-by: Jan Kara <jack@suse.cz>
257 ---
258  fs/ext4/balloc.c   |  3 +--
259  fs/ext4/extents.c  |  6 +++++-
260  fs/ext4/indirect.c |  6 +++++-
261  fs/ext4/mballoc.c  | 10 ++++++----
262  fs/ext4/xattr.c    |  6 ------
263  5 files changed, 17 insertions(+), 14 deletions(-)
264
265 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
266 index 581ef40..d70f154 100644
267 --- a/fs/ext4/balloc.c
268 +++ b/fs/ext4/balloc.c
269 @@ -636,8 +636,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
270          * Account for the allocated meta blocks.  We will never
271          * fail EDQUOT for metdata, but we do account for it.
272          */
273 -       if (!(*errp) &&
274 -           ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
275 +       if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
276                 dquot_alloc_block_nofail(inode,
277                                 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
278         }
279 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
280 index 3ac1686..8170b32 100644
281 --- a/fs/ext4/extents.c
282 +++ b/fs/ext4/extents.c
283 @@ -1933,6 +1933,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
284         ext4_lblk_t next;
285         int mb_flags = 0, unwritten;
286  
287 +       if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
288 +               mb_flags |= EXT4_MB_DELALLOC_RESERVED;
289         if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
290                 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
291                 return -EIO;
292 @@ -2054,7 +2056,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
293          * We're gonna add a new leaf in the tree.
294          */
295         if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
296 -               mb_flags = EXT4_MB_USE_RESERVED;
297 +               mb_flags |= EXT4_MB_USE_RESERVED;
298         err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
299                                        ppath, newext);
300         if (err)
301 @@ -4438,6 +4440,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
302                 ar.flags = 0;
303         if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
304                 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
305 +       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
306 +               ar.flags |= EXT4_MB_DELALLOC_RESERVED;
307         newblock = ext4_mb_new_blocks(handle, &ar, &err);
308         if (!newblock)
309                 goto out2;
310 diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
311 index 69af0cd..36b3696 100644
312 --- a/fs/ext4/indirect.c
313 +++ b/fs/ext4/indirect.c
314 @@ -333,7 +333,9 @@ static int ext4_alloc_branch(handle_t *handle,
315                         new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
316                 } else
317                         ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
318 -                                   ar->inode, ar->goal, 0, NULL, &err);
319 +                                       ar->inode, ar->goal,
320 +                                       ar->flags & EXT4_MB_DELALLOC_RESERVED,
321 +                                       NULL, &err);
322                 if (err) {
323                         i--;
324                         goto failed;
325 @@ -572,6 +574,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
326         ar.logical = map->m_lblk;
327         if (S_ISREG(inode->i_mode))
328                 ar.flags = EXT4_MB_HINT_DATA;
329 +       if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
330 +               ar.flags |= EXT4_MB_DELALLOC_RESERVED;
331  
332         ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
333  
334 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
335 index 8b0f9ef..15dffda 100644
336 --- a/fs/ext4/mballoc.c
337 +++ b/fs/ext4/mballoc.c
338 @@ -4415,9 +4415,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
339          * EDQUOT check, as blocks and quotas have been already
340          * reserved when data being copied into pagecache.
341          */
342 -       if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
343 +       if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) {
344 +               WARN_ON((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0);
345                 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
346 -       else {
347 +       }
348 +
349 +       if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
350                 /* Without delayed allocation we need to verify
351                  * there is enough free blocks to do block allocation
352                  * and verify allocation doesn't exceed the quota limits.
353 @@ -4528,8 +4531,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
354         if (inquota && ar->len < inquota)
355                 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
356         if (!ar->len) {
357 -               if (!ext4_test_inode_state(ar->inode,
358 -                                          EXT4_STATE_DELALLOC_RESERVED))
359 +               if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
360                         /* release all the reserved blocks if non delalloc */
361                         percpu_counter_sub(&sbi->s_dirtyclusters_counter,
362                                                 reserv_clstrs);
363 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
364 index e738733..da4df70 100644
365 --- a/fs/ext4/xattr.c
366 +++ b/fs/ext4/xattr.c
367 @@ -899,14 +899,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
368                         if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
369                                 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
370  
371 -                       /*
372 -                        * take i_data_sem because we will test
373 -                        * i_delalloc_reserved_flag in ext4_mb_new_blocks
374 -                        */
375 -                       down_read(&EXT4_I(inode)->i_data_sem);
376                         block = ext4_new_meta_blocks(handle, inode, goal, 0,
377                                                      NULL, &error);
378 -                       up_read((&EXT4_I(inode)->i_data_sem));
379                         if (error)
380                                 goto cleanup;
381  
382 -- 
383 2.7.4
384
385 From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001
386 From: Jan Kara <jack@suse.cz>
387 Date: Thu, 11 Aug 2016 12:38:55 -0400
388 Subject: [PATCH] ext4: avoid deadlock when expanding inode size
389
390 When we need to move xattrs into an external xattr block, we call
391 ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end
392 up calling ext4_mark_inode_dirty() again, which will recurse back into
393 the inode expansion code, leading to deadlocks.
394
395 Protect from recursion using the EXT4_STATE_NO_EXPAND inode flag and move
396 its management into ext4_expand_extra_isize_ea() since its manipulation
397 is safe there (due to xattr_sem) from possible races with
398 ext4_xattr_set_handle() which plays with it as well.
399
400 CC: stable@vger.kernel.org   # 4.4.x
401 Signed-off-by: Jan Kara <jack@suse.cz>
402 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
403 ---
404  fs/ext4/inode.c |  2 --
405  fs/ext4/xattr.c | 19 +++++++++++++------
406  2 files changed, 13 insertions(+), 8 deletions(-)
407
408 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
409 index 5a6277d..13c95b2 100644
410 --- a/fs/ext4/inode.c
411 +++ b/fs/ext4/inode.c
412 @@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
413                                                       sbi->s_want_extra_isize,
414                                                       iloc, handle);
415                         if (ret) {
416 -                               ext4_set_inode_state(inode,
417 -                                                    EXT4_STATE_NO_EXPAND);
418                                 if (mnt_count !=
419                                         le16_to_cpu(sbi->s_es->s_mnt_count)) {
420                                         ext4_warning(inode->i_sb,
421 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
422 index c893f00..2eb935c 100644
423 --- a/fs/ext4/xattr.c
424 +++ b/fs/ext4/xattr.c
425 @@ -1358,11 +1358,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
426         int isize_diff; /* How much do we need to grow i_extra_isize */
427  
428         down_write(&EXT4_I(inode)->xattr_sem);
429 +       /*
430 +        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
431 +        */
432 +       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
433  retry:
434 -       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
435 -               up_write(&EXT4_I(inode)->xattr_sem);
436 -               return 0;
437 -       }
438 +       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
439 +               goto out;
440  
441         header = IHDR(inode, raw_inode);
442         entry = IFIRST(header);
443 @@ -1392,8 +1394,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
444                                 (void *)header, total_ino,
445                                 inode->i_sb->s_blocksize);
446                 EXT4_I(inode)->i_extra_isize = new_extra_isize;
447 -               error = 0;
448 -               goto cleanup;
449 +               goto out;
450         }
451  
452         /*
453 @@ -1553,6 +1554,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
454                 kfree(bs);
455         }
456         brelse(bh);
457 +out:
458 +       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
459         up_write(&EXT4_I(inode)->xattr_sem);
460         return 0;
461  
462 @@ -1564,6 +1567,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
463         kfree(is);
464         kfree(bs);
465         brelse(bh);
466 +       /*
467 +        * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
468 +        * size expansion failed.
469 +        */
470         up_write(&EXT4_I(inode)->xattr_sem);
471         return error;
472  }
473 -- 
474 2.7.4
475