1 From a521100231f816f8cdd9c8e77da14ff1e42c2b17 Mon Sep 17 00:00:00 2001
2 From: Theodore Ts'o <tytso@mit.edu>
3 Date: Thu, 4 Sep 2014 18:06:25 -0400
4 Subject: [PATCH] ext4: pass allocation_request struct to
5 ext4_(alloc,splice)_branch
7 Instead of initializing the allocation_request structure in
8 ext4_alloc_branch(), set it up in ext4_ind_map_blocks(), and then pass
9 it to ext4_alloc_branch() and ext4_splice_branch().
11 This allows ext4_ind_map_blocks to pass flags in the allocation
12 request structure without having to add Yet Another argument to ext4_alloc_branch().
15 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
16 Reviewed-by: Jan Kara <jack@suse.cz>
18 fs/ext4/indirect.c | 82 +++++++++++++++++++++++++-----------------------------
19 1 file changed, 38 insertions(+), 44 deletions(-)
21 diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
22 index e75f840..69af0cd 100644
23 --- a/fs/ext4/indirect.c
24 +++ b/fs/ext4/indirect.c
25 @@ -318,34 +318,22 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
26 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
27 * as described above and return 0.
29 -static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
30 - ext4_lblk_t iblock, int indirect_blks,
31 - int *blks, ext4_fsblk_t goal,
32 - ext4_lblk_t *offsets, Indirect *branch)
33 +static int ext4_alloc_branch(handle_t *handle,
34 + struct ext4_allocation_request *ar,
35 + int indirect_blks, ext4_lblk_t *offsets,
38 - struct ext4_allocation_request ar;
39 struct buffer_head * bh;
40 ext4_fsblk_t b, new_blocks[4];
42 int i, j, err, len = 1;
45 - * Set up for the direct block allocation
47 - memset(&ar, 0, sizeof(ar));
50 - ar.logical = iblock;
51 - if (S_ISREG(inode->i_mode))
52 - ar.flags = EXT4_MB_HINT_DATA;
54 for (i = 0; i <= indirect_blks; i++) {
55 if (i == indirect_blks) {
57 - new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
58 + new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
60 - goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
61 - goal, 0, NULL, &err);
62 + ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
63 + ar->inode, ar->goal, 0, NULL, &err);
67 @@ -354,7 +342,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
71 - bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
72 + bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
76 @@ -372,7 +360,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
79 if (i == indirect_blks)
82 for (j = 0; j < len; j++)
83 *p++ = cpu_to_le32(b++);
85 @@ -381,11 +369,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
88 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
89 - err = ext4_handle_dirty_metadata(handle, inode, bh);
90 + err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
98 @@ -396,10 +383,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
99 * existing before ext4_alloc_branch() was called.
101 if (i > 0 && i != indirect_blks && branch[i].bh)
102 - ext4_forget(handle, 1, inode, branch[i].bh,
103 + ext4_forget(handle, 1, ar->inode, branch[i].bh,
104 branch[i].bh->b_blocknr);
105 - ext4_free_blocks(handle, inode, NULL, new_blocks[i],
106 - (i == indirect_blks) ? ar.len : 1, 0);
107 + ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
108 + (i == indirect_blks) ? ar->len : 1, 0);
112 @@ -419,9 +406,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
113 * inode (->i_blocks, etc.). In case of success we end up with the full
114 * chain to new block and return 0.
116 -static int ext4_splice_branch(handle_t *handle, struct inode *inode,
117 - ext4_lblk_t block, Indirect *where, int num,
119 +static int ext4_splice_branch(handle_t *handle,
120 + struct ext4_allocation_request *ar,
121 + Indirect *where, int num)
125 @@ -446,9 +433,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
126 * Update the host buffer_head or inode to point to more just allocated
127 * direct blocks blocks
129 - if (num == 0 && blks > 1) {
130 + if (num == 0 && ar->len > 1) {
131 current_block = le32_to_cpu(where->key) + 1;
132 - for (i = 1; i < blks; i++)
133 + for (i = 1; i < ar->len; i++)
134 *(where->p + i) = cpu_to_le32(current_block++);
137 @@ -465,14 +452,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
139 jbd_debug(5, "splicing indirect only\n");
140 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
141 - err = ext4_handle_dirty_metadata(handle, inode, where->bh);
142 + err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
147 * OK, we spliced it into the inode itself on a direct block.
149 - ext4_mark_inode_dirty(handle, inode);
150 + ext4_mark_inode_dirty(handle, ar->inode);
151 jbd_debug(5, "splicing direct\n");
154 @@ -484,11 +471,11 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
155 * need to revoke the block, which is why we don't
156 * need to set EXT4_FREE_BLOCKS_METADATA.
158 - ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
159 + ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
160 EXT4_FREE_BLOCKS_FORGET);
162 - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
164 + ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
169 @@ -525,11 +512,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
170 struct ext4_map_blocks *map,
173 + struct ext4_allocation_request ar;
175 ext4_lblk_t offsets[4];
180 int blocks_to_boundary = 0;
182 @@ -579,7 +566,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
186 - goal = ext4_find_goal(inode, map->m_lblk, partial);
187 + /* Set up for the direct block allocation */
188 + memset(&ar, 0, sizeof(ar));
190 + ar.logical = map->m_lblk;
191 + if (S_ISREG(inode->i_mode))
192 + ar.flags = EXT4_MB_HINT_DATA;
194 + ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
196 /* the number of blocks need to allocate for [d,t]indirect blocks */
197 indirect_blks = (chain + depth) - partial - 1;
198 @@ -588,13 +582,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
199 * Next look up the indirect map to count the totoal number of
200 * direct blocks to allocate for this branch.
202 - count = ext4_blks_to_allocate(partial, indirect_blks,
203 - map->m_len, blocks_to_boundary);
204 + ar.len = ext4_blks_to_allocate(partial, indirect_blks,
205 + map->m_len, blocks_to_boundary);
208 * Block out ext4_truncate while we alter the tree
210 - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
212 + err = ext4_alloc_branch(handle, &ar, indirect_blks,
213 offsets + (partial - chain), partial);
216 @@ -605,14 +599,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
217 * may need to return -EAGAIN upwards in the worst case. --sct
220 - err = ext4_splice_branch(handle, inode, map->m_lblk,
221 - partial, indirect_blks, count);
222 + err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
226 map->m_flags |= EXT4_MAP_NEW;
228 ext4_update_inode_fsync_trans(handle, inode, 1);
231 map->m_flags |= EXT4_MAP_MAPPED;
232 map->m_pblk = le32_to_cpu(chain[depth-1].key);
236 From e3cf5d5d9a86df1c5e413bdd3725c25a16ff854c Mon Sep 17 00:00:00 2001
237 From: Theodore Ts'o <tytso@mit.edu>
238 Date: Thu, 4 Sep 2014 18:07:25 -0400
239 Subject: [PATCH] ext4: prepare to drop EXT4_STATE_DELALLOC_RESERVED
241 The EXT4_STATE_DELALLOC_RESERVED flag was originally implemented
242 because it was too hard to make sure the mballoc and get_block flags
243 could be reliably passed down through all of the codepaths that end up
244 calling ext4_mb_new_blocks().
246 Since then, we have mb_flags passed down through most of the code
247 paths, so getting rid of EXT4_STATE_DELALLOC_RESERVED isn't as tricky as it used to be.
250 This commit plumbs in the last of what is required, and then adds a
251 WARN_ON check to make sure we haven't missed anything. If this passes
252 a full regression test run, we can then drop
253 EXT4_STATE_DELALLOC_RESERVED.
255 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
256 Reviewed-by: Jan Kara <jack@suse.cz>
258 fs/ext4/balloc.c | 3 +--
259 fs/ext4/extents.c | 6 +++++-
260 fs/ext4/indirect.c | 6 +++++-
261 fs/ext4/mballoc.c | 10 ++++++----
262 fs/ext4/xattr.c | 6 ------
263 5 files changed, 17 insertions(+), 14 deletions(-)
265 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
266 index 581ef40..d70f154 100644
267 --- a/fs/ext4/balloc.c
268 +++ b/fs/ext4/balloc.c
269 @@ -636,8 +636,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
270 * Account for the allocated meta blocks. We will never
271 * fail EDQUOT for metdata, but we do account for it.
274 - ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
275 + if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
276 dquot_alloc_block_nofail(inode,
277 EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
279 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
280 index 3ac1686..8170b32 100644
281 --- a/fs/ext4/extents.c
282 +++ b/fs/ext4/extents.c
283 @@ -1933,6 +1933,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
285 int mb_flags = 0, unwritten;
287 + if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
288 + mb_flags |= EXT4_MB_DELALLOC_RESERVED;
289 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
290 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
292 @@ -2054,7 +2056,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
293 * We're gonna add a new leaf in the tree.
295 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
296 - mb_flags = EXT4_MB_USE_RESERVED;
297 + mb_flags |= EXT4_MB_USE_RESERVED;
298 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
301 @@ -4438,6 +4440,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
303 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
304 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
305 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
306 + ar.flags |= EXT4_MB_DELALLOC_RESERVED;
307 newblock = ext4_mb_new_blocks(handle, &ar, &err);
310 diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
311 index 69af0cd..36b3696 100644
312 --- a/fs/ext4/indirect.c
313 +++ b/fs/ext4/indirect.c
314 @@ -333,7 +333,9 @@ static int ext4_alloc_branch(handle_t *handle,
315 new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
317 ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
318 - ar->inode, ar->goal, 0, NULL, &err);
319 + ar->inode, ar->goal,
320 + ar->flags & EXT4_MB_DELALLOC_RESERVED,
325 @@ -572,6 +574,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
326 ar.logical = map->m_lblk;
327 if (S_ISREG(inode->i_mode))
328 ar.flags = EXT4_MB_HINT_DATA;
329 + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
330 + ar.flags |= EXT4_MB_DELALLOC_RESERVED;
332 ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
334 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
335 index 8b0f9ef..15dffda 100644
336 --- a/fs/ext4/mballoc.c
337 +++ b/fs/ext4/mballoc.c
338 @@ -4415,9 +4415,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
339 * EDQUOT check, as blocks and quotas have been already
340 * reserved when data being copied into pagecache.
342 - if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
343 + if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) {
344 + WARN_ON((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0);
345 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
349 + if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
350 /* Without delayed allocation we need to verify
351 * there is enough free blocks to do block allocation
352 * and verify allocation doesn't exceed the quota limits.
353 @@ -4528,8 +4531,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
354 if (inquota && ar->len < inquota)
355 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
357 - if (!ext4_test_inode_state(ar->inode,
358 - EXT4_STATE_DELALLOC_RESERVED))
359 + if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
360 /* release all the reserved blocks if non delalloc */
361 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
363 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
364 index e738733..da4df70 100644
365 --- a/fs/ext4/xattr.c
366 +++ b/fs/ext4/xattr.c
367 @@ -899,14 +899,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
368 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
369 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
372 - * take i_data_sem because we will test
373 - * i_delalloc_reserved_flag in ext4_mb_new_blocks
375 - down_read(&EXT4_I(inode)->i_data_sem);
376 block = ext4_new_meta_blocks(handle, inode, goal, 0,
378 - up_read((&EXT4_I(inode)->i_data_sem));
385 From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001
386 From: Jan Kara <jack@suse.cz>
387 Date: Thu, 11 Aug 2016 12:38:55 -0400
388 Subject: [PATCH] ext4: avoid deadlock when expanding inode size
390 When we need to move xattrs into external xattr block, we call
391 ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end
392 up calling ext4_mark_inode_dirty() again which will recurse back into
393 the inode expansion code leading to deadlocks.
395 Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move
396 its management into ext4_expand_extra_isize_ea() since its manipulation
397 is safe there (due to xattr_sem) from possible races with
398 ext4_xattr_set_handle() which plays with it as well.
400 CC: stable@vger.kernel.org # 4.4.x
401 Signed-off-by: Jan Kara <jack@suse.cz>
402 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
404 fs/ext4/inode.c | 2 --
405 fs/ext4/xattr.c | 19 +++++++++++++------
406 2 files changed, 13 insertions(+), 8 deletions(-)
408 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
409 index 5a6277d..13c95b2 100644
410 --- a/fs/ext4/inode.c
411 +++ b/fs/ext4/inode.c
412 @@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
413 sbi->s_want_extra_isize,
416 - ext4_set_inode_state(inode,
417 - EXT4_STATE_NO_EXPAND);
419 le16_to_cpu(sbi->s_es->s_mnt_count)) {
420 ext4_warning(inode->i_sb,
421 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
422 index c893f00..2eb935c 100644
423 --- a/fs/ext4/xattr.c
424 +++ b/fs/ext4/xattr.c
425 @@ -1358,11 +1358,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
426 int isize_diff; /* How much do we need to grow i_extra_isize */
428 down_write(&EXT4_I(inode)->xattr_sem);
430 + * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
432 + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
434 - if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
435 - up_write(&EXT4_I(inode)->xattr_sem);
438 + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
441 header = IHDR(inode, raw_inode);
442 entry = IFIRST(header);
443 @@ -1392,8 +1394,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
444 (void *)header, total_ino,
445 inode->i_sb->s_blocksize);
446 EXT4_I(inode)->i_extra_isize = new_extra_isize;
453 @@ -1553,6 +1554,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
458 + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
459 up_write(&EXT4_I(inode)->xattr_sem);
462 @@ -1564,6 +1567,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
467 + * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
468 + * size expansion failed.
470 up_write(&EXT4_I(inode)->xattr_sem);