X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=ldiskfs%2Fkernel_patches%2Fpatches%2Frhel7.2%2Fext4-remove-i_data_sem-from-xattr.patch;fp=ldiskfs%2Fkernel_patches%2Fpatches%2Frhel7.2%2Fext4-remove-i_data_sem-from-xattr.patch;h=fa5ace5ae54f1bfffe35a7d410f0fcd5c449e799;hp=0000000000000000000000000000000000000000;hb=18120272b73a018a2590f1e5a895331b35df75e9;hpb=d4778ad25799ac83edd4ac784aa5314fac9f8b23 diff --git a/ldiskfs/kernel_patches/patches/rhel7.2/ext4-remove-i_data_sem-from-xattr.patch b/ldiskfs/kernel_patches/patches/rhel7.2/ext4-remove-i_data_sem-from-xattr.patch new file mode 100644 index 0000000..fa5ace5 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel7.2/ext4-remove-i_data_sem-from-xattr.patch @@ -0,0 +1,475 @@ +From a521100231f816f8cdd9c8e77da14ff1e42c2b17 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 4 Sep 2014 18:06:25 -0400 +Subject: [PATCH] ext4: pass allocation_request struct to + ext4_(alloc,splice)_branch + +Instead of initializing the allocation_request structure in +ext4_alloc_branch(), set it up in ext4_ind_map_blocks(), and then pass +it to ext4_alloc_branch() and ext4_splice_branch(). + +This allows ext4_ind_map_blocks to pass flags in the allocation +request structure without having to add Yet Another argument to +ext4_alloc_branch(). + +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +--- + fs/ext4/indirect.c | 82 +++++++++++++++++++++++++----------------------------- + 1 file changed, 38 insertions(+), 44 deletions(-) + +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index e75f840..69af0cd 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -318,34 +318,22 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, + * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +-static int ext4_alloc_branch(handle_t *handle, struct inode *inode, +- ext4_lblk_t iblock, int indirect_blks, +- int *blks, ext4_fsblk_t goal, +- ext4_lblk_t *offsets, Indirect *branch) ++static int ext4_alloc_branch(handle_t *handle, ++ struct ext4_allocation_request *ar, ++ int indirect_blks, ext4_lblk_t *offsets, ++ Indirect *branch) + { +- struct ext4_allocation_request ar; + struct buffer_head * bh; + ext4_fsblk_t b, new_blocks[4]; + __le32 *p; + int i, j, err, len = 1; + +- /* +- * Set up for the direct block allocation +- */ +- memset(&ar, 0, sizeof(ar)); +- ar.inode = inode; +- ar.len = *blks; +- ar.logical = iblock; +- if (S_ISREG(inode->i_mode)) +- ar.flags = EXT4_MB_HINT_DATA; +- + for (i = 0; i <= indirect_blks; i++) { + if (i == indirect_blks) { +- ar.goal = goal; +- new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); ++ new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); + } else +- goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, +- goal, 0, NULL, &err); ++ ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, ++ ar->inode, ar->goal, 0, NULL, &err); + if (err) { + i--; + goto failed; +@@ -354,7 +342,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + if (i == 0) + continue; + +- bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); ++ bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); + if (unlikely(!bh)) { + err = -ENOMEM; + goto failed; +@@ -372,7 +360,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + b = new_blocks[i]; + + if (i == indirect_blks) +- len = ar.len; ++ len = ar->len; + for (j = 0; j < len; j++) + *p++ = cpu_to_le32(b++); + +@@ -381,11 +369,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); +- err = ext4_handle_dirty_metadata(handle, inode, bh); ++ err = ext4_handle_dirty_metadata(handle, ar->inode, bh); + if (err) + goto failed; + } +- *blks = ar.len; + return 0; + failed: + for (; i >= 0; i--) { +@@ -396,10 +383,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + * existing before ext4_alloc_branch() was called. + */ + if (i > 0 && i != indirect_blks && branch[i].bh) +- ext4_forget(handle, 1, inode, branch[i].bh, ++ ext4_forget(handle, 1, ar->inode, branch[i].bh, + branch[i].bh->b_blocknr); +- ext4_free_blocks(handle, inode, NULL, new_blocks[i], +- (i == indirect_blks) ? ar.len : 1, 0); ++ ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], ++ (i == indirect_blks) ? ar->len : 1, 0); + } + return err; + } +@@ -419,9 +406,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +-static int ext4_splice_branch(handle_t *handle, struct inode *inode, +- ext4_lblk_t block, Indirect *where, int num, +- int blks) ++static int ext4_splice_branch(handle_t *handle, ++ struct ext4_allocation_request *ar, ++ Indirect *where, int num) + { + int i; + int err = 0; +@@ -446,9 +433,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ +- if (num == 0 && blks > 1) { ++ if (num == 0 && ar->len > 1) { + current_block = le32_to_cpu(where->key) + 1; +- for (i = 1; i < blks; i++) ++ for (i = 1; i < ar->len; i++) + *(where->p + i) = cpu_to_le32(current_block++); + } + +@@ -465,14 +452,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); +- err = ext4_handle_dirty_metadata(handle, inode, where->bh); ++ err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + */ +- ext4_mark_inode_dirty(handle, inode); ++ ext4_mark_inode_dirty(handle, ar->inode); + jbd_debug(5, "splicing direct\n"); + } + return err; +@@ -484,11 +471,11 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. + */ +- ext4_free_blocks(handle, inode, where[i].bh, 0, 1, ++ ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); + } +- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), +- blks, 0); ++ ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), ++ ar->len, 0); + + return err; + } +@@ -525,11 +512,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + int flags) + { ++ struct ext4_allocation_request ar; + int err = -EIO; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; +- ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; +@@ -579,7 +566,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + return -ENOSPC; + } + +- goal = ext4_find_goal(inode, map->m_lblk, partial); ++ /* Set up for the direct block allocation */ ++ memset(&ar, 0, sizeof(ar)); ++ ar.inode = inode; ++ ar.logical = map->m_lblk; ++ if (S_ISREG(inode->i_mode)) ++ ar.flags = EXT4_MB_HINT_DATA; ++ ++ ar.goal = ext4_find_goal(inode, map->m_lblk, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; +@@ -588,13 +582,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ +- count = ext4_blks_to_allocate(partial, indirect_blks, +- map->m_len, blocks_to_boundary); ++ ar.len = ext4_blks_to_allocate(partial, indirect_blks, ++ map->m_len, blocks_to_boundary); ++ + /* + * Block out ext4_truncate while we alter the tree + */ +- err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, +- &count, goal, ++ err = ext4_alloc_branch(handle, &ar, indirect_blks, + offsets + (partial - chain), partial); + + /* +@@ -605,14 +599,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) +- err = ext4_splice_branch(handle, inode, map->m_lblk, +- partial, indirect_blks, count); ++ err = ext4_splice_branch(handle, &ar, partial, indirect_blks); + if (err) + goto cleanup; + + map->m_flags |= EXT4_MAP_NEW; + + ext4_update_inode_fsync_trans(handle, inode, 1); ++ count = ar.len; + got_it: + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); +-- +2.7.4 + +From e3cf5d5d9a86df1c5e413bdd3725c25a16ff854c Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 4 Sep 2014 18:07:25 -0400 +Subject: [PATCH] ext4: prepare to drop EXT4_STATE_DELALLOC_RESERVED + +The EXT4_STATE_DELALLOC_RESERVED flag was originally implemented +because it was too hard to make sure the mballoc and get_block flags +could be reliably passed down through all of the codepaths that end up +calling ext4_mb_new_blocks(). + +Since then, we have mb_flags passed down through most of the code +paths, so getting rid of EXT4_STATE_DELALLOC_RESERVED isn't as tricky +as it used to. + +This commit plumbs in the last of what is required, and then adds a +WARN_ON check to make sure we haven't missed anything. If this passes +a full regression test run, we can then drop +EXT4_STATE_DELALLOC_RESERVED. + +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +--- + fs/ext4/balloc.c | 3 +-- + fs/ext4/extents.c | 6 +++++- + fs/ext4/indirect.c | 6 +++++- + fs/ext4/mballoc.c | 10 ++++++---- + fs/ext4/xattr.c | 6 ------ + 5 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 581ef40..d70f154 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -636,8 +636,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + * Account for the allocated meta blocks. We will never + * fail EDQUOT for metdata, but we do account for it. + */ +- if (!(*errp) && +- ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { ++ if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { + dquot_alloc_block_nofail(inode, + EXT4_C2B(EXT4_SB(inode->i_sb), ar.len)); + } +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 3ac1686..8170b32 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1933,6 +1933,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_lblk_t next; + int mb_flags = 0, unwritten; + ++ if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ mb_flags |= EXT4_MB_DELALLOC_RESERVED; + if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { + EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); + return -EIO; +@@ -2054,7 +2056,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + * We're gonna add a new leaf in the tree. + */ + if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) +- mb_flags = EXT4_MB_USE_RESERVED; ++ mb_flags |= EXT4_MB_USE_RESERVED; + err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, + ppath, newext); + if (err) +@@ -4438,6 +4440,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + ar.flags = 0; + if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) + ar.flags |= EXT4_MB_HINT_NOPREALLOC; ++ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ ar.flags |= EXT4_MB_DELALLOC_RESERVED; + newblock = ext4_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 69af0cd..36b3696 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -333,7 +333,9 @@ static int ext4_alloc_branch(handle_t *handle, + new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); + } else + ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, +- ar->inode, ar->goal, 0, NULL, &err); ++ ar->inode, ar->goal, ++ ar->flags & EXT4_MB_DELALLOC_RESERVED, ++ NULL, &err); + if (err) { + i--; + goto failed; +@@ -572,6 +574,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + ar.logical = map->m_lblk; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT4_MB_HINT_DATA; ++ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ ar.flags |= EXT4_MB_DELALLOC_RESERVED; + + ar.goal = ext4_find_goal(inode, map->m_lblk, partial); + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 8b0f9ef..15dffda 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4415,9 +4415,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + * EDQUOT check, as blocks and quotas have been already + * reserved when data being copied into pagecache. + */ +- if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) ++ if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) { ++ WARN_ON((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0); + ar->flags |= EXT4_MB_DELALLOC_RESERVED; +- else { ++ } ++ ++ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { + /* Without delayed allocation we need to verify + * there is enough free blocks to do block allocation + * and verify allocation doesn't exceed the quota limits. +@@ -4528,8 +4531,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + if (inquota && ar->len < inquota) + dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); + if (!ar->len) { +- if (!ext4_test_inode_state(ar->inode, +- EXT4_STATE_DELALLOC_RESERVED)) ++ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) + /* release all the reserved blocks if non delalloc */ + percpu_counter_sub(&sbi->s_dirtyclusters_counter, + reserv_clstrs); +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index e738733..da4df70 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -899,14 +899,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; + +- /* +- * take i_data_sem because we will test +- * i_delalloc_reserved_flag in ext4_mb_new_blocks +- */ +- down_read(&EXT4_I(inode)->i_data_sem); + block = ext4_new_meta_blocks(handle, inode, goal, 0, + NULL, &error); +- up_read((&EXT4_I(inode)->i_data_sem)); + if (error) + goto cleanup; + +-- +2.7.4 + +From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 11 Aug 2016 12:38:55 -0400 +Subject: [PATCH] ext4: avoid deadlock when expanding inode size + +When we need to move xattrs into external xattr block, we call +ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end +up calling ext4_mark_inode_dirty() again which will recurse back into +the inode expansion code leading to deadlocks. + +Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move +its management into ext4_expand_extra_isize_ea() since its manipulation +is safe there (due to xattr_sem) from possible races with +ext4_xattr_set_handle() which plays with it as well. + +CC: stable@vger.kernel.org # 4.4.x +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/inode.c | 2 -- + fs/ext4/xattr.c | 19 +++++++++++++------ + 2 files changed, 13 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 5a6277d..13c95b2 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) + sbi->s_want_extra_isize, + iloc, handle); + if (ret) { +- ext4_set_inode_state(inode, +- EXT4_STATE_NO_EXPAND); + if (mnt_count != + le16_to_cpu(sbi->s_es->s_mnt_count)) { + ext4_warning(inode->i_sb, +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index c893f00..2eb935c 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1358,11 +1358,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + int isize_diff; /* How much do we need to grow i_extra_isize */ + + down_write(&EXT4_I(inode)->xattr_sem); ++ /* ++ * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty ++ */ ++ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + retry: +- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { +- up_write(&EXT4_I(inode)->xattr_sem); +- return 0; +- } ++ if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) ++ goto out; + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); +@@ -1392,8 +1394,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + (void *)header, total_ino, + inode->i_sb->s_blocksize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; +- error = 0; +- goto cleanup; ++ goto out; + } + + /* +@@ -1553,6 +1554,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + kfree(bs); + } + brelse(bh); ++out: ++ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + +@@ -1564,6 +1567,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + kfree(is); + kfree(bs); + brelse(bh); ++ /* ++ * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode ++ * size expansion failed. ++ */ + up_write(&EXT4_I(inode)->xattr_sem); + return error; + } +-- +2.7.4 +