From 10283bfb35cff9980fc7a3f83f39007a22b21d0c Mon Sep 17 00:00:00 2001 From: Yang Sheng Date: Thu, 11 May 2017 01:24:06 +0800 Subject: [PATCH] LU-9384 ldiskfs: port upstream patches for changing extra isize Port the following 5 upstream patches for changing extra isize: commit d0141191a20289f8955c1e03dad08e42e6f71ca9 "ext4: fix xattr shifting when expanding inodes" commit 418c12d08dc64a45107c467ec1ba29b5e69b0715 "ext4: fix xattr shifting when expanding inodes part 2" commit 443a8c41cd49de66a3fda45b32b9860ea0292b84 "ext4: properly align shifted xattrs when expanding inodes" commit e3014d14a81edde488d9a6758eea8afc41752d2d "ext4: fixup free space calculations when expanding inodes" commit 94405713889d4a9d341b4ad92956e4e2ec8ec2c2 "ext4: replace bogus assertion in ext4_xattr_shift_entries()" For the rhel7 series, also add ext4-remove-i_data_sem-from-xattr.patch, which carries three further upstream commits (a521100231f8, e3cf5d5d9a86, 2e81a4eeedca); the last of these introduces the EXT4_STATE_NO_EXPAND handling in ext4_expand_extra_isize_ea() that the xattr shifting patch is applied on top of. Signed-off-by: Yang Sheng Change-Id: I01414bcc91d8f57ca72281916d35536d3926e570 Reviewed-on: https://review.whamcloud.com/27045 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Wang Shilong Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- ...-fix-xattr-shifting-when-expanding-inodes.patch | 409 ++++++++++++++++++ .../rhel7/ext4-remove-i_data_sem-from-xattr.patch | 475 +++++++++++++++++++++ .../series/ldiskfs-3.10-rhel7.2.series | 1 + .../series/ldiskfs-3.10-rhel7.3.series | 1 + .../series/ldiskfs-3.10-rhel7.series | 2 + 5 files changed, 888 insertions(+) create mode 100644 ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch new file mode 100644 index 0000000..a361f1c --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch @@ -0,0 +1,409 @@ +From d0141191a20289f8955c1e03dad08e42e6f71ca9 
Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 11 Aug 2016 11:50:30 -0400 +Subject: [PATCH] ext4: fix xattr shifting when expanding inodes + +The code in ext4_expand_extra_isize_ea() treated new_extra_isize +argument sometimes as the desired target i_extra_isize and sometimes as +the amount by which we need to grow current i_extra_isize. These happen +to coincide when i_extra_isize is 0 which used to be the common case and +so nobody noticed this until recently when we added i_projid to the +inode and so i_extra_isize now needs to grow from 28 to 32 bytes. + +The result of these bugs was that we sometimes unnecessarily decided to +move xattrs out of inode even if there was enough space and we often +ended up corrupting in-inode xattrs because arguments to +ext4_xattr_shift_entries() were just wrong. This could demonstrate +itself as BUG_ON in ext4_xattr_shift_entries() triggering. + +Fix the problem by introducing new isize_diff variable and use it where +appropriate. + +CC: stable@vger.kernel.org # 4.4.x +Reported-by: Dave Chinner +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 27 ++++++++++++++------------- + 1 file changed, 14 insertions(+), 13 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 39e9cfb..cb1d7b4 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1353,15 +1353,17 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + size_t min_offs, free; + int total_ino; + void *base, *start, *end; +- int extra_isize = 0, error = 0, tried_min_extra_isize = 0; ++ int error = 0, tried_min_extra_isize = 0; + int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); ++ int isize_diff; /* How much do we need to grow i_extra_isize */ + + down_write(&EXT4_I(inode)->xattr_sem); + /* + * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty + */ + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + retry: ++ isize_diff = new_extra_isize - 
EXT4_I(inode)->i_extra_isize; + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + goto out; + +@@ -1382,7 +1384,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + goto cleanup; + + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); +- if (free >= new_extra_isize) { ++ if (free >= isize_diff) { + entry = IFIRST(header); + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + +@@ -1414,7 +1416,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + end = bh->b_data + bh->b_size; + min_offs = end - base; + free = ext4_xattr_free_space(first, &min_offs, base, NULL); +- if (free < new_extra_isize) { ++ if (free < isize_diff) { + if (!tried_min_extra_isize && s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; +@@ -1428,7 +1430,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + free = inode->i_sb->s_blocksize; + } + +- while (new_extra_isize > 0) { ++ while (isize_diff > 0) { + size_t offs, size, entry_size; + struct ext4_xattr_entry *small_entry = NULL; + struct ext4_xattr_info i = { +@@ -1459,7 +1461,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); + if (total_size <= free && total_size < min_total_size) { +- if (total_size < new_extra_isize) { ++ if (total_size < isize_diff) { + small_entry = last; + } else { + entry = last; +@@ -1516,20 +1518,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + goto cleanup; + + entry = IFIRST(header); +- if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) +- shift_bytes = new_extra_isize; ++ if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) ++ shift_bytes = isize_diff; + else + shift_bytes = entry_size + size; + /* Adjust the offsets and shift the remaining entries ahead */ +- 
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - +- shift_bytes, (void *)raw_inode + +- EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, ++ ext4_xattr_shift_entries(entry, -shift_bytes, ++ (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ++ EXT4_I(inode)->i_extra_isize + shift_bytes, + (void *)header, total_ino - entry_size, + inode->i_sb->s_blocksize); + +- extra_isize += shift_bytes; +- new_extra_isize -= shift_bytes; +- EXT4_I(inode)->i_extra_isize = extra_isize; ++ isize_diff -= shift_bytes; ++ EXT4_I(inode)->i_extra_isize += shift_bytes; + + i.name = b_entry_name; + i.value = buffer; +-- +2.9.3 + +From 418c12d08dc64a45107c467ec1ba29b5e69b0715 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 11 Aug 2016 11:58:32 -0400 +Subject: [PATCH] ext4: fix xattr shifting when expanding inodes part 2 + +When multiple xattrs need to be moved out of inode, we did not properly +recompute total size of xattr headers in the inode and the new header +position. Thus when moving the second and further xattr we asked +ext4_xattr_shift_entries() to move too much and from the wrong place, +resulting in possible xattr value corruption or general memory +corruption. 
+ +CC: stable@vger.kernel.org # 4.4.x +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index cb1d7b4..b18b1ff 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1516,6 +1516,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + error = ext4_xattr_ibody_set(handle, inode, &i, is); + if (error) + goto cleanup; ++ total_ino -= entry_size; + + entry = IFIRST(header); + if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) +@@ -1526,11 +1527,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + ext4_xattr_shift_entries(entry, -shift_bytes, + (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize + shift_bytes, +- (void *)header, total_ino - entry_size, +- inode->i_sb->s_blocksize); ++ (void *)header, total_ino, inode->i_sb->s_blocksize); + + isize_diff -= shift_bytes; + EXT4_I(inode)->i_extra_isize += shift_bytes; ++ header = IHDR(inode, raw_inode); + + i.name = b_entry_name; + i.value = buffer; +-- +2.9.3 + +From 443a8c41cd49de66a3fda45b32b9860ea0292b84 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 11 Aug 2016 12:00:01 -0400 +Subject: [PATCH] ext4: properly align shifted xattrs when expanding inodes + +We did not account for the padding of xattr value when computing desired +shift of xattrs in the inode when expanding i_extra_isize. As a result +we could create unaligned start of inline xattrs. Account for alignment +properly. 
+ +CC: stable@vger.kernel.org # 4.4.x- +Signed-off-by: Jan Kara +--- + fs/ext4/xattr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index b18b1ff..c893f00 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1522,7 +1522,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) + shift_bytes = isize_diff; + else +- shift_bytes = entry_size + size; ++ shift_bytes = entry_size + EXT4_XATTR_SIZE(size); + /* Adjust the offsets and shift the remaining entries ahead */ + ext4_xattr_shift_entries(entry, -shift_bytes, + (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + +-- +2.9.3 + +From e3014d14a81edde488d9a6758eea8afc41752d2d Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 29 Aug 2016 15:38:11 -0400 +Subject: [PATCH] ext4: fixup free space calculations when expanding inodes + +Conditions checking whether there is enough free space in an xattr block +and when xattr is large enough to make enough space in the inode forgot +to account for the fact that inode need not be completely filled up with +xattrs. Thus we could move unnecessarily many xattrs out of inode or +even falsely claim there is not enough space to expand the inode. We +also forgot to update the amount of free space in xattr block when moving +more xattrs and thus could decide to move too big xattr resulting in +unexpected failure. + +Fix these problems by properly updating free space in the inode and +xattr block as we move xattrs. To simplify the math, avoid shifting +xattrs after removing each one xattr and instead just shift xattrs only +once there is enough free space in the inode. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 58 ++++++++++++++++++++++++--------------------------------- + 1 file changed, 24 insertions(+), 34 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 2eb935c..22d2ebc 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1350,7 +1350,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_xattr_ibody_find *is = NULL; + struct ext4_xattr_block_find *bs = NULL; + char *buffer = NULL, *b_entry_name = NULL; +- size_t min_offs, free; ++ size_t min_offs; ++ size_t ifree, bfree; + int total_ino; + void *base, *start, *end; + int error = 0, tried_min_extra_isize = 0; +@@ -1385,17 +1386,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + if (error) + goto cleanup; + +- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); +- if (free >= isize_diff) { +- entry = IFIRST(header); +- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize +- - new_extra_isize, (void *)raw_inode + +- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, +- (void *)header, total_ino, +- inode->i_sb->s_blocksize); +- EXT4_I(inode)->i_extra_isize = new_extra_isize; +- goto out; +- } ++ ifree = ext4_xattr_free_space(last, &min_offs, base, &total_ino); ++ if (ifree >= isize_diff) ++ goto shift; + + /* + * Enough free space isn't available in the inode, check if +@@ -1416,8 +1409,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + first = BFIRST(bh); + end = bh->b_data + bh->b_size; + min_offs = end - base; +- free = ext4_xattr_free_space(first, &min_offs, base, NULL); +- if (free < isize_diff) { ++ bfree = ext4_xattr_free_space(first, &min_offs, base, NULL); ++ if (bfree + ifree < isize_diff) { + if (!tried_min_extra_isize && s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; +@@ -1428,10 +1421,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int 
new_extra_isize, + goto cleanup; + } + } else { +- free = inode->i_sb->s_blocksize; ++ bfree = inode->i_sb->s_blocksize; + } + +- while (isize_diff > 0) { ++ while (isize_diff > ifree) { + size_t offs, size, entry_size; + struct ext4_xattr_entry *small_entry = NULL; + struct ext4_xattr_info i = { +@@ -1439,7 +1432,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + .value_len = 0, + }; + unsigned int total_size; /* EA entry size + value size */ +- unsigned int shift_bytes; /* No. of bytes to shift EAs by? */ + unsigned int min_total_size = ~0U; + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); +@@ -1461,8 +1453,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + total_size = + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); +- if (total_size <= free && total_size < min_total_size) { +- if (total_size < isize_diff) { ++ if (total_size <= bfree && ++ total_size < min_total_size) { ++ if (total_size + ifree < isize_diff) { + small_entry = last; + } else { + entry = last; +@@ -1491,6 +1484,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + offs = le16_to_cpu(entry->e_value_offs); + size = le32_to_cpu(entry->e_value_size); + entry_size = EXT4_XATTR_LEN(entry->e_name_len); ++ total_size = entry_size + EXT4_XATTR_SIZE(size); + i.name_index = entry->e_name_index, + buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); +@@ -1518,21 +1512,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + if (error) + goto cleanup; + total_ino -= entry_size; +- +- entry = IFIRST(header); +- if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) +- shift_bytes = isize_diff; +- else +- shift_bytes = entry_size + EXT4_XATTR_SIZE(size); +- /* Adjust the offsets and shift the remaining entries ahead */ +- ext4_xattr_shift_entries(entry, -shift_bytes, +- (void *)raw_inode + 
EXT4_GOOD_OLD_INODE_SIZE + +- EXT4_I(inode)->i_extra_isize + shift_bytes, +- (void *)header, total_ino, inode->i_sb->s_blocksize); +- +- isize_diff -= shift_bytes; +- EXT4_I(inode)->i_extra_isize += shift_bytes; +- header = IHDR(inode, raw_inode); ++ ifree += total_size; ++ bfree -= total_size; + + i.name = b_entry_name; + i.value = buffer; +@@ -1553,6 +1534,15 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + kfree(is); + kfree(bs); + } ++ ++shift: ++ /* Adjust the offsets and shift the remaining entries ahead */ ++ entry = IFIRST(header); ++ ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize ++ - new_extra_isize, (void *)raw_inode + ++ EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, ++ (void *)header, total_ino, inode->i_sb->s_blocksize); ++ EXT4_I(inode)->i_extra_isize = new_extra_isize; + brelse(bh); + out: + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); +-- +2.9.3 + +From 94405713889d4a9d341b4ad92956e4e2ec8ec2c2 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 29 Aug 2016 15:41:11 -0400 +Subject: [PATCH] ext4: replace bogus assertion in ext4_xattr_shift_entries() + +We were checking whether computed offsets do not exceed end of block in +ext4_xattr_shift_entries(). However this does not make sense since we +always only decrease offsets. So replace that assertion with a check +whether we really decrease xattrs value offsets. 
+ +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/xattr.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 1447860..82b025c 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1319,18 +1319,19 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, + */ + static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, + int value_offs_shift, void *to, +- void *from, size_t n, int blocksize) ++ void *from, size_t n) + { + struct ext4_xattr_entry *last = entry; + int new_offs; + ++ /* We always shift xattr headers further thus offsets get lower */ ++ BUG_ON(value_offs_shift > 0); ++ + /* Adjust the value offsets of the entries */ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + if (!last->e_value_inum && last->e_value_size) { + new_offs = le16_to_cpu(last->e_value_offs) + + value_offs_shift; +- BUG_ON(new_offs + le32_to_cpu(last->e_value_size) +- > blocksize); + last->e_value_offs = cpu_to_le16(new_offs); + } + } +@@ -1542,7 +1543,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, +- (void *)header, total_ino, inode->i_sb->s_blocksize); ++ (void *)header, total_ino); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + brelse(bh); + out: +-- +2.9.3 + diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch new file mode 100644 index 0000000..5f46cb9 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch @@ -0,0 +1,475 @@ +From a521100231f816f8cdd9c8e77da14ff1e42c2b17 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 4 Sep 2014 18:06:25 -0400 +Subject: [PATCH] ext4: pass allocation_request struct to + 
ext4_(alloc,splice)_branch + +Instead of initializing the allocation_request structure in +ext4_alloc_branch(), set it up in ext4_ind_map_blocks(), and then pass +it to ext4_alloc_branch() and ext4_splice_branch(). + +This allows ext4_ind_map_blocks to pass flags in the allocation +request structure without having to add Yet Another argument to +ext4_alloc_branch(). + +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +--- + fs/ext4/indirect.c | 82 +++++++++++++++++++++++++----------------------------- + 1 file changed, 38 insertions(+), 44 deletions(-) + +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index e75f840..69af0cd 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -318,34 +318,22 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, + * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +-static int ext4_alloc_branch(handle_t *handle, struct inode *inode, +- ext4_lblk_t iblock, int indirect_blks, +- int *blks, ext4_fsblk_t goal, +- ext4_lblk_t *offsets, Indirect *branch) ++static int ext4_alloc_branch(handle_t *handle, ++ struct ext4_allocation_request *ar, ++ int indirect_blks, ext4_lblk_t *offsets, ++ Indirect *branch) + { +- struct ext4_allocation_request ar; + struct buffer_head * bh; + ext4_fsblk_t b, new_blocks[4]; + __le32 *p; + int i, j, err, len = 1; + +- /* +- * Set up for the direct block allocation +- */ +- memset(&ar, 0, sizeof(ar)); +- ar.inode = inode; +- ar.len = *blks; +- ar.logical = iblock; +- if (S_ISREG(inode->i_mode)) +- ar.flags = EXT4_MB_HINT_DATA; +- + for (i = 0; i <= indirect_blks; i++) { + if (i == indirect_blks) { +- ar.goal = goal; +- new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); ++ new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); + } else +- goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, +- goal, 0, NULL, &err); ++ ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, ++ ar->inode, ar->goal, 0, 
NULL, &err); + if (err) { + i--; + goto failed; +@@ -354,7 +342,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + if (i == 0) + continue; + +- bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); ++ bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); + if (unlikely(!bh)) { + err = -ENOMEM; + goto failed; +@@ -372,7 +360,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + b = new_blocks[i]; + + if (i == indirect_blks) +- len = ar.len; ++ len = ar->len; + for (j = 0; j < len; j++) + *p++ = cpu_to_le32(b++); + +@@ -381,11 +369,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); +- err = ext4_handle_dirty_metadata(handle, inode, bh); ++ err = ext4_handle_dirty_metadata(handle, ar->inode, bh); + if (err) + goto failed; + } +- *blks = ar.len; + return 0; + failed: + for (; i >= 0; i--) { +@@ -396,10 +383,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + * existing before ext4_alloc_branch() was called. + */ + if (i > 0 && i != indirect_blks && branch[i].bh) +- ext4_forget(handle, 1, inode, branch[i].bh, ++ ext4_forget(handle, 1, ar->inode, branch[i].bh, + branch[i].bh->b_blocknr); +- ext4_free_blocks(handle, inode, NULL, new_blocks[i], +- (i == indirect_blks) ? ar.len : 1, 0); ++ ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], ++ (i == indirect_blks) ? ar->len : 1, 0); + } + return err; + } +@@ -419,9 +406,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. 
+ */ +-static int ext4_splice_branch(handle_t *handle, struct inode *inode, +- ext4_lblk_t block, Indirect *where, int num, +- int blks) ++static int ext4_splice_branch(handle_t *handle, ++ struct ext4_allocation_request *ar, ++ Indirect *where, int num) + { + int i; + int err = 0; +@@ -446,9 +433,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ +- if (num == 0 && blks > 1) { ++ if (num == 0 && ar->len > 1) { + current_block = le32_to_cpu(where->key) + 1; +- for (i = 1; i < blks; i++) ++ for (i = 1; i < ar->len; i++) + *(where->p + i) = cpu_to_le32(current_block++); + } + +@@ -465,14 +452,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); +- err = ext4_handle_dirty_metadata(handle, inode, where->bh); ++ err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + */ +- ext4_mark_inode_dirty(handle, inode); ++ ext4_mark_inode_dirty(handle, ar->inode); + jbd_debug(5, "splicing direct\n"); + } + return err; +@@ -484,11 +471,11 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. 
+ */ +- ext4_free_blocks(handle, inode, where[i].bh, 0, 1, ++ ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); + } +- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), +- blks, 0); ++ ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), ++ ar->len, 0); + + return err; + } +@@ -525,11 +512,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + int flags) + { ++ struct ext4_allocation_request ar; + int err = -EIO; + ext4_lblk_t offsets[4]; + Indirect chain[4]; + Indirect *partial; +- ext4_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; +@@ -579,7 +566,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + return -ENOSPC; + } + +- goal = ext4_find_goal(inode, map->m_lblk, partial); ++ /* Set up for the direct block allocation */ ++ memset(&ar, 0, sizeof(ar)); ++ ar.inode = inode; ++ ar.logical = map->m_lblk; ++ if (S_ISREG(inode->i_mode)) ++ ar.flags = EXT4_MB_HINT_DATA; ++ ++ ar.goal = ext4_find_goal(inode, map->m_lblk, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; +@@ -588,13 +582,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. 
+ */ +- count = ext4_blks_to_allocate(partial, indirect_blks, +- map->m_len, blocks_to_boundary); ++ ar.len = ext4_blks_to_allocate(partial, indirect_blks, ++ map->m_len, blocks_to_boundary); ++ + /* + * Block out ext4_truncate while we alter the tree + */ +- err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, +- &count, goal, ++ err = ext4_alloc_branch(handle, &ar, indirect_blks, + offsets + (partial - chain), partial); + + /* +@@ -605,14 +599,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) +- err = ext4_splice_branch(handle, inode, map->m_lblk, +- partial, indirect_blks, count); ++ err = ext4_splice_branch(handle, &ar, partial, indirect_blks); + if (err) + goto cleanup; + + map->m_flags |= EXT4_MAP_NEW; + + ext4_update_inode_fsync_trans(handle, inode, 1); ++ count = ar.len; + got_it: + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = le32_to_cpu(chain[depth-1].key); +-- +2.7.4 + +From e3cf5d5d9a86df1c5e413bdd3725c25a16ff854c Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 4 Sep 2014 18:07:25 -0400 +Subject: [PATCH] ext4: prepare to drop EXT4_STATE_DELALLOC_RESERVED + +The EXT4_STATE_DELALLOC_RESERVED flag was originally implemented +because it was too hard to make sure the mballoc and get_block flags +could be reliably passed down through all of the codepaths that end up +calling ext4_mb_new_blocks(). + +Since then, we have mb_flags passed down through most of the code +paths, so getting rid of EXT4_STATE_DELALLOC_RESERVED isn't as tricky +as it used to be. + +This commit plumbs in the last of what is required, and then adds a +WARN_ON check to make sure we haven't missed anything. If this passes +a full regression test run, we can then drop +EXT4_STATE_DELALLOC_RESERVED. 
+ +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +--- + fs/ext4/balloc.c | 3 +-- + fs/ext4/extents.c | 6 +++++- + fs/ext4/indirect.c | 6 +++++- + fs/ext4/mballoc.c | 10 ++++++---- + fs/ext4/xattr.c | 6 ------ + 5 files changed, 17 insertions(+), 14 deletions(-) + +diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c +index 581ef40..d70f154 100644 +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -636,8 +636,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + * Account for the allocated meta blocks. We will never + * fail EDQUOT for metdata, but we do account for it. + */ +- if (!(*errp) && +- ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { ++ if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) { + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + dquot_alloc_block_nofail(inode, +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 3ac1686..8170b32 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1933,6 +1933,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + ext4_lblk_t next; + int mb_flags = 0, unwritten; + ++ if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ mb_flags |= EXT4_MB_DELALLOC_RESERVED; + if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { + EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); + return -EIO; +@@ -2054,7 +2056,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + * We're gonna add a new leaf in the tree. 
+ */ + if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) +- mb_flags = EXT4_MB_USE_RESERVED; ++ mb_flags |= EXT4_MB_USE_RESERVED; + err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, + ppath, newext); + if (err) +@@ -4438,6 +4440,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + ar.flags = 0; + if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) + ar.flags |= EXT4_MB_HINT_NOPREALLOC; ++ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ ar.flags |= EXT4_MB_DELALLOC_RESERVED; + newblock = ext4_mb_new_blocks(handle, &ar, &err); + if (!newblock) + goto out2; +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index 69af0cd..36b3696 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -333,7 +333,9 @@ static int ext4_alloc_branch(handle_t *handle, + new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); + } else + ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, +- ar->inode, ar->goal, 0, NULL, &err); ++ ar->inode, ar->goal, ++ ar->flags & EXT4_MB_DELALLOC_RESERVED, ++ NULL, &err); + if (err) { + i--; + goto failed; +@@ -572,6 +574,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, + ar.logical = map->m_lblk; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT4_MB_HINT_DATA; ++ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ++ ar.flags |= EXT4_MB_DELALLOC_RESERVED; + + ar.goal = ext4_find_goal(inode, map->m_lblk, partial); + +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 8b0f9ef..15dffda 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4415,9 +4415,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + * EDQUOT check, as blocks and quotas have been already + * reserved when data being copied into pagecache. 
+ */ +- if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) ++ if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) { ++ WARN_ON((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0); + ar->flags |= EXT4_MB_DELALLOC_RESERVED; +- else { ++ } ++ ++ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) { + /* Without delayed allocation we need to verify + * there is enough free blocks to do block allocation + * and verify allocation doesn't exceed the quota limits. +@@ -4528,8 +4531,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + if (inquota && ar->len < inquota) + dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); + if (!ar->len) { +- if (!ext4_test_inode_state(ar->inode, +- EXT4_STATE_DELALLOC_RESERVED)) ++ if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) + /* release all the reserved blocks if non delalloc */ + percpu_counter_sub(&sbi->s_dirtyclusters_counter, + reserv_clstrs); +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index e738733..da4df70 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -899,14 +899,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; + +- /* +- * take i_data_sem because we will test +- * i_delalloc_reserved_flag in ext4_mb_new_blocks +- */ +- down_read(&EXT4_I(inode)->i_data_sem); + block = ext4_new_meta_blocks(handle, inode, goal, 0, + NULL, &error); +- up_read((&EXT4_I(inode)->i_data_sem)); + if (error) + goto cleanup; + +-- +2.7.4 + +From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 11 Aug 2016 12:38:55 -0400 +Subject: [PATCH] ext4: avoid deadlock when expanding inode size + +When we need to move xattrs into external xattr block, we call +ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end +up calling ext4_mark_inode_dirty() again which will recurse back into +the inode expansion code leading to deadlocks. 
+ +Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move +its management into ext4_expand_extra_isize_ea() since its manipulation +is safe there (due to xattr_sem) from possible races with +ext4_xattr_set_handle() which plays with it as well. + +CC: stable@vger.kernel.org # 4.4.x +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- + fs/ext4/inode.c | 2 -- + fs/ext4/xattr.c | 19 +++++++++++++------ + 2 files changed, 13 insertions(+), 8 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 5a6277d..13c95b2 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) + sbi->s_want_extra_isize, + iloc, handle); + if (ret) { +- ext4_set_inode_state(inode, +- EXT4_STATE_NO_EXPAND); + if (mnt_count != + le16_to_cpu(sbi->s_es->s_mnt_count)) { + ext4_warning(inode->i_sb, +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index c893f00..2eb935c 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1358,11 +1358,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + int isize_diff; /* How much do we need to grow i_extra_isize */ + + down_write(&EXT4_I(inode)->xattr_sem); ++ /* ++ * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty ++ */ ++ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); + retry: +- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { +- up_write(&EXT4_I(inode)->xattr_sem); +- return 0; +- } ++ if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) ++ goto out; + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); +@@ -1392,8 +1394,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + (void *)header, total_ino, + inode->i_sb->s_blocksize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; +- error = 0; +- goto cleanup; ++ goto out; + } + + /* +@@ -1553,6 +1554,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + kfree(bs); + } + 
brelse(bh); ++out: ++ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + +@@ -1564,6 +1567,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + kfree(is); + kfree(bs); + brelse(bh); ++ /* ++ * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode ++ * size expansion failed. ++ */ + up_write(&EXT4_I(inode)->xattr_sem); + return error; + } +-- +2.7.4 + diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series index c83b3a0..33e7a35 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series @@ -29,3 +29,4 @@ rhel7/ext4-projid-ignore-maxquotas.patch rhel7/ext4-projid-feature-support.patch rhel7/ext4-projid-quotas.patch rhel7/ext4-projid-xfs-ioctls.patch +rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series index 3103ded..82d0b1d 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series @@ -27,3 +27,4 @@ rhel7/ext4-projid-ignore-maxquotas.patch rhel7/ext4-projid-feature-support.patch rhel7/ext4-projid-quotas.patch rhel7/ext4-projid-xfs-ioctls.patch +rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series index 86476d2..c91c1e9 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series @@ -21,7 +21,9 @@ rhel7/ext4-give-warning-with-dir-htree-growing.patch rhel7/ext4-mmp-brelse.patch rhel7/ext4-jcb-optimization.patch rhel7/ext4_s_max_ext_tree_depth.patch +rhel7/ext4-remove-i_data_sem-from-xattr.patch 
rhel7/ext4-projid-ignore-maxquotas.patch rhel7/ext4-projid-feature-support.patch rhel7/ext4-projid-quotas.patch rhel7/ext4-projid-xfs-ioctls.patch +rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch -- 1.8.3.1