LU-9384 ldiskfs: port upstream patches for changing extra isize 45/27045/4
author    Yang Sheng <yang.sheng@intel.com>
Wed, 10 May 2017 17:24:06 +0000 (01:24 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Sat, 20 May 2017 18:44:07 +0000 (18:44 +0000)
Port the following five upstream ext4 patches for changing the extra inode size (i_extra_isize):

commit d0141191a20289f8955c1e03dad08e42e6f71ca9
"ext4: fix xattr shifting when expanding inodes"
commit 418c12d08dc64a45107c467ec1ba29b5e69b0715
"ext4: fix xattr shifting when expanding inodes part 2"
commit 443a8c41cd49de66a3fda45b32b9860ea0292b84
"ext4: properly align shifted xattrs when expanding inodes"
commit e3014d14a81edde488d9a6758eea8afc41752d2d
"ext4: fixup free space calculations when expanding inodes"
commit 94405713889d4a9d341b4ad92956e4e2ec8ec2c2
"ext4: replace bogus assertion in ext4_xattr_shift_entries()"

Signed-off-by: Yang Sheng <yang.sheng@intel.com>
Change-Id: I01414bcc91d8f57ca72281916d35536d3926e570
Reviewed-on: https://review.whamcloud.com/27045
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
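
The common thread in these five patches is that ext4_expand_extra_isize_ea() must distinguish the target i_extra_isize from the number of bytes still to be freed inside the inode (the new isize_diff variable introduced in the first patch). A minimal user-space sketch of that distinction, using the 28-to-32-byte growth cited in the first patch below; the struct and function names here are illustrative, not ext4 APIs:

    /* Illustrative only: mirrors the isize_diff computation these patches
     * introduce.  "fake_inode" and "bytes_needed" are not kernel names. */
    #include <stdio.h>

    struct fake_inode {
            int i_extra_isize;              /* current extra inode size */
    };

    /* Bytes of in-inode xattr space that must be freed to reach the target.
     * The pre-patch code sometimes demanded new_extra_isize bytes here. */
    static int bytes_needed(const struct fake_inode *inode, int new_extra_isize)
    {
            int isize_diff = new_extra_isize - inode->i_extra_isize;

            return isize_diff > 0 ? isize_diff : 0;
    }

    int main(void)
    {
            struct fake_inode inode = { .i_extra_isize = 28 };
            int new_extra_isize = 32;       /* e.g. after adding i_projid */

            printf("target %d, current %d, must free %d bytes\n",
                   new_extra_isize, inode.i_extra_isize,
                   bytes_needed(&inode, new_extra_isize));
            return 0;
    }

With i_extra_isize already at 28, only 4 bytes need to be freed rather than the full 32 bytes the pre-patch logic effectively required.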

diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch
new file mode 100644
index 0000000..a361f1c
--- /dev/null
@@ -0,0 +1,409 @@
+From d0141191a20289f8955c1e03dad08e42e6f71ca9 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 11 Aug 2016 11:50:30 -0400
+Subject: [PATCH] ext4: fix xattr shifting when expanding inodes
+
+The code in ext4_expand_extra_isize_ea() treated new_extra_isize
+argument sometimes as the desired target i_extra_isize and sometimes as
+the amount by which we need to grow current i_extra_isize. These happen
+to coincide when i_extra_isize is 0 which used to be the common case and
+so nobody noticed this until recently when we added i_projid to the
+inode and so i_extra_isize now needs to grow from 28 to 32 bytes.
+
+The result of these bugs was that we sometimes unnecessarily decided to
+move xattrs out of inode even if there was enough space and we often
+ended up corrupting in-inode xattrs because arguments to
+ext4_xattr_shift_entries() were just wrong. This could demonstrate
+itself as BUG_ON in ext4_xattr_shift_entries() triggering.
+
+Fix the problem by introducing new isize_diff variable and use it where
+appropriate.
+
+CC: stable@vger.kernel.org   # 4.4.x
+Reported-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/xattr.c | 27 ++++++++++++++-------------
+ 1 file changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 39e9cfb..cb1d7b4 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1353,15 +1353,17 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       size_t min_offs, free;
+       int total_ino;
+       void *base, *start, *end;
+-      int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
++      int error = 0, tried_min_extra_isize = 0;
+       int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
++      int isize_diff; /* How much do we need to grow i_extra_isize */
+       down_write(&EXT4_I(inode)->xattr_sem);
+       /*
+        * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+        */
+       ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ retry:
++      isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+       if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+               goto out;
+
+@@ -1382,7 +1384,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               goto cleanup;
+       free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
+-      if (free >= new_extra_isize) {
++      if (free >= isize_diff) {
+               entry = IFIRST(header);
+               ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
+                               - new_extra_isize, (void *)raw_inode +
+@@ -1414,7 +1416,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               end = bh->b_data + bh->b_size;
+               min_offs = end - base;
+               free = ext4_xattr_free_space(first, &min_offs, base, NULL);
+-              if (free < new_extra_isize) {
++              if (free < isize_diff) {
+                       if (!tried_min_extra_isize && s_min_extra_isize) {
+                               tried_min_extra_isize++;
+                               new_extra_isize = s_min_extra_isize;
+@@ -1428,7 +1430,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               free = inode->i_sb->s_blocksize;
+       }
+-      while (new_extra_isize > 0) {
++      while (isize_diff > 0) {
+               size_t offs, size, entry_size;
+               struct ext4_xattr_entry *small_entry = NULL;
+               struct ext4_xattr_info i = {
+@@ -1459,7 +1461,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                       EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
+                                       EXT4_XATTR_LEN(last->e_name_len);
+                       if (total_size <= free && total_size < min_total_size) {
+-                              if (total_size < new_extra_isize) {
++                              if (total_size < isize_diff) {
+                                       small_entry = last;
+                               } else {
+                                       entry = last;
+@@ -1516,20 +1518,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                       goto cleanup;
+               entry = IFIRST(header);
+-              if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
+-                      shift_bytes = new_extra_isize;
++              if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
++                      shift_bytes = isize_diff;
+               else
+                       shift_bytes = entry_size + size;
+               /* Adjust the offsets and shift the remaining entries ahead */
+-              ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
+-                      shift_bytes, (void *)raw_inode +
+-                      EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
++              ext4_xattr_shift_entries(entry, -shift_bytes,
++                      (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
++                      EXT4_I(inode)->i_extra_isize + shift_bytes,
+                       (void *)header, total_ino - entry_size,
+                       inode->i_sb->s_blocksize);
+-              extra_isize += shift_bytes;
+-              new_extra_isize -= shift_bytes;
+-              EXT4_I(inode)->i_extra_isize = extra_isize;
++              isize_diff -= shift_bytes;
++              EXT4_I(inode)->i_extra_isize += shift_bytes;
+               i.name = b_entry_name;
+               i.value = buffer;
+-- 
+2.9.3
+
+From 418c12d08dc64a45107c467ec1ba29b5e69b0715 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 11 Aug 2016 11:58:32 -0400
+Subject: [PATCH] ext4: fix xattr shifting when expanding inodes part 2
+
+When multiple xattrs need to be moved out of inode, we did not properly
+recompute total size of xattr headers in the inode and the new header
+position. Thus when moving the second and further xattr we asked
+ext4_xattr_shift_entries() to move too much and from the wrong place,
+resulting in possible xattr value corruption or general memory
+corruption.
+
+CC: stable@vger.kernel.org  # 4.4.x
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/xattr.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index cb1d7b4..b18b1ff 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1516,6 +1516,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               error = ext4_xattr_ibody_set(handle, inode, &i, is);
+               if (error)
+                       goto cleanup;
++              total_ino -= entry_size;
+               entry = IFIRST(header);
+               if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+@@ -1526,11 +1527,11 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               ext4_xattr_shift_entries(entry, -shift_bytes,
+                       (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+                       EXT4_I(inode)->i_extra_isize + shift_bytes,
+-                      (void *)header, total_ino - entry_size,
+-                      inode->i_sb->s_blocksize);
++                      (void *)header, total_ino, inode->i_sb->s_blocksize);
+               isize_diff -= shift_bytes;
+               EXT4_I(inode)->i_extra_isize += shift_bytes;
++              header = IHDR(inode, raw_inode);
+               i.name = b_entry_name;
+               i.value = buffer;
+-- 
+2.9.3
+
+From 443a8c41cd49de66a3fda45b32b9860ea0292b84 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 11 Aug 2016 12:00:01 -0400
+Subject: [PATCH] ext4: properly align shifted xattrs when expanding inodes
+
+We did not count with the padding of xattr value when computing desired
+shift of xattrs in the inode when expanding i_extra_isize. As a result
+we could create unaligned start of inline xattrs. Account for alignment
+properly.
+
+CC: stable@vger.kernel.org  # 4.4.x-
+Signed-off-by: Jan Kara <jack@suse.cz>
+---
+ fs/ext4/xattr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index b18b1ff..c893f00 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1522,7 +1522,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+                       shift_bytes = isize_diff;
+               else
+-                      shift_bytes = entry_size + size;
++                      shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
+               /* Adjust the offsets and shift the remaining entries ahead */
+               ext4_xattr_shift_entries(entry, -shift_bytes,
+                       (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+-- 
+2.9.3
+
+From e3014d14a81edde488d9a6758eea8afc41752d2d Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 29 Aug 2016 15:38:11 -0400
+Subject: [PATCH] ext4: fixup free space calculations when expanding inodes
+
+Conditions checking whether there is enough free space in an xattr block
+and when xattr is large enough to make enough space in the inode forgot
+to account for the fact that inode need not be completely filled up with
+xattrs. Thus we could move unnecessarily many xattrs out of inode or
+even falsely claim there is not enough space to expand the inode. We
+also forgot to update the amount of free space in xattr block when moving
+more xattrs and thus could decide to move too big xattr resulting in
+unexpected failure.
+
+Fix these problems by properly updating free space in the inode and
+xattr block as we move xattrs. To simplify the math, avoid shifting
+xattrs after removing each one xattr and instead just shift xattrs only
+once there is enough free space in the inode.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/xattr.c | 58 ++++++++++++++++++++++++---------------------------------
+ 1 file changed, 24 insertions(+), 34 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 2eb935c..22d2ebc 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1350,7 +1350,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       struct ext4_xattr_ibody_find *is = NULL;
+       struct ext4_xattr_block_find *bs = NULL;
+       char *buffer = NULL, *b_entry_name = NULL;
+-      size_t min_offs, free;
++      size_t min_offs;
++      size_t ifree, bfree;
+       int total_ino;
+       void *base, *start, *end;
+       int error = 0, tried_min_extra_isize = 0;
+@@ -1385,17 +1386,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       if (error)
+               goto cleanup;
+-      free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
+-      if (free >= isize_diff) {
+-              entry = IFIRST(header);
+-              ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
+-                              - new_extra_isize, (void *)raw_inode +
+-                              EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
+-                              (void *)header, total_ino,
+-                              inode->i_sb->s_blocksize);
+-              EXT4_I(inode)->i_extra_isize = new_extra_isize;
+-              goto out;
+-      }
++      ifree = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
++      if (ifree >= isize_diff)
++              goto shift;
+       /*
+        * Enough free space isn't available in the inode, check if
+@@ -1416,8 +1409,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               first = BFIRST(bh);
+               end = bh->b_data + bh->b_size;
+               min_offs = end - base;
+-              free = ext4_xattr_free_space(first, &min_offs, base, NULL);
+-              if (free < isize_diff) {
++              bfree = ext4_xattr_free_space(first, &min_offs, base, NULL);
++              if (bfree + ifree < isize_diff) {
+                       if (!tried_min_extra_isize && s_min_extra_isize) {
+                               tried_min_extra_isize++;
+                               new_extra_isize = s_min_extra_isize;
+@@ -1428,10 +1421,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                       goto cleanup;
+               }
+       } else {
+-              free = inode->i_sb->s_blocksize;
++              bfree = inode->i_sb->s_blocksize;
+       }
+-      while (isize_diff > 0) {
++      while (isize_diff > ifree) {
+               size_t offs, size, entry_size;
+               struct ext4_xattr_entry *small_entry = NULL;
+               struct ext4_xattr_info i = {
+@@ -1439,7 +1432,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                       .value_len = 0,
+               };
+               unsigned int total_size;  /* EA entry size + value size */
+-              unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
+               unsigned int min_total_size = ~0U;
+               is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
+@@ -1461,8 +1453,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                       total_size =
+                       EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
+                                       EXT4_XATTR_LEN(last->e_name_len);
+-                      if (total_size <= free && total_size < min_total_size) {
+-                              if (total_size < isize_diff) {
++                      if (total_size <= bfree &&
++                          total_size < min_total_size) {
++                              if (total_size + ifree < isize_diff) {
+                                       small_entry = last;
+                               } else {
+                                       entry = last;
+@@ -1491,6 +1484,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               offs = le16_to_cpu(entry->e_value_offs);
+               size = le32_to_cpu(entry->e_value_size);
+               entry_size = EXT4_XATTR_LEN(entry->e_name_len);
++              total_size = entry_size + EXT4_XATTR_SIZE(size);
+               i.name_index = entry->e_name_index,
+               buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
+               b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
+@@ -1518,21 +1512,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               if (error)
+                       goto cleanup;
+               total_ino -= entry_size;
+-
+-              entry = IFIRST(header);
+-              if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+-                      shift_bytes = isize_diff;
+-              else
+-                      shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
+-              /* Adjust the offsets and shift the remaining entries ahead */
+-              ext4_xattr_shift_entries(entry, -shift_bytes,
+-                      (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+-                      EXT4_I(inode)->i_extra_isize + shift_bytes,
+-                      (void *)header, total_ino, inode->i_sb->s_blocksize);
+-
+-              isize_diff -= shift_bytes;
+-              EXT4_I(inode)->i_extra_isize += shift_bytes;
+-              header = IHDR(inode, raw_inode);
++              ifree += total_size;
++              bfree -= total_size;
+               i.name = b_entry_name;
+               i.value = buffer;
+@@ -1553,6 +1534,15 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               kfree(is);
+               kfree(bs);
+       }
++
++shift:
++      /* Adjust the offsets and shift the remaining entries ahead */
++      entry = IFIRST(header);
++      ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
++                      - new_extra_isize, (void *)raw_inode +
++                      EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
++                      (void *)header, total_ino, inode->i_sb->s_blocksize);
++      EXT4_I(inode)->i_extra_isize = new_extra_isize;
+       brelse(bh);
+ out:
+       ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+-- 
+2.9.3
+
+From 94405713889d4a9d341b4ad92956e4e2ec8ec2c2 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 29 Aug 2016 15:41:11 -0400
+Subject: [PATCH] ext4: replace bogus assertion in ext4_xattr_shift_entries()
+
+We were checking whether computed offsets do not exceed end of block in
+ext4_xattr_shift_entries(). However this does not make sense since we
+always only decrease offsets. So replace that assertion with a check
+whether we really decrease xattrs value offsets.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/xattr.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 1447860..82b025c 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1319,18 +1319,19 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
+  */
+ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
+                                    int value_offs_shift, void *to,
+-                                   void *from, size_t n, int blocksize)
++                                   void *from, size_t n)
+ {
+       struct ext4_xattr_entry *last = entry;
+       int new_offs;
++      /* We always shift xattr headers further thus offsets get lower */
++      BUG_ON(value_offs_shift > 0);
++
+       /* Adjust the value offsets of the entries */
+       for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+               if (!last->e_value_inum && last->e_value_size) {
+                       new_offs = le16_to_cpu(last->e_value_offs) +
+                                                       value_offs_shift;
+-                      BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
+-                               > blocksize);
+                       last->e_value_offs = cpu_to_le16(new_offs);
+               }
+       }
+@@ -1542,7 +1543,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
+                       - new_extra_isize, (void *)raw_inode +
+                       EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
+-                      (void *)header, total_ino, inode->i_sb->s_blocksize);
++                      (void *)header, total_ino);
+       EXT4_I(inode)->i_extra_isize = new_extra_isize;
+       brelse(bh);
+ out:
+-- 
+2.9.3
+
diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-remove-i_data_sem-from-xattr.patch
new file mode 100644
index 0000000..5f46cb9
--- /dev/null
@@ -0,0 +1,475 @@
+From a521100231f816f8cdd9c8e77da14ff1e42c2b17 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 4 Sep 2014 18:06:25 -0400
+Subject: [PATCH] ext4: pass allocation_request struct to
+ ext4_(alloc,splice)_branch
+
+Instead of initializing the allocation_request structure in
+ext4_alloc_branch(), set it up in ext4_ind_map_blocks(), and then pass
+it to ext4_alloc_branch() and ext4_splice_branch().
+
+This allows ext4_ind_map_blocks to pass flags in the allocation
+request structure without having to add Yet Another argument to
+ext4_alloc_branch().
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+---
+ fs/ext4/indirect.c | 82 +++++++++++++++++++++++++-----------------------------
+ 1 file changed, 38 insertions(+), 44 deletions(-)
+
+diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
+index e75f840..69af0cd 100644
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -318,34 +318,22 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
+  *    ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+  *    as described above and return 0.
+  */
+-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+-                           ext4_lblk_t iblock, int indirect_blks,
+-                           int *blks, ext4_fsblk_t goal,
+-                           ext4_lblk_t *offsets, Indirect *branch)
++static int ext4_alloc_branch(handle_t *handle,
++                           struct ext4_allocation_request *ar,
++                           int indirect_blks, ext4_lblk_t *offsets,
++                           Indirect *branch)
+ {
+-      struct ext4_allocation_request  ar;
+       struct buffer_head *            bh;
+       ext4_fsblk_t                    b, new_blocks[4];
+       __le32                          *p;
+       int                             i, j, err, len = 1;
+-      /*
+-       * Set up for the direct block allocation
+-       */
+-      memset(&ar, 0, sizeof(ar));
+-      ar.inode = inode;
+-      ar.len = *blks;
+-      ar.logical = iblock;
+-      if (S_ISREG(inode->i_mode))
+-              ar.flags = EXT4_MB_HINT_DATA;
+-
+       for (i = 0; i <= indirect_blks; i++) {
+               if (i == indirect_blks) {
+-                      ar.goal = goal;
+-                      new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
++                      new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
+               } else
+-                      goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
+-                                                      goal, 0, NULL, &err);
++                      ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
++                                  ar->inode, ar->goal, 0, NULL, &err);
+               if (err) {
+                       i--;
+                       goto failed;
+@@ -354,7 +342,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+               if (i == 0)
+                       continue;
+-              bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
++              bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
+               if (unlikely(!bh)) {
+                       err = -ENOMEM;
+                       goto failed;
+@@ -372,7 +360,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+               b = new_blocks[i];
+               if (i == indirect_blks)
+-                      len = ar.len;
++                      len = ar->len;
+               for (j = 0; j < len; j++)
+                       *p++ = cpu_to_le32(b++);
+@@ -381,11 +369,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+               unlock_buffer(bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+-              err = ext4_handle_dirty_metadata(handle, inode, bh);
++              err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
+               if (err)
+                       goto failed;
+       }
+-      *blks = ar.len;
+       return 0;
+ failed:
+       for (; i >= 0; i--) {
+@@ -396,10 +383,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+                * existing before ext4_alloc_branch() was called.
+                */
+               if (i > 0 && i != indirect_blks && branch[i].bh)
+-                      ext4_forget(handle, 1, inode, branch[i].bh,
++                      ext4_forget(handle, 1, ar->inode, branch[i].bh,
+                                   branch[i].bh->b_blocknr);
+-              ext4_free_blocks(handle, inode, NULL, new_blocks[i],
+-                               (i == indirect_blks) ? ar.len : 1, 0);
++              ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
++                               (i == indirect_blks) ? ar->len : 1, 0);
+       }
+       return err;
+ }
+@@ -419,9 +406,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+  * inode (->i_blocks, etc.). In case of success we end up with the full
+  * chain to new block and return 0.
+  */
+-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+-                            ext4_lblk_t block, Indirect *where, int num,
+-                            int blks)
++static int ext4_splice_branch(handle_t *handle,
++                            struct ext4_allocation_request *ar,
++                            Indirect *where, int num)
+ {
+       int i;
+       int err = 0;
+@@ -446,9 +433,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+        * Update the host buffer_head or inode to point to more just allocated
+        * direct blocks blocks
+        */
+-      if (num == 0 && blks > 1) {
++      if (num == 0 && ar->len > 1) {
+               current_block = le32_to_cpu(where->key) + 1;
+-              for (i = 1; i < blks; i++)
++              for (i = 1; i < ar->len; i++)
+                       *(where->p + i) = cpu_to_le32(current_block++);
+       }
+@@ -465,14 +452,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+                */
+               jbd_debug(5, "splicing indirect only\n");
+               BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+-              err = ext4_handle_dirty_metadata(handle, inode, where->bh);
++              err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
+               if (err)
+                       goto err_out;
+       } else {
+               /*
+                * OK, we spliced it into the inode itself on a direct block.
+                */
+-              ext4_mark_inode_dirty(handle, inode);
++              ext4_mark_inode_dirty(handle, ar->inode);
+               jbd_debug(5, "splicing direct\n");
+       }
+       return err;
+@@ -484,11 +471,11 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+                * need to revoke the block, which is why we don't
+                * need to set EXT4_FREE_BLOCKS_METADATA.
+                */
+-              ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
++              ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
+                                EXT4_FREE_BLOCKS_FORGET);
+       }
+-      ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
+-                       blks, 0);
++      ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
++                       ar->len, 0);
+       return err;
+ }
+@@ -525,11 +512,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+                       struct ext4_map_blocks *map,
+                       int flags)
+ {
++      struct ext4_allocation_request ar;
+       int err = -EIO;
+       ext4_lblk_t offsets[4];
+       Indirect chain[4];
+       Indirect *partial;
+-      ext4_fsblk_t goal;
+       int indirect_blks;
+       int blocks_to_boundary = 0;
+       int depth;
+@@ -579,7 +566,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+               return -ENOSPC;
+       }
+-      goal = ext4_find_goal(inode, map->m_lblk, partial);
++      /* Set up for the direct block allocation */
++      memset(&ar, 0, sizeof(ar));
++      ar.inode = inode;
++      ar.logical = map->m_lblk;
++      if (S_ISREG(inode->i_mode))
++              ar.flags = EXT4_MB_HINT_DATA;
++
++      ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
+       /* the number of blocks need to allocate for [d,t]indirect blocks */
+       indirect_blks = (chain + depth) - partial - 1;
+@@ -588,13 +582,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+        * Next look up the indirect map to count the totoal number of
+        * direct blocks to allocate for this branch.
+        */
+-      count = ext4_blks_to_allocate(partial, indirect_blks,
+-                                    map->m_len, blocks_to_boundary);
++      ar.len = ext4_blks_to_allocate(partial, indirect_blks,
++                                     map->m_len, blocks_to_boundary);
++
+       /*
+        * Block out ext4_truncate while we alter the tree
+        */
+-      err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
+-                              &count, goal,
++      err = ext4_alloc_branch(handle, &ar, indirect_blks,
+                               offsets + (partial - chain), partial);
+       /*
+@@ -605,14 +599,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+        * may need to return -EAGAIN upwards in the worst case.  --sct
+        */
+       if (!err)
+-              err = ext4_splice_branch(handle, inode, map->m_lblk,
+-                                       partial, indirect_blks, count);
++              err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
+       if (err)
+               goto cleanup;
+       map->m_flags |= EXT4_MAP_NEW;
+       ext4_update_inode_fsync_trans(handle, inode, 1);
++      count = ar.len;
+ got_it:
+       map->m_flags |= EXT4_MAP_MAPPED;
+       map->m_pblk = le32_to_cpu(chain[depth-1].key);
+-- 
+2.7.4
+
+From e3cf5d5d9a86df1c5e413bdd3725c25a16ff854c Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 4 Sep 2014 18:07:25 -0400
+Subject: [PATCH] ext4: prepare to drop EXT4_STATE_DELALLOC_RESERVED
+
+The EXT4_STATE_DELALLOC_RESERVED flag was originally implemented
+because it was too hard to make sure the mballoc and get_block flags
+could be reliably passed down through all of the codepaths that end up
+calling ext4_mb_new_blocks().
+
+Since then, we have mb_flags passed down through most of the code
+paths, so getting rid of EXT4_STATE_DELALLOC_RESERVED isn't as tricky
+as it used to.
+
+This commit plumbs in the last of what is required, and then adds a
+WARN_ON check to make sure we haven't missed anything.  If this passes
+a full regression test run, we can then drop
+EXT4_STATE_DELALLOC_RESERVED.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+---
+ fs/ext4/balloc.c   |  3 +--
+ fs/ext4/extents.c  |  6 +++++-
+ fs/ext4/indirect.c |  6 +++++-
+ fs/ext4/mballoc.c  | 10 ++++++----
+ fs/ext4/xattr.c    |  6 ------
+ 5 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index 581ef40..d70f154 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -636,8 +636,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+        * Account for the allocated meta blocks.  We will never
+        * fail EDQUOT for metdata, but we do account for it.
+        */
+-      if (!(*errp) &&
+-          ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
++      if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
+               spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+               dquot_alloc_block_nofail(inode,
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 3ac1686..8170b32 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1933,6 +1933,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
+       ext4_lblk_t next;
+       int mb_flags = 0, unwritten;
++      if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
++              mb_flags |= EXT4_MB_DELALLOC_RESERVED;
+       if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+               EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+               return -EIO;
+@@ -2054,7 +2056,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
+        * We're gonna add a new leaf in the tree.
+        */
+       if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+-              mb_flags = EXT4_MB_USE_RESERVED;
++              mb_flags |= EXT4_MB_USE_RESERVED;
+       err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
+                                      ppath, newext);
+       if (err)
+@@ -4438,6 +4440,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+               ar.flags = 0;
+       if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
+               ar.flags |= EXT4_MB_HINT_NOPREALLOC;
++      if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
++              ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+       newblock = ext4_mb_new_blocks(handle, &ar, &err);
+       if (!newblock)
+               goto out2;
+diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
+index 69af0cd..36b3696 100644
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -333,7 +333,9 @@ static int ext4_alloc_branch(handle_t *handle,
+                       new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
+               } else
+                       ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+-                                  ar->inode, ar->goal, 0, NULL, &err);
++                                      ar->inode, ar->goal,
++                                      ar->flags & EXT4_MB_DELALLOC_RESERVED,
++                                      NULL, &err);
+               if (err) {
+                       i--;
+                       goto failed;
+@@ -572,6 +574,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+       ar.logical = map->m_lblk;
+       if (S_ISREG(inode->i_mode))
+               ar.flags = EXT4_MB_HINT_DATA;
++      if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
++              ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+       ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 8b0f9ef..15dffda 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4415,9 +4415,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
+        * EDQUOT check, as blocks and quotas have been already
+        * reserved when data being copied into pagecache.
+        */
+-      if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
++      if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED)) {
++              WARN_ON((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0);
+               ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+-      else {
++      }
++
++      if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
+               /* Without delayed allocation we need to verify
+                * there is enough free blocks to do block allocation
+                * and verify allocation doesn't exceed the quota limits.
+@@ -4528,8 +4531,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
+       if (inquota && ar->len < inquota)
+               dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
+       if (!ar->len) {
+-              if (!ext4_test_inode_state(ar->inode,
+-                                         EXT4_STATE_DELALLOC_RESERVED))
++              if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
+                       /* release all the reserved blocks if non delalloc */
+                       percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+                                               reserv_clstrs);
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index e738733..da4df70 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -899,14 +899,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+                               goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
+-                      /*
+-                       * take i_data_sem because we will test
+-                       * i_delalloc_reserved_flag in ext4_mb_new_blocks
+-                       */
+-                      down_read(&EXT4_I(inode)->i_data_sem);
+                       block = ext4_new_meta_blocks(handle, inode, goal, 0,
+                                                    NULL, &error);
+-                      up_read((&EXT4_I(inode)->i_data_sem));
+                       if (error)
+                               goto cleanup;
+-- 
+2.7.4
+
+From 2e81a4eeedcaa66e35f58b81e0755b87057ce392 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 11 Aug 2016 12:38:55 -0400
+Subject: [PATCH] ext4: avoid deadlock when expanding inode size
+
+When we need to move xattrs into external xattr block, we call
+ext4_xattr_block_set() from ext4_expand_extra_isize_ea(). That may end
+up calling ext4_mark_inode_dirty() again which will recurse back into
+the inode expansion code leading to deadlocks.
+
+Protect from recursion using EXT4_STATE_NO_EXPAND inode flag and move
+its management into ext4_expand_extra_isize_ea() since its manipulation
+is safe there (due to xattr_sem) from possible races with
+ext4_xattr_set_handle() which plays with it as well.
+
+CC: stable@vger.kernel.org   # 4.4.x
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+---
+ fs/ext4/inode.c |  2 --
+ fs/ext4/xattr.c | 19 +++++++++++++------
+ 2 files changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 5a6277d..13c95b2 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5466,8 +5466,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
+                                                     sbi->s_want_extra_isize,
+                                                     iloc, handle);
+                       if (ret) {
+-                              ext4_set_inode_state(inode,
+-                                                   EXT4_STATE_NO_EXPAND);
+                               if (mnt_count !=
+                                       le16_to_cpu(sbi->s_es->s_mnt_count)) {
+                                       ext4_warning(inode->i_sb,
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index c893f00..2eb935c 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1358,11 +1358,13 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       int isize_diff; /* How much do we need to grow i_extra_isize */
+       down_write(&EXT4_I(inode)->xattr_sem);
++      /*
++       * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
++       */
++      ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
+ retry:
+-      if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
+-              up_write(&EXT4_I(inode)->xattr_sem);
+-              return 0;
+-      }
++      if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
++              goto out;
+       header = IHDR(inode, raw_inode);
+       entry = IFIRST(header);
+@@ -1392,8 +1394,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                               (void *)header, total_ino,
+                               inode->i_sb->s_blocksize);
+               EXT4_I(inode)->i_extra_isize = new_extra_isize;
+-              error = 0;
+-              goto cleanup;
++              goto out;
+       }
+       /*
+@@ -1553,6 +1554,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+               kfree(bs);
+       }
+       brelse(bh);
++out:
++      ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+       up_write(&EXT4_I(inode)->xattr_sem);
+       return 0;
+@@ -1564,6 +1567,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+       kfree(is);
+       kfree(bs);
+       brelse(bh);
++      /*
++       * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
++       * size expansion failed.
++       */
+       up_write(&EXT4_I(inode)->xattr_sem);
+       return error;
+ }
+-- 
+2.7.4
+
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
index c83b3a0..33e7a35 100644
@@ -29,3 +29,4 @@ rhel7/ext4-projid-ignore-maxquotas.patch
 rhel7/ext4-projid-feature-support.patch
 rhel7/ext4-projid-quotas.patch
 rhel7/ext4-projid-xfs-ioctls.patch
+rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
index 3103ded..82d0b1d 100644
@@ -27,3 +27,4 @@ rhel7/ext4-projid-ignore-maxquotas.patch
 rhel7/ext4-projid-feature-support.patch
 rhel7/ext4-projid-quotas.patch
 rhel7/ext4-projid-xfs-ioctls.patch
+rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
index 86476d2..c91c1e9 100644
@@ -21,7 +21,9 @@ rhel7/ext4-give-warning-with-dir-htree-growing.patch
 rhel7/ext4-mmp-brelse.patch
 rhel7/ext4-jcb-optimization.patch
 rhel7/ext4_s_max_ext_tree_depth.patch
+rhel7/ext4-remove-i_data_sem-from-xattr.patch
 rhel7/ext4-projid-ignore-maxquotas.patch
 rhel7/ext4-projid-feature-support.patch
 rhel7/ext4-projid-quotas.patch
 rhel7/ext4-projid-xfs-ioctls.patch
+rhel7/ext4-fix-xattr-shifting-when-expanding-inodes.patch