Whamcloud - gitweb
LU-8534 ldiskfs: Add patch series for RHEL7.3 13/22113/5
author: Christopher J. Morrone <morrone2@llnl.gov>
Wed, 24 Aug 2016 17:22:00 +0000 (13:22 -0400)
committer: Oleg Drokin <oleg.drokin@intel.com>
Fri, 18 Nov 2016 15:35:18 +0000 (15:35 +0000)
Add the new ldiskfs patch series file ldiskfs-3.10-rhel7.3.series which
supports the RHEL7.3 kernel.  Three patch files needed contextual updates
to allow them to apply.

Note that the new RHEL7.3 kernel contains a backport of the
upstream linux kernel commit 923ae0ff9250430133b3310fe62c47538cf1cbc1,
which introduces DAX to ext4.  This adds the flag EXT4_MOUNT_DAX
with value 0x00200.  This conflicted with ext4-data-in-dirent.patch's
EXT4_MOUNT_DIRDATA flag value.  Therefore, for RHEL7.3 the value of the
EXT4_MOUNT_DIRDATA flag is changed to 0x00002.

The ext4-corrupted-inode-block-bitmaps-handling-patches.patch needed
updating for two problems:

In ext4_validate_block_bitmap(), the patch removes the
struct ext4_group_info *grp declaration.  The upstream kernel now
has the following at the beginning of the function:

        if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                return;

The declaration/definition of grp is reintroduced to address that
use.

Change-Id: Ia1a2455c1f353b59202b48ce6cdaad801a7f42d2
Signed-off-by: Christopher J. Morrone <morrone2@llnl.gov>
Reviewed-on: http://review.whamcloud.com/22113
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Minh Diep <minh.diep@intel.com>
Reviewed-by: Yang Sheng <yang.sheng@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
config/lustre-build-ldiskfs.m4
ldiskfs/kernel_patches/patches/rhel7.3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/rhel7.3/ext4-data-in-dirent.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/rhel7.3/ext4-disable-mb-cache.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series [new file with mode: 0644]

index 180eaf4..7de591f 100644 (file)
@@ -13,6 +13,7 @@ esac
 AS_IF([test -z "$LDISKFS_SERIES"], [
 AS_IF([test x$RHEL_KERNEL = xyes], [
        case $RHEL_RELEASE_NO in
 AS_IF([test -z "$LDISKFS_SERIES"], [
 AS_IF([test x$RHEL_KERNEL = xyes], [
        case $RHEL_RELEASE_NO in
+       73)     LDISKFS_SERIES="3.10-rhel7.3.series"    ;;
        72)     LDISKFS_SERIES="3.10-rhel7.2.series"    ;;
        71)     LDISKFS_SERIES="3.10-rhel7.series"      ;;
        68)     LDISKFS_SERIES="2.6-rhel6.8.series"     ;;
        72)     LDISKFS_SERIES="3.10-rhel7.2.series"    ;;
        71)     LDISKFS_SERIES="3.10-rhel7.series"      ;;
        68)     LDISKFS_SERIES="2.6-rhel6.8.series"     ;;
diff --git a/ldiskfs/kernel_patches/patches/rhel7.3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch b/ldiskfs/kernel_patches/patches/rhel7.3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
new file mode 100644 (file)
index 0000000..247f753
--- /dev/null
@@ -0,0 +1,464 @@
+Since we could skip corrupt block groups, this patch
+uses ext4_warning() instead of ext4_error() so that the FS is not
+remounted RO by default; it also fixes a leftover from upstream
+commit 163a203ddb36c36d4a1c942
+---
+Index: linux-stage/fs/ext4/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/balloc.c
++++ linux-stage/fs/ext4/balloc.c
+@@ -185,25 +185,17 @@ static int ext4_init_block_bitmap(struct
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       ext4_fsblk_t start, tmp;
+       int flex_bg = 0;
+-      struct ext4_group_info *grp;
+       J_ASSERT_BH(bh, buffer_locked(bh));
+       /* If checksum is bad mark all blocks used to prevent allocation
+        * essentially implementing a per-group read-only flag. */
+       if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
+-              grp = ext4_get_group_info(sb, block_group);
+-              if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+-                      percpu_counter_sub(&sbi->s_freeclusters_counter,
+-                                         grp->bb_free);
+-              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+-              if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+-                      int count;
+-                      count = ext4_free_inodes_count(sb, gdp);
+-                      percpu_counter_sub(&sbi->s_freeinodes_counter,
+-                                         count);
+-              }
+-              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT |
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "Checksum bad for group %u",
++                              block_group);
+               return -EIO;
+       }
+       memset(bh->b_data, 0, sb->s_blocksize);
+@@ -368,7 +360,6 @@ static void ext4_validate_block_bitmap(s
+ {
+       ext4_fsblk_t    blk;
+       struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       if (buffer_verified(bh) || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+               return;
+@@ -377,22 +368,19 @@ static void ext4_validate_block_bitmap(s
+       blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
+       if (unlikely(blk != 0)) {
+               ext4_unlock_group(sb, block_group);
+-              ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
+-                         block_group, blk);
+-              if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+-                      percpu_counter_sub(&sbi->s_freeclusters_counter,
+-                                         grp->bb_free);
+-              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                              "bg %u: block %llu: invalid block bitmap",
++                              block_group, blk);
+               return;
+       }
+       if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
+                       desc, bh))) {
+               ext4_unlock_group(sb, block_group);
+-              ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+-              if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+-                      percpu_counter_sub(&sbi->s_freeclusters_counter,
+-                                         grp->bb_free);
+-              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                              "bg %u: bad block bitmap checksum",
++                              block_group);
+               return;
+       }
+       set_buffer_verified(bh);
+@@ -445,8 +433,6 @@ ext4_read_block_bitmap_nowait(struct sup
+               set_buffer_uptodate(bh);
+               ext4_unlock_group(sb, block_group);
+               unlock_buffer(bh);
+-              if (err)
+-                      ext4_error(sb, "Checksum bad for grp %u", block_group);
+               goto verify;
+       }
+       ext4_unlock_group(sb, block_group);
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -91,6 +91,17 @@ typedef __u32 ext4_lblk_t;
+ /* data type for block group number */
+ typedef unsigned int ext4_group_t;
++void __ext4_corrupted_block_group(struct super_block *sb,
++                                ext4_group_t group, unsigned int flags,
++                                const char *function, unsigned int line);
++
++#define ext4_corrupted_block_group(sb, group, flags, fmt, ...)                \
++      do {                                                            \
++              __ext4_warning(sb, __func__, __LINE__, fmt,             \
++                              ##__VA_ARGS__);                         \
++              __ext4_corrupted_block_group(sb, group, flags,          \
++                                      __func__, __LINE__);            \
++      } while (0)
+ /*
+  * Flags used in mballoc's allocation_context flags field.
+  *
+@@ -2676,7 +2687,11 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_NEED_INIT_BIT         0
+ #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT               1
+ #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT   2
++#define EXT4_GROUP_INFO_BBITMAP_CORRUPT               \
++      (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
++#define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
++      (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+Index: linux-stage/fs/ext4/ialloc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/ialloc.c
++++ linux-stage/fs/ext4/ialloc.c
+@@ -70,26 +70,15 @@ static unsigned ext4_init_inode_bitmap(s
+                                      ext4_group_t block_group,
+                                      struct ext4_group_desc *gdp)
+ {
+-      struct ext4_group_info *grp;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       J_ASSERT_BH(bh, buffer_locked(bh));
+       /* If checksum is bad mark all blocks and inodes use to prevent
+        * allocation, essentially implementing a per-group read-only flag. */
+       if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
+-              ext4_error(sb, "Checksum bad for group %u", block_group);
+-              grp = ext4_get_group_info(sb, block_group);
+-              if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+-                      percpu_counter_sub(&sbi->s_freeclusters_counter,
+-                                         grp->bb_free);
+-              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+-              if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+-                      int count;
+-                      count = ext4_free_inodes_count(sb, gdp);
+-                      percpu_counter_sub(&sbi->s_freeinodes_counter,
+-                                         count);
+-              }
+-              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT |
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "Checksum bad for group %u", block_group);
+               return 0;
+       }
+@@ -125,8 +114,6 @@ ext4_read_inode_bitmap(struct super_bloc
+       struct ext4_group_desc *desc;
+       struct buffer_head *bh = NULL;
+       ext4_fsblk_t bitmap_blk;
+-      struct ext4_group_info *grp;
+-      struct ext4_sb_info *sbi = EXT4_SB(sb);
+       desc = ext4_get_group_desc(sb, block_group, NULL);
+       if (!desc)
+@@ -193,16 +180,10 @@ verify:
+                                          EXT4_INODES_PER_GROUP(sb) / 8)) {
+               ext4_unlock_group(sb, block_group);
+               put_bh(bh);
+-              ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
+-                         "inode_bitmap = %llu", block_group, bitmap_blk);
+-              grp = ext4_get_group_info(sb, block_group);
+-              if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+-                      int count;
+-                      count = ext4_free_inodes_count(sb, desc);
+-                      percpu_counter_sub(&sbi->s_freeinodes_counter,
+-                                         count);
+-              }
+-              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "Corrupt inode bitmap - block_group = %u, inode_bitmap = %llu",
++                              block_group, bitmap_blk);
+               return NULL;
+       }
+       ext4_unlock_group(sb, block_group);
+@@ -337,14 +318,9 @@ out:
+               if (!fatal)
+                       fatal = err;
+       } else {
+-              ext4_error(sb, "bit already cleared for inode %lu", ino);
+-              if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+-                      int count;
+-                      count = ext4_free_inodes_count(sb, gdp);
+-                      percpu_counter_sub(&sbi->s_freeinodes_counter,
+-                                         count);
+-              }
+-              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "bit already cleared for inode %lu", ino);
+       }
+ error_return:
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -752,10 +752,18 @@ int ext4_mb_generate_buddy(struct super_
+       if (free != grp->bb_free) {
+               struct ext4_group_desc *gdp;
+               gdp = ext4_get_group_desc(sb, group, NULL);
+-              ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
+-                      "%u in gd, %lu pa's\n", (long unsigned int)group,
+-                      free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
+-                      grp->bb_prealloc_nr);
++
++              ext4_corrupted_block_group(sb, group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                              "group %lu: %u blocks in bitmap, %u in bb, %u in gd, %lu pa's block bitmap corrupt",
++                              (unsigned long int)group, free, grp->bb_free,
++                              ext4_free_group_clusters(sb, gdp),
++                              grp->bb_prealloc_nr);
++              /*
++               * If we intend to continue, we consider group descriptor
++               * corrupt and update bb_free using bitmap value
++               */
++              grp->bb_free = free;
+               return -EIO;
+       }
+       mb_set_largest_free_order(sb, grp);
+@@ -1101,7 +1109,7 @@ ext4_mb_load_buddy(struct super_block *s
+       int block;
+       int pnum;
+       int poff;
+-      struct page *page;
++      struct page *page = NULL;
+       int ret;
+       struct ext4_group_info *grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -1127,7 +1135,7 @@ ext4_mb_load_buddy(struct super_block *s
+                */
+               ret = ext4_mb_init_group(sb, group);
+               if (ret)
+-                      return ret;
++                      goto err;
+       }
+       /*
+@@ -1227,6 +1235,7 @@ err:
+               page_cache_release(e4b->bd_buddy_page);
+       e4b->bd_buddy = NULL;
+       e4b->bd_bitmap = NULL;
++      ext4_warning(sb, "Error loading buddy information for %u", group);
+       return ret;
+ }
+@@ -3598,9 +3607,11 @@ int ext4_mb_check_ondisk_bitmap(struct s
+       }
+       if (free != ext4_free_group_clusters(sb, gdp)) {
+-              ext4_error(sb, "on-disk bitmap for group %d"
+-                      "corrupted: %u blocks free in bitmap, %u - in gd\n",
+-                      group, free, ext4_free_group_clusters(sb, gdp));
++              ext4_corrupted_block_group(sb, group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                              "on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
++                              group, free,
++                              ext4_free_group_clusters(sb, gdp));
+               return -EIO;
+       }
+       return 0;
+@@ -3961,16 +3972,8 @@ ext4_mb_release_inode_pa(struct ext4_bud
+       /* "free < pa->pa_free" means we maybe double alloc the same blocks,
+        * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
+       if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
+-              ext4_error(sb, "pa free mismatch: [pa %p] "
+-                              "[phy %lu] [logic %lu] [len %u] [free %u] "
+-                              "[error %u] [inode %lu] [freed %u]", pa,
+-                              (unsigned long)pa->pa_pstart,
+-                              (unsigned long)pa->pa_lstart,
+-                              (unsigned)pa->pa_len, (unsigned)pa->pa_free,
+-                              (unsigned)pa->pa_error, pa->pa_inode->i_ino,
+-                              free);
+               ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
+-                                      free, pa->pa_free);
++                                    free, pa->pa_free);
+               /*
+                * pa is already deleted so we use the value obtained
+                * from the bitmap and continue.
+@@ -4030,14 +4033,11 @@ ext4_mb_discard_group_preallocations(str
+               return 0;
+       bitmap_bh = ext4_read_block_bitmap(sb, group);
+-      if (bitmap_bh == NULL) {
+-              ext4_error(sb, "Error reading block bitmap for %u", group);
++      if (bitmap_bh == NULL)
+               return 0;
+-      }
+       err = ext4_mb_load_buddy(sb, group, &e4b);
+       if (err) {
+-              ext4_error(sb, "Error loading buddy information for %u", group);
+               put_bh(bitmap_bh);
+               return 0;
+       }
+@@ -4197,16 +4197,11 @@ repeat:
+               group = ext4_get_group_number(sb, pa->pa_pstart);
+               err = ext4_mb_load_buddy(sb, group, &e4b);
+-              if (err) {
+-                      ext4_error(sb, "Error loading buddy information for %u",
+-                                      group);
++              if (err)
+                       return;
+-              }
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
+               if (bitmap_bh == NULL) {
+-                      ext4_error(sb, "Error reading block bitmap for %u",
+-                                      group);
+                       ext4_mb_unload_buddy(&e4b);
+                       continue;
+               }
+@@ -4466,11 +4461,8 @@ ext4_mb_discard_lg_preallocations(struct
+       list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+               group = ext4_get_group_number(sb, pa->pa_pstart);
+-              if (ext4_mb_load_buddy(sb, group, &e4b)) {
+-                      ext4_error(sb, "Error loading buddy information for %u",
+-                                      group);
++              if (ext4_mb_load_buddy(sb, group, &e4b))
+                       continue;
+-              }
+               ext4_lock_group(sb, group);
+               list_del(&pa->pa_group_list);
+               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
+@@ -4741,17 +4733,18 @@ errout:
+                        * been updated or not when fail case. So can
+                        * not revert pa_free back, just mark pa_error*/
+                       pa->pa_error++;
+-                      ext4_error(sb,
+-                              "Updating bitmap error: [err %d] "
+-                              "[pa %p] [phy %lu] [logic %lu] "
+-                              "[len %u] [free %u] [error %u] "
+-                              "[inode %lu]", *errp, pa,
+-                              (unsigned long)pa->pa_pstart,
+-                              (unsigned long)pa->pa_lstart,
+-                              (unsigned)pa->pa_len,
+-                              (unsigned)pa->pa_free,
+-                              (unsigned)pa->pa_error,
+-                              pa->pa_inode ? pa->pa_inode->i_ino : 0);
++                      ext4_corrupted_block_group(sb, 0, 0,
++                                      "Updating bitmap error: [err %d] "
++                                      "[pa %p] [phy %lu] [logic %lu] "
++                                      "[len %u] [free %u] [error %u] "
++                                      "[inode %lu]", *errp, pa,
++                                      (unsigned long)pa->pa_pstart,
++                                      (unsigned long)pa->pa_lstart,
++                                      (unsigned)pa->pa_len,
++                                      (unsigned)pa->pa_free,
++                                      (unsigned)pa->pa_error,
++                                      pa->pa_inode ?
++                                      pa->pa_inode->i_ino : 0);
+               }
+       }
+       ext4_mb_release_context(ac);
+@@ -5036,7 +5029,7 @@ do_more:
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       if (err)
+-              goto error_return;
++              goto error_brelse;
+       if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+               struct ext4_free_data *new_entry;
+@@ -5118,8 +5111,9 @@ do_more:
+               goto do_more;
+       }
+ error_return:
+-      brelse(bitmap_bh);
+       ext4_std_error(sb, err);
++error_brelse:
++      brelse(bitmap_bh);
+       return;
+ }
+@@ -5215,7 +5209,7 @@ int ext4_group_add_blocks(handle_t *hand
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       if (err)
+-              goto error_return;
++              goto error_brelse;
+       /*
+        * need to update group_info->bb_free and bitmap
+@@ -5252,8 +5246,9 @@ int ext4_group_add_blocks(handle_t *hand
+               err = ret;
+ error_return:
+-      brelse(bitmap_bh);
+       ext4_std_error(sb, err);
++error_brelse:
++      brelse(bitmap_bh);
+       return err;
+ }
+@@ -5328,11 +5323,9 @@ ext4_trim_all_free(struct super_block *s
+       trace_ext4_trim_all_free(sb, group, start, max);
+       ret = ext4_mb_load_buddy(sb, group, &e4b);
+-      if (ret) {
+-              ext4_error(sb, "Error in loading buddy "
+-                              "information for %u", group);
++      if (ret)
+               return ret;
+-      }
++
+       bitmap = e4b.bd_bitmap;
+       ext4_lock_group(sb, group);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -633,6 +633,37 @@ void __ext4_warning(struct super_block *
+       va_end(args);
+ }
++void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
++                                unsigned int flags, const char *function,
++                                unsigned int line)
++{
++      struct ext4_sb_info *sbi = EXT4_SB(sb);
++      struct ext4_group_info *grp = ext4_get_group_info(sb, group);
++      struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
++
++      if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT &&
++          !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
++              percpu_counter_sub(&sbi->s_freeclusters_counter,
++                                      grp->bb_free);
++              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
++                      &grp->bb_state);
++      }
++
++      if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT &&
++          !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
++              if (gdp) {
++                      int count;
++
++                      count = ext4_free_inodes_count(sb, gdp);
++                      percpu_counter_sub(&sbi->s_freeinodes_counter,
++                                         count);
++              }
++              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
++                      &grp->bb_state);
++      }
++      save_error_info(sb, function, line);
++}
++
+ void __ext4_grp_locked_error(const char *function, unsigned int line,
+                            struct super_block *sb, ext4_group_t grp,
+                            unsigned long ino, ext4_fsblk_t block,
diff --git a/ldiskfs/kernel_patches/patches/rhel7.3/ext4-data-in-dirent.patch b/ldiskfs/kernel_patches/patches/rhel7.3/ext4-data-in-dirent.patch
new file mode 100644 (file)
index 0000000..7e89b5a
--- /dev/null
@@ -0,0 +1,749 @@
+This patch implements a feature which allows ext4 fs users (e.g. Lustre)
+to store data in an ext4 dirent.
+The data is stored in the ext4 dirent after the file name; this space is
+accounted for in de->rec_len. The flag EXT4_DIRENT_LUFID is added to d_type
+if extra data is present.
+
+dentry->d_fsdata is used to pass the fid to ext4, so no changes to the
+ext4_add_entry() interface are required.
+
+Index: linux-stage/fs/ext4/dir.c
+===================================================================
+--- linux-stage.orig/fs/ext4/dir.c
++++ linux-stage/fs/ext4/dir.c
+@@ -71,11 +71,11 @@ int __ext4_check_dir_entry(const char *f
+       const int rlen = ext4_rec_len_from_disk(de->rec_len,
+                                               dir->i_sb->s_blocksize);
+-      if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
++      if (unlikely(rlen < __EXT4_DIR_REC_LEN(1)))
+               error_msg = "rec_len is smaller than minimal";
+       else if (unlikely(rlen % 4 != 0))
+               error_msg = "rec_len % 4 != 0";
+-      else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
++      else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
+               error_msg = "rec_len is too small for name_len";
+       else if (unlikely(((char *) de - buf) + rlen > size))
+               error_msg = "directory entry across range";
+@@ -208,7 +208,7 @@ revalidate:
+                                * failure will be detected in the
+                                * dirent test below. */
+                               if (ext4_rec_len_from_disk(de->rec_len,
+-                                      sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
++                                  sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
+                                       break;
+                               i += ext4_rec_len_from_disk(de->rec_len,
+                                                           sb->s_blocksize);
+@@ -438,12 +438,17 @@ int ext4_htree_store_dirent(struct file
+       struct fname *fname, *new_fn;
+       struct dir_private_info *info;
+       int len;
++      int extra_data = 0;
+       info = dir_file->private_data;
+       p = &info->root.rb_node;
+       /* Create and allocate the fname structure */
+-      len = sizeof(struct fname) + dirent->name_len + 1;
++      if (dirent->file_type & EXT4_DIRENT_LUFID)
++              extra_data = ext4_get_dirent_data_len(dirent);
++
++      len = sizeof(struct fname) + dirent->name_len + extra_data + 1;
++
+       new_fn = kzalloc(len, GFP_KERNEL);
+       if (!new_fn)
+               return -ENOMEM;
+@@ -452,7 +457,7 @@ int ext4_htree_store_dirent(struct file
+       new_fn->inode = le32_to_cpu(dirent->inode);
+       new_fn->name_len = dirent->name_len;
+       new_fn->file_type = dirent->file_type;
+-      memcpy(new_fn->name, dirent->name, dirent->name_len);
++      memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data);
+       new_fn->name[dirent->name_len] = 0;
+       while (*p) {
+@@ -635,7 +640,7 @@ int ext4_check_all_de(struct inode *dir,
+               if (ext4_check_dir_entry(dir, NULL, de, bh,
+                                        buf, buf_size, offset))
+                       return -EIO;
+-              nlen = EXT4_DIR_REC_LEN(de->name_len);
++              nlen = EXT4_DIR_REC_LEN(de);
+               rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+               de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+               offset += rlen;
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -963,6 +963,7 @@ struct ext4_inode_info {
+ /*
+  * Mount flags set via mount options or defaults
+  */
++#define EXT4_MOUNT_DIRDATA            0x00002 /* Data in directory entries*/
+ #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
+ #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
+ #define EXT4_MOUNT_ERRORS_CONT                0x00010 /* Continue on errors */
+@@ -1574,6 +1575,7 @@ static inline void ext4_clear_state_flag
+                                        EXT4_FEATURE_INCOMPAT_64BIT| \
+                                        EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+                                        EXT4_FEATURE_INCOMPAT_MMP |    \
++                                       EXT4_FEATURE_INCOMPAT_DIRDATA| \
+                                        EXT4_FEATURE_INCOMPAT_INLINE_DATA)
+ #define EXT4_FEATURE_RO_COMPAT_SUPP   (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+@@ -1680,6 +1682,43 @@ struct ext4_dir_entry_tail {
+ #define EXT4_FT_SYMLINK               7
+ #define EXT4_FT_MAX           8
++#define EXT4_FT_MASK          0xf
++
++#if EXT4_FT_MAX > EXT4_FT_MASK
++#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
++#endif
++
++/*
++ * d_type has 4 unused bits, so it can hold four types of data. These different
++ * types of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be
++ * stored, in flag order, after the file name in the ext4 dirent.
++*/
++/*
++ * this flag is added to d_type if ext4 dirent has extra data after
++ * filename. this data length is variable and length is stored in first byte
++ * of data. data start after filename NUL byte.
++ * This is used by Lustre FS.
++  */
++#define EXT4_DIRENT_LUFID             0x10
++
++#define EXT4_LUFID_MAGIC    0xAD200907UL
++struct ext4_dentry_param {
++      __u32  edp_magic;       /* EXT4_LUFID_MAGIC */
++      char   edp_len;         /* size of edp_data in bytes */
++      char   edp_data[0];     /* packed array of data */
++} __packed;
++
++static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
++                                                struct ext4_dentry_param *p)
++
++{
++      if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
++              return NULL;
++      if (p && p->edp_magic == EXT4_LUFID_MAGIC)
++              return &p->edp_len;
++      else
++              return NULL;
++}
+ #define EXT4_FT_DIR_CSUM      0xDE
+@@ -1690,8 +1729,11 @@ struct ext4_dir_entry_tail {
+  */
+ #define EXT4_DIR_PAD                  4
+ #define EXT4_DIR_ROUND                        (EXT4_DIR_PAD - 1)
+-#define EXT4_DIR_REC_LEN(name_len)    (((name_len) + 8 + EXT4_DIR_ROUND) & \
++#define __EXT4_DIR_REC_LEN(name_len)  (((name_len) + 8 + EXT4_DIR_ROUND) & \
+                                        ~EXT4_DIR_ROUND)
++#define EXT4_DIR_REC_LEN(de)          (__EXT4_DIR_REC_LEN((de)->name_len +\
++                                      ext4_get_dirent_data_len(de)))
++
+ #define EXT4_MAX_REC_LEN              ((1<<16)-1)
+ /*
+@@ -2016,11 +2058,11 @@ extern int ext4_find_dest_de(struct inod
+                            struct buffer_head *bh,
+                            void *buf, int buf_size,
+                            const char *name, int namelen,
+-                           struct ext4_dir_entry_2 **dest_de);
++                           struct ext4_dir_entry_2 **dest_de, int *dlen);
+ void ext4_insert_dentry(struct inode *inode,
+                       struct ext4_dir_entry_2 *de,
+                       int buf_size,
+-                      const char *name, int namelen);
++                      const char *name, int namelen, void *data);
+ static inline void ext4_update_dx_flag(struct inode *inode)
+ {
+       if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
+@@ -2033,11 +2075,18 @@ static unsigned char ext4_filetype_table
+ static inline  unsigned char get_dtype(struct super_block *sb, int filetype)
+ {
++      int fl_index = filetype & EXT4_FT_MASK; /* drop extension flag bits */
++
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) ||
+-          (filetype >= EXT4_FT_MAX))
++          (fl_index >= EXT4_FT_MAX))
+               return DT_UNKNOWN;
+-      return ext4_filetype_table[filetype];
++      if (!test_opt(sb, DIRDATA))     /* dirdata mount option is off */
++              return ext4_filetype_table[fl_index];
++
++      return (ext4_filetype_table[fl_index]) |
++              (filetype & EXT4_DIRENT_LUFID); /* keep the LUFID flag */
++
+ }
+ extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
+                            void *buf, int buf_size);
+@@ -2186,6 +2235,8 @@ extern struct inode *ext4_create_inode(h
+ extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
+                            struct ext4_dir_entry_2 *de_del,
+                            struct buffer_head *bh);
++/* @data1 and @data2 are the optional dirdata for "." and ".." respectively */
++extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
++                             struct inode *inode,
++                             const void *data1, const void *data2);
+ extern int search_dir(struct buffer_head *bh,
+                     char *search_buf,
+                     int buf_size,
+@@ -2865,6 +2916,28 @@ extern struct mutex ext4__aio_mutex[EXT4
+ extern int ext4_resize_begin(struct super_block *sb);
+ extern void ext4_resize_end(struct super_block *sb);
++/*
++ * Compute the length of the extra data stored after the name of a
++ * directory entry.  The name itself is NOT counted (EXT4_DIR_REC_LEN()
++ * adds de->name_len separately); the result is the sum of the extension
++ * lengths plus one byte for the NUL terminator that separates the name
++ * from the first extension.  Returns 0 when no extension flag is set.
++ *
++ * Each extension has a bit set in the high 4 bits of de->file_type, and
++ * the extension length is the first byte in each entry.  The length bytes
++ * are read as unsigned values so that the result does not depend on the
++ * signedness of plain char and a corrupted length byte >= 0x80 can never
++ * move the cursor backwards or yield a negative length.
++ */
++static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
++{
++      /* the first extension starts right after the name's NUL terminator */
++      __u8 *len = (__u8 *)de->name + de->name_len + 1;
++      int dlen = 0;
++      __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
++
++      while (extra_data_flags) {
++              if (extra_data_flags & 1) {
++                      /* account for the NUL once, with the first extension */
++                      dlen += *len + (dlen == 0);
++                      len += *len;
++              }
++              extra_data_flags >>= 1;
++      }
++      return dlen;
++}
++
++
+ #endif        /* __KERNEL__ */
+ #endif        /* _EXT4_H */
+Index: linux-stage/fs/ext4/namei.c
+===================================================================
+--- linux-stage.orig/fs/ext4/namei.c
++++ linux-stage/fs/ext4/namei.c
+@@ -239,7 +239,8 @@ static unsigned dx_get_count(struct dx_e
+ static unsigned dx_get_limit(struct dx_entry *entries);
+ static void dx_set_count(struct dx_entry *entries, unsigned value);
+ static void dx_set_limit(struct dx_entry *entries, unsigned value);
+-static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
++static inline unsigned dx_root_limit(struct inode *dir,
++              struct ext4_dir_entry_2 *dot_de, unsigned infosize);
+ static unsigned dx_node_limit(struct inode *dir);
+ static struct dx_frame *dx_probe(const struct qstr *d_name,
+                                struct inode *dir,
+@@ -500,11 +501,12 @@ ext4_next_entry(struct ext4_dir_entry_2
+  */
+ struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
+ {
++      BUG_ON(de->name_len != 1); /* @de is presumably the "." entry; the
++                                  * sizes used below are computed from the
++                                  * entries themselves, so any data stored
++                                  * after the names is stepped over too */
+       /* get dotdot first */
+-      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
++      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
+       /* dx root info is after dotdot entry */
+-      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
++      de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
+       return (struct dx_root_info *)de;
+ }
+@@ -549,10 +551,16 @@ static inline void dx_set_limit(struct d
+       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+-static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
++static inline unsigned dx_root_limit(struct inode *dir,
++              struct ext4_dir_entry_2 *dot_de, unsigned infosize)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
+-              EXT4_DIR_REC_LEN(2) - infosize;
++      struct ext4_dir_entry_2 *dotdot_de;
++      unsigned entry_space;
++
++      BUG_ON(dot_de->name_len != 1);
++      dotdot_de = ext4_next_entry(dot_de, dir->i_sb->s_blocksize);
++      entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(dot_de) -
++                       EXT4_DIR_REC_LEN(dotdot_de) - infosize;
+       if (ext4_has_metadata_csum(dir->i_sb))
+               entry_space -= sizeof(struct dx_tail);
+@@ -561,7 +569,7 @@ static inline unsigned dx_root_limit(str
+ static inline unsigned dx_node_limit(struct inode *dir)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
++      unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
+       if (ext4_has_metadata_csum(dir->i_sb))
+               entry_space -= sizeof(struct dx_tail);
+@@ -611,7 +619,7 @@ static struct stats dx_show_leaf(struct
+                               printk(":%x.%u ", h.hash,
+                                      (unsigned) ((char *) de - base));
+                       }
+-                      space += EXT4_DIR_REC_LEN(de->name_len);
++                      space += EXT4_DIR_REC_LEN(de);
+                       names++;
+               }
+               de = ext4_next_entry(de, size);
+@@ -719,12 +727,15 @@ dx_probe(const struct qstr *d_name, stru
+       entries = (struct dx_entry *)(((char *)info) + info->info_length);
+-      if (dx_get_limit(entries) != dx_root_limit(dir,
+-                                                 info->info_length)) {
++      if (dx_get_limit(entries) !=
++          dx_root_limit(dir, (struct ext4_dir_entry_2 *)bh->b_data,
++                        info->info_length)) {
+               ext4_warning(dir->i_sb, "dx entry: limit != root limit "
+                            "inode #%lu: dx entry: limit %u != root limit %u",
+                            dir->i_ino, dx_get_limit(entries),
+-                           dx_root_limit(dir, info->info_length));
++                           dx_root_limit(dir,
++                                        (struct ext4_dir_entry_2 *)bh->b_data,
++                                        info->info_length));
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+@@ -916,7 +927,7 @@ static int htree_dirblock_to_tree(struct
+       de = (struct ext4_dir_entry_2 *) bh->b_data;
+       top = (struct ext4_dir_entry_2 *) ((char *) de +
+                                          dir->i_sb->s_blocksize -
+-                                         EXT4_DIR_REC_LEN(0));
++                                         __EXT4_DIR_REC_LEN(0));
+       for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
+               if (ext4_check_dir_entry(dir, NULL, de, bh,
+                               bh->b_data, bh->b_size,
+@@ -1520,7 +1531,7 @@ dx_move_dirents(char *from, char *to, st
+       while (count--) {
+               struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
+                                               (from + (map->offs<<2));
+-              rec_len = EXT4_DIR_REC_LEN(de->name_len);
++              rec_len = EXT4_DIR_REC_LEN(de);
+               memcpy (to, de, rec_len);
+               ((struct ext4_dir_entry_2 *) to)->rec_len =
+                               ext4_rec_len_to_disk(rec_len, blocksize);
+@@ -1544,7 +1555,7 @@ static struct ext4_dir_entry_2* dx_pack_
+       while ((char*)de < base + blocksize) {
+               next = ext4_next_entry(de, blocksize);
+               if (de->inode && de->name_len) {
+-                      rec_len = EXT4_DIR_REC_LEN(de->name_len);
++                      rec_len = EXT4_DIR_REC_LEN(de);
+                       if (de > to)
+                               memmove(to, de, rec_len);
+                       to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
+@@ -1675,14 +1686,16 @@ int ext4_find_dest_de(struct inode *dir,
+                     struct buffer_head *bh,
+                     void *buf, int buf_size,
+                     const char *name, int namelen,
+-                    struct ext4_dir_entry_2 **dest_de)
++                    struct ext4_dir_entry_2 **dest_de, int *dlen)
+ {
+       struct ext4_dir_entry_2 *de;
+-      unsigned short reclen = EXT4_DIR_REC_LEN(namelen);
++      unsigned short reclen = __EXT4_DIR_REC_LEN(namelen) +
++                                                      (dlen ? *dlen : 0);
+       int nlen, rlen;
+       unsigned int offset = 0;
+       char *top;
++      dlen ? *dlen = 0 : 0; /* default set to 0 */
+       de = (struct ext4_dir_entry_2 *)buf;
+       top = buf + buf_size - reclen;
+       while ((char *) de <= top) {
+@@ -1691,10 +1704,26 @@ int ext4_find_dest_de(struct inode *dir,
+                       return -EIO;
+               if (ext4_match(namelen, name, de))
+                       return -EEXIST;
+-              nlen = EXT4_DIR_REC_LEN(de->name_len);
++              nlen = EXT4_DIR_REC_LEN(de);
+               rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+               if ((de->inode ? rlen - nlen : rlen) >= reclen)
+                       break;
++              /* Then for dotdot entries, check for the smaller space
++               * required for just the entry, no FID */
++              if (namelen == 2 && memcmp(name, "..", 2) == 0) {
++                      if ((de->inode ? rlen - nlen : rlen) >=
++                          __EXT4_DIR_REC_LEN(namelen)) {
++                              /* set dlen=1 to indicate not
++                               * enough space store fid */
++                              dlen ? *dlen = 1 : 0;
++                              break;
++                      }
++                      /* The new ".." entry must be written over the
++                       * previous ".." entry, which is the first
++                       * entry traversed by this scan. If it doesn't
++                       * fit, something is badly wrong, so -EIO. */
++                      return -EIO;
++              }
+               de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
+               offset += rlen;
+       }
+@@ -1708,12 +1737,12 @@ int ext4_find_dest_de(struct inode *dir,
+ void ext4_insert_dentry(struct inode *inode,
+                       struct ext4_dir_entry_2 *de,
+                       int buf_size,
+-                      const char *name, int namelen)
++                      const char *name, int namelen, void *data)
+ {
+       int nlen, rlen;
+-      nlen = EXT4_DIR_REC_LEN(de->name_len);
++      nlen = EXT4_DIR_REC_LEN(de);
+       rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+       if (de->inode) {
+               struct ext4_dir_entry_2 *de1 =
+@@ -1727,6 +1756,11 @@ void ext4_insert_dentry(struct inode *in
+       ext4_set_de_type(inode->i_sb, de, inode->i_mode);
+       de->name_len = namelen;
+       memcpy(de->name, name, namelen);
++      if (data) {
++              de->name[namelen] = 0;
++              memcpy(&de->name[namelen + 1], data, *(char *)data);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
+ }
+ /*
+  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
+@@ -1745,15 +1779,20 @@ static int add_dirent_to_buf(handle_t *h
+       int             namelen = dentry->d_name.len;
+       unsigned int    blocksize = dir->i_sb->s_blocksize;
+       int             csum_size = 0;
+-      int             err;
++      int             err, dlen = 0;
++      unsigned char   *data;
++      data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
++                                              dentry->d_fsdata);
+       if (ext4_has_metadata_csum(inode->i_sb))
+               csum_size = sizeof(struct ext4_dir_entry_tail);
+       if (!de) {
++              if (data)
++                      dlen = (*data) + 1;
+               err = ext4_find_dest_de(dir, inode,
+                                       bh, bh->b_data, blocksize - csum_size,
+-                                      name, namelen, &de);
++                                      name, namelen, &de, &dlen);
+               if (err)
+                       return err;
+       }
+@@ -1765,7 +1804,10 @@ static int add_dirent_to_buf(handle_t *h
+       }
+       /* By now the buffer is marked for journaling */
+-      ext4_insert_dentry(inode, de, blocksize, name, namelen);
++      /* If writing the short form of "dotdot", don't add the data section */
++      if (dlen == 1)
++              data = NULL;
++      ext4_insert_dentry(inode, de, blocksize, name, namelen, data);
+       /*
+        * XXX shouldn't update any times until successful
+@@ -1877,7 +1919,8 @@ static int make_indexed_dir(handle_t *ha
+       dx_set_block(entries, 1);
+       dx_set_count(entries, 1);
+-      dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
++      dx_set_limit(entries, dx_root_limit(dir,
++                                       dot_de, sizeof(*dx_info)));
+       /* Initialize as for dx_probe */
+       hinfo.hash_version = dx_info->hash_version;
+@@ -1927,6 +1970,8 @@ static int ext4_update_dotdot(handle_t *
+       struct buffer_head *dir_block;
+       struct ext4_dir_entry_2 *de;
+       int len, journal = 0, err = 0;
++      int dlen = 0;
++      char *data;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+@@ -1942,19 +1987,24 @@ static int ext4_update_dotdot(handle_t *
+       /* the first item must be "." */
+       assert(de->name_len == 1 && de->name[0] == '.');
+       len = le16_to_cpu(de->rec_len);
+-      assert(len >= EXT4_DIR_REC_LEN(1));
+-      if (len > EXT4_DIR_REC_LEN(1)) {
++      assert(len >= __EXT4_DIR_REC_LEN(1));
++      if (len > __EXT4_DIR_REC_LEN(1)) {
+               BUFFER_TRACE(dir_block, "get_write_access");
+               err = ext4_journal_get_write_access(handle, dir_block);
+               if (err)
+                       goto out_journal;
+               journal = 1;
+-              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
++              de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
+       }
+-      len -= EXT4_DIR_REC_LEN(1);
+-      assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
++      len -= EXT4_DIR_REC_LEN(de);
++      data = ext4_dentry_get_data(dir->i_sb,
++                      (struct ext4_dentry_param *)dentry->d_fsdata);
++      if (data)
++              dlen = *data + 1;
++      assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
++
+       de = (struct ext4_dir_entry_2 *)
+                       ((char *) de + le16_to_cpu(de->rec_len));
+       if (!journal) {
+@@ -1968,10 +2018,15 @@ static int ext4_update_dotdot(handle_t *
+       if (len > 0)
+               de->rec_len = cpu_to_le16(len);
+       else
+-              assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
++              assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
+       de->name_len = 2;
+       strcpy(de->name, "..");
+-      ext4_set_de_type(dir->i_sb, de, S_IFDIR);
++      if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) {
++              de->name[2] = 0;
++              memcpy(&de->name[2 + 1], data, *data);
++              ext4_set_de_type(dir->i_sb, de, S_IFDIR);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
+ out_journal:
+       if (journal) {
+@@ -2445,37 +2500,70 @@ retry:
+       return err;
+ }
++struct tp_block {      /* stand-in for ext4_init_dot_dotdot()'s @inode */
++      struct inode *inode;    /* the real inode being initialized */
++      void *data1;            /* dirdata param for the "." entry */
++      void *data2;            /* dirdata param for the ".." entry */
++};
++
+ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
+                         struct ext4_dir_entry_2 *de,
+                         int blocksize, int csum_size,
+                         unsigned int parent_ino, int dotdot_real_len)
+ {
++      void *data1 = NULL, *data2 = NULL;
++      int dot_reclen = 0;
++
++      /*
++       * dotdot_real_len == 10 is a private marker used by
++       * ext4_init_new_dir(): @inode then really points to a struct
++       * tp_block carrying the real inode plus the dirdata params for
++       * "." (data1) and ".." (data2).  Unpack it and fall back to the
++       * normal "dotdot takes the rest of the block" behaviour.
++       */
++      if (dotdot_real_len == 10) {
++              struct tp_block *tpb = (struct tp_block *)inode;
++              data1 = tpb->data1;
++              data2 = tpb->data2;
++              inode = tpb->inode;
++              dotdot_real_len = 0;
++      }
+       de->inode = cpu_to_le32(inode->i_ino);
+       de->name_len = 1;
+-      de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+-                                         blocksize);
+       strcpy(de->name, ".");
+       ext4_set_de_type(inode->i_sb, de, S_IFDIR);
++      /* get packed fid data (NULL if absent or dirdata is not enabled) */
++      data1 = ext4_dentry_get_data(inode->i_sb,
++                              (struct ext4_dentry_param *) data1);
++      if (data1) {
++              /* NUL after the name, then the blob; byte 0 is its length */
++              de->name[1] = 0;
++              memcpy(&de->name[2], data1, *(char *) data1);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
++      /* rec_len must be set after the data so that it covers it */
++      de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
++      /* rec_len is little-endian on disk: convert back for arithmetic */
++      dot_reclen = le16_to_cpu(de->rec_len);
+       de = ext4_next_entry(de, blocksize);
+       de->inode = cpu_to_le32(parent_ino);
+       de->name_len = 2;
++      /* name and type are set before rec_len, which depends on them */
++      strcpy(de->name, "..");
++      ext4_set_de_type(inode->i_sb, de, S_IFDIR);
++      data2 = ext4_dentry_get_data(inode->i_sb,
++                      (struct ext4_dentry_param *) data2);
++      if (data2) {
++              de->name[2] = 0;
++              memcpy(&de->name[3], data2, *(char *) data2);
++              de->file_type |= EXT4_DIRENT_LUFID;
++      }
+       if (!dotdot_real_len)
+               de->rec_len = ext4_rec_len_to_disk(blocksize -
+-                                      (csum_size + EXT4_DIR_REC_LEN(1)),
++                                      (csum_size + dot_reclen),
+                                       blocksize);
+       else
+               de->rec_len = ext4_rec_len_to_disk(
+-                              EXT4_DIR_REC_LEN(de->name_len), blocksize);
+-      strcpy(de->name, "..");
+-      ext4_set_de_type(inode->i_sb, de, S_IFDIR);
++                              EXT4_DIR_REC_LEN(de), blocksize);
+       return ext4_next_entry(de, blocksize);
+ }
+ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+-                           struct inode *inode)
++                           struct inode *inode,
++                           const void *data1, const void *data2)
+ {
++      struct tp_block param;
+       struct buffer_head *dir_block = NULL;
+       struct ext4_dir_entry_2 *de;
+       struct ext4_dir_entry_tail *t;
+@@ -2500,7 +2588,11 @@ static int ext4_init_new_dir(handle_t *h
+       if (IS_ERR(dir_block))
+               return PTR_ERR(dir_block);
+       de = (struct ext4_dir_entry_2 *)dir_block->b_data;
+-      ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
++      param.inode = inode;
++      param.data1 = (void *)data1;
++      param.data2 = (void *)data2;
++      ext4_init_dot_dotdot((struct inode *)(&param), de, blocksize,
++                           csum_size, dir->i_ino, 10);
+       set_nlink(inode, 2);
+       if (csum_size) {
+               t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
+@@ -2517,6 +2609,29 @@ out:
+       return err;
+ }
++/*
++ * Set up @inode as a new subdirectory of @dir: install the directory
++ * inode/file operations, write the "." and ".." entries (with optional
++ * dirdata @data1 and @data2) into its first block, and mark the inode
++ * dirty.  Returns 0 on success or a negative errno; an error-pointer
++ * @handle is returned as its errno.
++ */
++int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
++                      struct inode *inode,
++                      const void *data1, const void *data2)
++{
++      int err;
++
++      if (IS_ERR(handle))
++              return PTR_ERR(handle);
++
++      if (IS_DIRSYNC(dir))
++              ext4_handle_sync(handle);
++
++      inode->i_op = &ext4_dir_inode_operations.ops;
++      inode->i_fop = &ext4_dir_operations;
++
++      err = ext4_init_new_dir(handle, dir, inode, data1, data2);
++      if (err)
++              return err;
++
++      return ext4_mark_inode_dirty(handle, inode);
++}
++EXPORT_SYMBOL(ext4_add_dot_dotdot);
++
+ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+ {
+       handle_t *handle;
+@@ -2542,7 +2657,7 @@ retry:
+       inode->i_op = &ext4_dir_inode_operations.ops;
+       inode->i_fop = &ext4_dir_operations;
+       inode->i_flags |= S_IOPS_WRAPPER;
+-      err = ext4_init_new_dir(handle, dir, inode);
++      err = ext4_init_new_dir(handle, dir, inode, NULL, NULL);
+       if (err)
+               goto out_clear_inode;
+       err = ext4_mark_inode_dirty(handle, inode);
+@@ -2594,7 +2709,7 @@ static int empty_dir(struct inode *inode
+       }
+       sb = inode->i_sb;
+-      if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
++      if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2)) {
+               EXT4_ERROR_INODE(inode, "invalid size");
+               return 1;
+       }
+Index: linux-stage/fs/ext4/inline.c
+===================================================================
+--- linux-stage.orig/fs/ext4/inline.c
++++ linux-stage/fs/ext4/inline.c
+@@ -1006,7 +1006,7 @@ static int ext4_add_dirent_to_inline(han
+       err = ext4_find_dest_de(dir, inode, iloc->bh,
+                               inline_start, inline_size,
+-                              name, namelen, &de);
++                              name, namelen, &de, NULL);
+       if (err)
+               return err;
+@@ -1014,7 +1014,7 @@ static int ext4_add_dirent_to_inline(han
+       err = ext4_journal_get_write_access(handle, iloc->bh);
+       if (err)
+               return err;
+-      ext4_insert_dentry(inode, de, inline_size, name, namelen);
++      ext4_insert_dentry(inode, de, inline_size, name, namelen, NULL);
+       ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
+@@ -1084,7 +1084,7 @@ static int ext4_update_inline_dir(handle
+       int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
+       int new_size = get_max_inline_xattr_value_size(dir, iloc);
+-      if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
++      if (new_size - old_size <= __EXT4_DIR_REC_LEN(1))
+               return -ENOSPC;
+       ret = ext4_update_inline_data(handle, dir,
+@@ -1365,7 +1365,7 @@ int htree_inlinedir_to_tree(struct file
+                       fake.name_len = 1;
+                       strcpy(fake.name, ".");
+                       fake.rec_len = ext4_rec_len_to_disk(
+-                                              EXT4_DIR_REC_LEN(fake.name_len),
++                                              EXT4_DIR_REC_LEN(&fake),
+                                               inline_size);
+                       ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+                       de = &fake;
+@@ -1375,7 +1375,7 @@ int htree_inlinedir_to_tree(struct file
+                       fake.name_len = 2;
+                       strcpy(fake.name, "..");
+                       fake.rec_len = ext4_rec_len_to_disk(
+-                                              EXT4_DIR_REC_LEN(fake.name_len),
++                                              EXT4_DIR_REC_LEN(&fake),
+                                               inline_size);
+                       ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
+                       de = &fake;
+@@ -1473,8 +1473,8 @@ int ext4_read_inline_dir(struct file *fi
+        * So we will use extra_offset and extra_size to indicate them
+        * during the inline dir iteration.
+        */
+-      dotdot_offset = EXT4_DIR_REC_LEN(1);
+-      dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
++      dotdot_offset = __EXT4_DIR_REC_LEN(1);
++      dotdot_size = dotdot_offset + __EXT4_DIR_REC_LEN(2);
+       extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
+       extra_size = extra_offset + inline_size;
+@@ -1511,7 +1511,7 @@ revalidate:
+                                * failure will be detected in the
+                                * dirent test below. */
+                               if (ext4_rec_len_from_disk(de->rec_len,
+-                                      extra_size) < EXT4_DIR_REC_LEN(1))
++                                      extra_size) < __EXT4_DIR_REC_LEN(1))
+                                       break;
+                               i += ext4_rec_len_from_disk(de->rec_len,
+                                                           extra_size);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -1155,7 +1155,7 @@ enum {
+       Opt_data_err_abort, Opt_data_err_ignore,
+       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
+-      Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
++      Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
+       Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+       Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
+@@ -1223,6 +1223,7 @@ static const match_table_t tokens = {
+       {Opt_stripe, "stripe=%u"},
+       {Opt_delalloc, "delalloc"},
+       {Opt_nodelalloc, "nodelalloc"},
++      {Opt_dirdata, "dirdata"},
+       {Opt_removed, "mblk_io_submit"},
+       {Opt_removed, "nomblk_io_submit"},
+       {Opt_block_validity, "block_validity"},
+@@ -1436,6 +1437,7 @@ static const struct mount_opts {
+       {Opt_usrjquota, 0, MOPT_Q},
+       {Opt_grpjquota, 0, MOPT_Q},
+       {Opt_offusrjquota, 0, MOPT_Q},
++      {Opt_dirdata, EXT4_MOUNT_DIRDATA, MOPT_SET},
+       {Opt_offgrpjquota, 0, MOPT_Q},
+       {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
+       {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
diff --git a/ldiskfs/kernel_patches/patches/rhel7.3/ext4-disable-mb-cache.patch b/ldiskfs/kernel_patches/patches/rhel7.3/ext4-disable-mb-cache.patch
new file mode 100644 (file)
index 0000000..49bb23e
--- /dev/null
@@ -0,0 +1,157 @@
+mbcache provides absolutely no value for Lustre xattrs (because
+they are unique and cannot be shared between files) and as we can
+see it has a noticeable overhead in some cases. In the past there
+was a CONFIG_MBCACHE option that would allow it to be disabled,
+but this was removed in newer kernels, so we will need to patch
+ldiskfs to fix this.
+
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -963,6 +963,7 @@ struct ext4_inode_info {
+ /*
+  * Mount flags set via mount options or defaults
+  */
++#define EXT4_MOUNT_NO_MBCACHE         0x00001 /* Disable mbcache */
+ #define EXT4_MOUNT_DIRDATA            0x00002 /* Data in directory entries*/
+ #define EXT4_MOUNT_GRPID              0x00004 /* Create files with directory's group */
+ #define EXT4_MOUNT_DEBUG              0x00008 /* Some debugging messages */
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -1161,6 +1161,7 @@ enum {
+       Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_dioread_nolock, Opt_dioread_lock,
++      Opt_no_mbcache,
+       Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+       Opt_max_dir_size_kb, Opt_nojournal_checksum,
+ };
+@@ -1238,6 +1239,7 @@ static const match_table_t tokens = {
+       {Opt_discard, "discard"},
+       {Opt_nodiscard, "nodiscard"},
+       {Opt_init_itable, "init_itable=%u"},
++      {Opt_no_mbcache, "no_mbcache"},
+       {Opt_init_itable, "init_itable"},
+       {Opt_noinit_itable, "noinit_itable"},
+       {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+@@ -1400,6 +1402,7 @@ static const struct mount_opts {
+       {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
+       {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
+       {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
++      {Opt_no_mbcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+       {Opt_commit, 0, MOPT_GTE0},
+       {Opt_max_batch_time, 0, MOPT_GTE0},
+       {Opt_min_batch_time, 0, MOPT_GTE0},
+Index: linux-stage/fs/ext4/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext4/xattr.c
++++ linux-stage/fs/ext4/xattr.c
+@@ -81,7 +81,8 @@
+ # define ea_bdebug(bh, fmt, ...)      no_printk(fmt, ##__VA_ARGS__)
+ #endif
+-static void ext4_xattr_cache_insert(struct buffer_head *);
++static void ext4_xattr_cache_insert(struct super_block *,
++                                  struct buffer_head *);
+ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
+                                                struct ext4_xattr_header *,
+                                                struct mb_cache_entry **);
+@@ -405,7 +406,7 @@ bad_block:
+               error = -EIO;
+               goto cleanup;
+       }
+-      ext4_xattr_cache_insert(bh);
++      ext4_xattr_cache_insert(inode->i_sb, bh);
+       entry = BFIRST(bh);
+       error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
+                                     inode);
+@@ -569,7 +570,7 @@ ext4_xattr_block_list(struct dentry *den
+               error = -EIO;
+               goto cleanup;
+       }
+-      ext4_xattr_cache_insert(bh);
++      ext4_xattr_cache_insert(inode->i_sb, bh);
+       error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
+ cleanup:
+@@ -667,7 +668,9 @@ ext4_xattr_release_block(handle_t *handl
+       struct mb_cache_entry *ce = NULL;
+       int error = 0;
+-      ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
++      if (!test_opt(inode->i_sb, NO_MBCACHE))
++              ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev,
++                                      bh->b_blocknr);
+       BUFFER_TRACE(bh, "get_write_access");
+       error = ext4_journal_get_write_access(handle, bh);
+       if (error)
+@@ -1082,8 +1085,10 @@ ext4_xattr_block_set(handle_t *handle, s
+ #define header(x) ((struct ext4_xattr_header *)(x))
+       if (s->base) {
+-              ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
+-                                      bs->bh->b_blocknr);
++              if (!test_opt(inode->i_sb, NO_MBCACHE))
++                      ce = mb_cache_entry_get(ext4_xattr_cache,
++                                              bs->bh->b_bdev,
++                                              bs->bh->b_blocknr);
+               BUFFER_TRACE(bs->bh, "get_write_access");
+               error = ext4_journal_get_write_access(handle, bs->bh);
+               if (error)
+@@ -1101,7 +1106,7 @@ ext4_xattr_block_set(handle_t *handle, s
+                               if (!IS_LAST_ENTRY(s->first))
+                                       ext4_xattr_rehash(header(s->base),
+                                                         s->here);
+-                              ext4_xattr_cache_insert(bs->bh);
++                              ext4_xattr_cache_insert(sb, bs->bh);
+                       }
+                       unlock_buffer(bs->bh);
+                       if (error == -EIO)
+@@ -1185,7 +1190,8 @@ inserted:
+                               if (error)
+                                       goto cleanup_dquot;
+                       }
+-                      mb_cache_entry_release(ce);
++                      if (ce)
++                              mb_cache_entry_release(ce);
+                       ce = NULL;
+               } else if (bs->bh && s->base == bs->bh->b_data) {
+                       /* We were modifying this block in-place. */
+@@ -1238,7 +1244,7 @@ getblk_failed:
+                       memcpy(new_bh->b_data, s->base, new_bh->b_size);
+                       set_buffer_uptodate(new_bh);
+                       unlock_buffer(new_bh);
+-                      ext4_xattr_cache_insert(new_bh);
++                      ext4_xattr_cache_insert(sb, new_bh);
+                       error = ext4_handle_dirty_xattr_block(handle,
+                                                             inode, new_bh);
+                       if (error)
+@@ -2022,12 +2028,15 @@ ext4_xattr_put_super(struct super_block
+  * Returns 0, or a negative error number on failure.
+  */
+ static void
+-ext4_xattr_cache_insert(struct buffer_head *bh)
++ext4_xattr_cache_insert(struct super_block *sb, struct buffer_head *bh)
+ {
+       __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+       struct mb_cache_entry *ce;
+       int error;
++      if (test_opt(sb, NO_MBCACHE))
++              return;
++
+       ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS);
+       if (!ce) {
+               ea_bdebug(bh, "out of memory");
+@@ -2100,6 +2109,8 @@ ext4_xattr_cache_find(struct inode *inod
+       __u32 hash = le32_to_cpu(header->h_hash);
+       struct mb_cache_entry *ce;
++      if (test_opt(inode->i_sb, NO_MBCACHE))
++              return NULL;
+       if (!header->h_hash)
+               return NULL;  /* never share */
+       ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
new file mode 100644 (file)
index 0000000..1dfd3e5
--- /dev/null
@@ -0,0 +1,24 @@
+rhel7/ext4-inode-version.patch
+rhel7/ext4-lookup-dotdot.patch
+rhel6.3/ext4-print-inum-in-htree-warning.patch
+rhel7/ext4-prealloc.patch
+rhel7/ext4-mballoc-extra-checks.patch
+rhel7/ext4-misc.patch
+rhel7/ext4-osd-iop-common.patch
+rhel7/ext4-hash-indexed-dir-dotdot-update.patch
+rhel7/ext4-kill-dx-root.patch
+rhel7/ext4-mballoc-pa-free-mismatch.patch
+rhel7.3/ext4-data-in-dirent.patch
+rhel7.2/ext4-large-eas.patch
+rhel7.3/ext4-disable-mb-cache.patch
+rhel7/ext4-nocmtime.patch
+rhel7/ext4-large-dir.patch
+rhel7.2/ext4-pdirop.patch
+rhel7/ext4-max-dir-size.patch
+rhel7/ext4-remove-truncate-warning.patch
+rhel7.3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
+rhel7/ext4-give-warning-with-dir-htree-growing.patch
+rhel7/ext4-mmp-brelse.patch
+rhel7/ext4-jcb-optimization.patch
+rhel7/ext4_s_max_ext_tree_depth.patch
+rhel7.2/ext4-release-bh-in-makeinxdir.patch