Whamcloud - gitweb
LU-13520 ldiskfs: fastpath in bitmap prefetching 13/38513/8
author Alex Zhuravlev <bzzz@whamcloud.com>
Wed, 6 May 2020 12:25:32 +0000 (15:25 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 6 Jun 2020 14:02:55 +0000 (14:02 +0000)
getblk() can be very expensive when many threads are trying to find the
same block, which can happen when threads are trying to prefetch the
same set of block bitmaps (where only one of them wins). Use an atomic
test-and-set bit in the group state to prevent this situation.

# mpirun -np 640 mdtest -D -C -r -u -n 1000 -vv -p 10 -i 3 -d /mdt0
               Max        Min        Mean       Std Dev
before b7cd65  76243.849  62728.925  69264.981  5525.345
after b7cd65   44270.444  42144.617  43138.707   873.040
this patch     83171.845  71796.197  77274.256  4652.121

Fixes: b7cd65a3d1 ("LU-12988 ldiskfs: mballoc to prefetch groups")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I3194aa0e13f22a1f34f5df846cb4b15feba5f432
Reviewed-on: https://review.whamcloud.com/38513
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@hpe.com>
ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch
ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
ldiskfs/kernel_patches/patches/rhel8.1/ext4-mballoc-prefetch.patch
ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch

index b68880c..dd9baa9 100644 (file)
  extern int ext4_wait_block_bitmap(struct super_block *sb,
                                  ext4_group_t block_group,
                                  struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
+       (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT      4
+
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+       (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)        \
+       (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)    \
++      (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ #define EXT4_MAX_CONTENTION           8
+ #define EXT4_CONTENTION_THRESHOLD     2
 --- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c       2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
                        err = -ENOMEM;
                        goto out;
                }
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
        return 0;
  }
  
 +      }
 +      while (nr-- > 0) {
 +              grp = ext4_get_group_info(sb, group);
++              /* prevent expensive getblk() on groups w/ IO in progress */
++              if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++                      goto next;
++
 +              /* ignore empty groups - those will be skipped
 +               * during the scanning as well */
 +              if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
 +                              brelse(bh);
 +                      }
 +              }
++next:
 +              if (++group >= ngroups)
 +                      group = 0;
 +      }
index dd80c6a..5bb1d04 100644 (file)
  extern int ext4_wait_block_bitmap(struct super_block *sb,
                                  ext4_group_t block_group,
                                  struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
+       (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT      4
+
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+       (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)        \
+       (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)    \
++      (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ #define EXT4_MAX_CONTENTION           8
+ #define EXT4_CONTENTION_THRESHOLD     2
 --- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c       2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
                        err = -ENOMEM;
                        goto out;
                }
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
        return 0;
  }
  
 +      }
 +      while (nr-- > 0) {
 +              grp = ext4_get_group_info(sb, group);
++              /* prevent expensive getblk() on groups w/ IO in progress */
++              if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++                      goto next;
++
 +              /* ignore empty groups - those will be skipped
 +               * during the scanning as well */
 +              if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
 +                              brelse(bh);
 +                      }
 +              }
++next:
 +              if (++group >= ngroups)
 +                      group = 0;
 +      }
index 2690a99..e0715dd 100644 (file)
  extern int ext4_wait_block_bitmap(struct super_block *sb,
                                  ext4_group_t block_group,
                                  struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
+       (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT      4
+
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+       (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)        \
+       (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)    \
++      (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ #define EXT4_MAX_CONTENTION           8
+ #define EXT4_CONTENTION_THRESHOLD     2
 --- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c       2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
                if (IS_ERR(bh[i])) {
                        err = PTR_ERR(bh[i]);
                        bh[i] = NULL;
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
        return 0;
  }
  
 +      }
 +      while (nr-- > 0) {
 +              grp = ext4_get_group_info(sb, group);
++              /* prevent expensive getblk() on groups w/ IO in progress */
++              if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++                      goto next;
++
 +              /* ignore empty groups - those will be skipped
 +               * during the scanning as well */
 +              if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
 +                              brelse(bh);
 +                      }
 +              }
++next:
 +              if (++group >= ngroups)
 +                      group = 0;
 +      }
index 8f7dd1a..a382280 100644 (file)
  extern int ext4_wait_block_bitmap(struct super_block *sb,
                                  ext4_group_t block_group,
                                  struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
+       (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT      4
+
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+       (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)        \
+       (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)    \
++      (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ #define EXT4_MAX_CONTENTION           8
+ #define EXT4_CONTENTION_THRESHOLD     2
 --- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c       2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
                if (IS_ERR(bh[i])) {
                        err = PTR_ERR(bh[i]);
                        bh[i] = NULL;
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
        return 0;
  }
  
 +      }
 +      while (nr-- > 0) {
 +              grp = ext4_get_group_info(sb, group);
++              /* prevent expensive getblk() on groups w/ IO in progress */
++              if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++                      goto next;
++
 +              /* ignore empty groups - those will be skipped
 +               * during the scanning as well */
 +              if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
 +                              brelse(bh);
 +                      }
 +              }
++next:
 +              if (++group >= ngroups)
 +                      group = 0;
 +      }