From 6a15798f51a9dcb29333e8107a59c5551f607fff Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Wed, 6 May 2020 15:25:32 +0300
Subject: [PATCH] LU-13520 ldiskfs: fastpath in bitmap prefetching

getblk() can be very expensive if many threads are trying to find a
specific block, which can happen when threads are trying to prefetch
the same set of block bitmaps (where only one wins).
Use an atomic bitset to prevent this situation.

# mpirun -np 640 mdtest -D -C -r -u -n 1000 -vv -p 10 -i 3 -d /mdt0
                     Max        Min       Mean   Std Dev
  before b7cd65  76243.849  62728.925  69264.981  5525.345
  after b7cd65   44270.444  42144.617  43138.707   873.040
  this patch     83171.845  71796.197  77274.256  4652.121

Fixes: b7cd65a3d1 ("LU-12988 ldiskfs: mballoc to prefetch groups")
Signed-off-by: Alex Zhuravlev
Change-Id: I3194aa0e13f22a1f34f5df846cb4b15feba5f432
Reviewed-on: https://review.whamcloud.com/38513
Tested-by: jenkins
Reviewed-by: Andreas Dilger
Tested-by: Maloo
Reviewed-by: Wang Shilong
Reviewed-by: Artem Blagodarenko
---
 .../patches/rhel7.6/ext4-mballoc-prefetch.patch | 24 +++++++++++++++++++++-
 .../patches/rhel7.7/ext4-mballoc-prefetch.patch | 24 +++++++++++++++++++++-
 .../patches/rhel8.1/ext4-mballoc-prefetch.patch | 24 +++++++++++++++++++++-
 .../patches/rhel8/ext4-mballoc-prefetch.patch   | 24 +++++++++++++++++++++-
 4 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch
index b68880c..dd9baa9 100644
--- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch
+++ b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch
@@ -54,6 +54,23 @@
  extern int ext4_wait_block_bitmap(struct super_block *sb,
 				    ext4_group_t block_group,
 				    struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT	4
+ 
+ #define EXT4_MB_GRP_NEED_INIT(grp)	\
+	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
+	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)	\
++	(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ 
+ #define EXT4_MAX_CONTENTION	8
+ #define EXT4_CONTENTION_THRESHOLD	2
 --- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
 			err = -ENOMEM;
 			goto out;
 		}
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
 	return 0;
 }
 
@@ -110,6 +127,10 @@
 +	}
 +	while (nr-- > 0) {
 +		grp = ext4_get_group_info(sb, group);
++		/* prevent expensive getblk() on groups w/ IO in progress */
++		if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++			goto next;
++
 +		/* ignore empty groups - those will be skipped
 +		 * during the scanning as well */
 +		if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
@@ -120,6 +141,7 @@
 +			brelse(bh);
 +			}
 +		}
++next:
 +		if (++group >= ngroups)
 +			group = 0;
 +	}
diff --git a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
index dd80c6a..5bb1d04 100644
--- a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
+++ b/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
@@ -54,6 +54,23 @@
  extern int ext4_wait_block_bitmap(struct super_block *sb,
 				    ext4_group_t block_group,
 				    struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT	4
+ 
+ #define EXT4_MB_GRP_NEED_INIT(grp)	\
+	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
+	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)	\
++	(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ 
+ #define EXT4_MAX_CONTENTION	8
+ #define EXT4_CONTENTION_THRESHOLD	2
 --- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
 			err = -ENOMEM;
 			goto out;
 		}
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
 	return 0;
 }
 
@@ -110,6 +127,10 @@
 +	}
 +	while (nr-- > 0) {
 +		grp = ext4_get_group_info(sb, group);
++		/* prevent expensive getblk() on groups w/ IO in progress */
++		if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++			goto next;
++
 +		/* ignore empty groups - those will be skipped
 +		 * during the scanning as well */
 +		if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
@@ -120,6 +141,7 @@
 +			brelse(bh);
 +			}
 +		}
++next:
 +		if (++group >= ngroups)
 +			group = 0;
 +	}
diff --git a/ldiskfs/kernel_patches/patches/rhel8.1/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel8.1/ext4-mballoc-prefetch.patch
index 2690a99..e0715dd 100644
--- a/ldiskfs/kernel_patches/patches/rhel8.1/ext4-mballoc-prefetch.patch
+++ b/ldiskfs/kernel_patches/patches/rhel8.1/ext4-mballoc-prefetch.patch
@@ -54,6 +54,23 @@
  extern int ext4_wait_block_bitmap(struct super_block *sb,
 				    ext4_group_t block_group,
 				    struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT	4
+ 
+ #define EXT4_MB_GRP_NEED_INIT(grp)	\
+	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
+	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)	\
++	(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ 
+ #define EXT4_MAX_CONTENTION	8
+ #define EXT4_CONTENTION_THRESHOLD	2
 --- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
 		if (IS_ERR(bh[i])) {
 			err = PTR_ERR(bh[i]);
 			bh[i] = NULL;
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
 	return 0;
 }
 
@@ -110,6 +127,10 @@
 +	}
 +	while (nr-- > 0) {
 +		grp = ext4_get_group_info(sb, group);
++		/* prevent expensive getblk() on groups w/ IO in progress */
++		if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++			goto next;
++
 +		/* ignore empty groups - those will be skipped
 +		 * during the scanning as well */
 +		if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
@@ -120,6 +141,7 @@
 +			brelse(bh);
 +			}
 +		}
++next:
 +		if (++group >= ngroups)
 +			group = 0;
 +	}
diff --git a/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch
index 8f7dd1a..a382280 100644
--- a/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch
+++ b/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch
@@ -54,6 +54,23 @@
  extern int ext4_wait_block_bitmap(struct super_block *sb,
 				    ext4_group_t block_group,
 				    struct buffer_head *bh);
+@@ -3047,6 +3051,7 @@ struct ext4_group_info {
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+ #define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
++#define EXT4_GROUP_INFO_BBITMAP_READ_BIT	4
+ 
+ #define EXT4_MB_GRP_NEED_INIT(grp)	\
+	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+@@ -3065,6 +3070,8 @@ struct ext4_group_info {
+	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+ #define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
+	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
++#define EXT4_MB_GRP_TEST_AND_SET_READ(grp)	\
++	(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
+ 
+ #define EXT4_MAX_CONTENTION	8
+ #define EXT4_CONTENTION_THRESHOLD	2
 --- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
 +++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
 @@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
@@ -65,7 +82,7 @@
 		if (IS_ERR(bh[i])) {
 			err = PTR_ERR(bh[i]);
 			bh[i] = NULL;
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
+@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct
 	return 0;
 }
 
@@ -110,6 +127,10 @@
 +	}
 +	while (nr-- > 0) {
 +		grp = ext4_get_group_info(sb, group);
++		/* prevent expensive getblk() on groups w/ IO in progress */
++		if (EXT4_MB_GRP_TEST_AND_SET_READ(grp))
++			goto next;
++
 +		/* ignore empty groups - those will be skipped
 +		 * during the scanning as well */
 +		if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
@@ -120,6 +141,7 @@
 +			brelse(bh);
 +			}
 +		}
++next:
 +		if (++group >= ngroups)
 +			group = 0;
 +	}
-- 
1.8.3.1
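
For illustration only (not part of the patch above): a minimal standalone
userspace sketch of the fastpath the patch adds. It models
EXT4_MB_GRP_TEST_AND_SET_READ() with C11 atomics (atomic_fetch_or) instead of
the kernel's test_and_set_bit(); the group count, helper names and the printf
stand-in for the real bitmap read are invented for this sketch -- only the
test-and-set pattern (first caller pays for the prefetch, later callers skip
the group immediately) mirrors the patch.

/* sketch: one "read requested" bit per group, claimed atomically */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NGROUPS 8

/* models the BBITMAP_READ bit inside grp->bb_state */
static atomic_uint bb_state[NGROUPS];

/* models EXT4_MB_GRP_TEST_AND_SET_READ(): returns the previous bit value */
static bool grp_test_and_set_read(unsigned int group)
{
	return atomic_fetch_or(&bb_state[group], 1u) & 1u;
}

static void prefetch_group(unsigned int group)
{
	/* stands in for the expensive getblk()/bitmap read being avoided */
	printf("issuing bitmap read for group %u\n", group);
}

int main(void)
{
	/* two passes over the same groups: the second pass skips every group */
	for (int pass = 0; pass < 2; pass++) {
		for (unsigned int g = 0; g < NGROUPS; g++) {
			if (grp_test_and_set_read(g))
				continue;	/* another caller already won this group */
			prefetch_group(g);
		}
	}
	return 0;
}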