From 2c5700fcb4cb15056dc901fedf97001d9b9fd845 Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Wed, 19 Feb 2020 11:12:31 +0300
Subject: [PATCH 1/1] LU-12988 ldiskfs: revert prefetch patch

A problem leading to IO errors was found. Also, the patch for the
4.18 kernel needs fixes.

Revert "LU-12988 ldiskfs: mballoc to prefetch groups"

This reverts commit 05f31782be20fc4c46082dba02c10bcea59539e3.

Signed-off-by: Alex Zhuravlev
Change-Id: I478a011e561633516524697f3a4aa03734791790
Reviewed-on: https://review.whamcloud.com/37619
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 .../patches/rhel7.7/ext4-mballoc-prefetch.patch | 235 --------------------
 .../patches/rhel7/ext4-mballoc-prefetch.patch   | 235 --------------------
 .../patches/rhel8/ext4-mballoc-prefetch.patch   | 236 ---------------------
 .../series/ldiskfs-3.10-rhel7.2.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.3.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.4.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.5.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.6.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.7.series          |   1 -
 .../series/ldiskfs-3.10-rhel7.series            |   1 -
 .../series/ldiskfs-4.18-rhel8.series            |   1 -
 11 files changed, 714 deletions(-)
 delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
 delete mode 100644 ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-prefetch.patch
 delete mode 100644 ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch

diff --git a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
deleted file mode 100644
index fb29c72..0000000
--- a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-mballoc-prefetch.patch
+++ /dev/null
@@ -1,235 +0,0 @@
---- linux-4.18/fs/ext4/balloc.c	2019-11-28 14:55:26.506546036 +0300
-+++ linux-4.18/fs/ext4/balloc.c	2019-12-02 11:21:50.565975537 +0300
-@@ -404,7 +404,8 @@ verified:
-  * Return buffer_head on success or NULL in case of failure.
-  */
- struct buffer_head *
--ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
-+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
-+                              int ignore_locked)
- {
-         struct ext4_group_desc *desc;
-         struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -435,6 +436,13 @@ ext4_read_block_bitmap_nowait(struct
-         if (bitmap_uptodate(bh))
-                 goto verify;
- 
-+        if (ignore_locked && buffer_locked(bh)) {
-+                /* buffer under IO already, do not wait
-+                 * if called for prefetching */
-+                put_bh(bh);
-+                return NULL;
-+        }
-+
-         lock_buffer(bh);
-         if (bitmap_uptodate(bh)) {
-                 unlock_buffer(bh);
-@@ -524,7 +532,7 @@ ext4_read_block_bitmap(struct super_b
- {
-         struct buffer_head *bh;
- 
--        bh = ext4_read_block_bitmap_nowait(sb, block_group);
-+        bh = ext4_read_block_bitmap_nowait(sb, block_group, 1);
-         if (!bh)
-                 return NULL;
-         err = ext4_wait_block_bitmap(sb, block_group, bh);
---- linux-4.18/fs/ext4/ext4.h	2019-11-28 14:55:26.470545343 +0300
-+++ linux-4.18/fs/ext4/ext4.h	2019-12-02 11:21:40.795779972 +0300
-@@ -1446,6 +1446,8 @@ struct ext4_sb_info {
-         /* where last allocation was done - for stream allocation */
-         unsigned long s_mb_last_group;
-         unsigned long s_mb_last_start;
-+        unsigned int s_mb_prefetch;
-+        unsigned int s_mb_prefetch_limit;
- 
-         /* stats for buddy allocator */
-         atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-@@ -2401,7 +2403,8 @@ extern struct ext4_group_desc * ldisk
- extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
- 
- extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
--                                                ext4_group_t block_group);
-+                                                ext4_group_t block_group,
-+                                                int ignore_locked);
- extern int ext4_wait_block_bitmap(struct super_block *sb,
-                                   ext4_group_t block_group,
-                                   struct buffer_head *bh);
---- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
-+++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
-@@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
-                         bh[i] = NULL;
-                         continue;
-                 }
--                if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
-+                if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group, 0))) {
-                         err = -ENOMEM;
-                         goto out;
-                 }
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
-         return 0;
- }
- 
-+/*
-+ * each allocation context (i.e. a thread doing allocation) has own
-+ * sliding prefetch window of @s_mb_prefetch size which starts at the
-+ * very first goal and moves ahead of scaning.
-+ * a side effect is that subsequent allocations will likely find
-+ * the bitmaps in cache or at least in-flight.
-+ */
-+static void
-+ext4_mb_prefetch(struct ext4_allocation_context *ac,
-+                 ext4_group_t start)
-+{
-+        struct super_block *sb = ac->ac_sb;
-+        ext4_group_t ngroups = ext4_get_groups_count(sb);
-+        struct ext4_sb_info *sbi = EXT4_SB(sb);
-+        struct ext4_group_info *grp;
-+        ext4_group_t group = start;
-+        struct buffer_head *bh;
-+        int nr;
-+
-+        /* limit prefetching at cr=0, otherwise mballoc can
-+         * spend a lot of time loading imperfect groups */
-+        if (ac->ac_criteria < 2 && ac->ac_prefetch_ios >= sbi->s_mb_prefetch_limit)
-+                return;
-+
-+        /* batch prefetching to get few READs in flight */
-+        nr = ac->ac_prefetch - group;
-+        if (ac->ac_prefetch < group)
-+                /* wrapped to the first groups */
-+                nr += ngroups;
-+        if (nr > 0)
-+                return;
-+        BUG_ON(nr < 0);
-+
-+        nr = sbi->s_mb_prefetch;
-+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-+                /* align to flex_bg to get more bitmas with a single IO */
-+                nr = (group / sbi->s_mb_prefetch) * sbi->s_mb_prefetch;
-+                nr = nr + sbi->s_mb_prefetch - group;
-+        }
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(sb, group);
-+                /* ignore empty groups - those will be skipped
-+                 * during the scanning as well */
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        bh = ext4_read_block_bitmap_nowait(sb, group, 1);
-+                        if (bh && !IS_ERR(bh)) {
-+                                if (!buffer_uptodate(bh))
-+                                        ac->ac_prefetch_ios++;
-+                                brelse(bh);
-+                        }
-+                }
-+                if (++group >= ngroups)
-+                        group = 0;
-+        }
-+        ac->ac_prefetch = group;
-+}
-+
-+static void
-+ext4_mb_prefetch_fini(struct ext4_allocation_context *ac)
-+{
-+        struct ext4_group_info *grp;
-+        ext4_group_t group;
-+        int nr, rc;
-+
-+        /* initialize last window of prefetched groups */
-+        nr = ac->ac_prefetch_ios;
-+        if (nr > EXT4_SB(ac->ac_sb)->s_mb_prefetch)
-+                nr = EXT4_SB(ac->ac_sb)->s_mb_prefetch;
-+        group = ac->ac_prefetch;
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(ac->ac_sb, group);
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        rc = ext4_mb_init_group(ac->ac_sb, group);
-+                        if (rc)
-+                                break;
-+                }
-+                if (group-- == 0)
-+                        group = ext4_get_groups_count(ac->ac_sb) - 1;
-+        }
-+}
-+
- static noinline_for_stack int
- ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
- {
-@@ -2176,6 +2264,7 @@ repeat:
-                  * from the goal value specified
-                  */
-                 group = ac->ac_g_ex.fe_group;
-+                ac->ac_prefetch = group;
- 
-                 for (i = 0; i < ngroups; group++, i++) {
-                         int ret = 0;
-@@ -2188,6 +2277,8 @@ repeat:
-                         if (group >= ngroups)
-                                 group = 0;
- 
-+                        ext4_mb_prefetch(ac, group);
-+
-                         /* This now checks without needing the buddy page */
-                         ret = ext4_mb_good_group(ac, group, cr);
-                         if (ret <= 0) {
-@@ -2260,6 +2351,8 @@ repeat:
- out:
-         if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
-                 err = first_err;
-+        /* use prefetched bitmaps to init buddy so that read info is not lost */
-+        ext4_mb_prefetch_fini(ac);
-         return err;
- }
- 
-@@ -2832,6 +2925,22 @@ int ext4_mb_init(struct super_block *
-                 sbi->s_mb_large_req = sbi->s_stripe * 8;
-                 sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
-         }
-+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-+                /* a single flex group is supposed to be read by a single IO */
-+                sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
-+                sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
-+        } else {
-+                sbi->s_mb_prefetch = 32;
-+        }
-+        if (sbi->s_mb_prefetch >= ext4_get_groups_count(sb))
-+                sbi->s_mb_prefetch = ext4_get_groups_count(sb);
-+        /* now many real IOs to prefetch within a single allocation at cr=0
-+         * given cr=0 is an CPU-related optimization we shouldn't try to
-+         * load too many groups, at some point we should start to use what
-+         * we've got in memory.
-+         * with an average random access time 5ms, it'd take a second to get
-+         * 200 groups (* N with flex_bg), so let's make this limit 32 */
-+        sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 32;
- 
-         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
-         if (sbi->s_locality_groups == NULL) {
---- linux-4.18/fs/ext4/mballoc.h	2019-11-28 14:55:26.471545362 +0300
-+++ linux-4.18/fs/ext4/mballoc.h	2019-12-02 11:21:57.028104886 +0300
-@@ -177,6 +177,8 @@ struct ext4_allocation_context {
-         struct page *ac_buddy_page;
-         struct ext4_prealloc_space *ac_pa;
-         struct ext4_locality_group *ac_lg;
-+        ext4_group_t ac_prefetch;
-+        int ac_prefetch_ios; /* number of initialied prefetch IO */
- };
- 
- #define AC_STATUS_CONTINUE 1
---- linux-4.18/fs/ext4/super.c	2019-11-28 14:55:26.502545959 +0300
-+++ linux-4.18/fs/ext4/super.c	2019-11-28 20:07:48.104558177 +0300
-@@ -190,6 +190,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_bur
- EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
- EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
- EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
- EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
- EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
- EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
-@@ -223,6 +224,8 @@ static struct attribute *ext4_attrs[]
-         ATTR_LIST(errors_count),
-         ATTR_LIST(first_error_time),
-         ATTR_LIST(last_error_time),
-+        ATTR_LIST(mb_prefetch),
-+        ATTR_LIST(mb_prefetch_limit),
-         NULL,
- };
- 
diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-prefetch.patch
deleted file mode 100644
index b68880c..0000000
--- a/ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-prefetch.patch
+++ /dev/null
@@ -1,235 +0,0 @@
---- linux-4.18/fs/ext4/balloc.c	2019-11-28 14:55:26.506546036 +0300
-+++ linux-4.18/fs/ext4/balloc.c	2019-12-02 11:21:50.565975537 +0300
-@@ -404,7 +404,8 @@ verified:
-  * Return buffer_head on success or NULL in case of failure.
-  */
- struct buffer_head *
--ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
-+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
-+                              int ignore_locked)
- {
-         struct ext4_group_desc *desc;
-         struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -435,6 +436,13 @@ ext4_read_block_bitmap_nowait(struct
-         if (bitmap_uptodate(bh))
-                 goto verify;
- 
-+        if (ignore_locked && buffer_locked(bh)) {
-+                /* buffer under IO already, do not wait
-+                 * if called for prefetching */
-+                put_bh(bh);
-+                return NULL;
-+        }
-+
-         lock_buffer(bh);
-         if (bitmap_uptodate(bh)) {
-                 unlock_buffer(bh);
-@@ -524,7 +532,7 @@ ext4_read_block_bitmap(struct super_b
-         struct buffer_head *bh;
-         int err;
- 
--        bh = ext4_read_block_bitmap_nowait(sb, block_group);
-+        bh = ext4_read_block_bitmap_nowait(sb, block_group, 1);
-         if (!bh)
-                 return NULL;
-         err = ext4_wait_block_bitmap(sb, block_group, bh);
---- linux-4.18/fs/ext4/ext4.h	2019-11-28 14:55:26.470545343 +0300
-+++ linux-4.18/fs/ext4/ext4.h	2019-12-02 11:21:40.795779972 +0300
-@@ -1446,6 +1446,8 @@ struct ext4_sb_info {
-         /* where last allocation was done - for stream allocation */
-         unsigned long s_mb_last_group;
-         unsigned long s_mb_last_start;
-+        unsigned int s_mb_prefetch;
-+        unsigned int s_mb_prefetch_limit;
- 
-         /* stats for buddy allocator */
-         atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-@@ -2401,7 +2403,8 @@ extern struct ext4_group_desc * ldisk
- extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
- 
- extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
--                                                ext4_group_t block_group);
-+                                                ext4_group_t block_group,
-+                                                int ignore_locked);
- extern int ext4_wait_block_bitmap(struct super_block *sb,
-                                   ext4_group_t block_group,
-                                   struct buffer_head *bh);
---- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
-+++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
-@@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
-                         bh[i] = NULL;
-                         continue;
-                 }
--                if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
-+                if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group, 0))) {
-                         err = -ENOMEM;
-                         goto out;
-                 }
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
-         return 0;
- }
- 
-+/*
-+ * each allocation context (i.e. a thread doing allocation) has own
-+ * sliding prefetch window of @s_mb_prefetch size which starts at the
-+ * very first goal and moves ahead of scaning.
-+ * a side effect is that subsequent allocations will likely find
-+ * the bitmaps in cache or at least in-flight.
-+ */
-+static void
-+ext4_mb_prefetch(struct ext4_allocation_context *ac,
-+                 ext4_group_t start)
-+{
-+        struct super_block *sb = ac->ac_sb;
-+        ext4_group_t ngroups = ext4_get_groups_count(sb);
-+        struct ext4_sb_info *sbi = EXT4_SB(sb);
-+        struct ext4_group_info *grp;
-+        ext4_group_t group = start;
-+        struct buffer_head *bh;
-+        int nr;
-+
-+        /* limit prefetching at cr=0, otherwise mballoc can
-+         * spend a lot of time loading imperfect groups */
-+        if (ac->ac_criteria < 2 && ac->ac_prefetch_ios >= sbi->s_mb_prefetch_limit)
-+                return;
-+
-+        /* batch prefetching to get few READs in flight */
-+        nr = ac->ac_prefetch - group;
-+        if (ac->ac_prefetch < group)
-+                /* wrapped to the first groups */
-+                nr += ngroups;
-+        if (nr > 0)
-+                return;
-+        BUG_ON(nr < 0);
-+
-+        nr = sbi->s_mb_prefetch;
-+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-+                /* align to flex_bg to get more bitmas with a single IO */
-+                nr = (group / sbi->s_mb_prefetch) * sbi->s_mb_prefetch;
-+                nr = nr + sbi->s_mb_prefetch - group;
-+        }
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(sb, group);
-+                /* ignore empty groups - those will be skipped
-+                 * during the scanning as well */
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        bh = ext4_read_block_bitmap_nowait(sb, group, 1);
-+                        if (bh && !IS_ERR(bh)) {
-+                                if (!buffer_uptodate(bh))
-+                                        ac->ac_prefetch_ios++;
-+                                brelse(bh);
-+                        }
-+                }
-+                if (++group >= ngroups)
-+                        group = 0;
-+        }
-+        ac->ac_prefetch = group;
-+}
-+
-+static void
-+ext4_mb_prefetch_fini(struct ext4_allocation_context *ac)
-+{
-+        struct ext4_group_info *grp;
-+        ext4_group_t group;
-+        int nr, rc;
-+
-+        /* initialize last window of prefetched groups */
-+        nr = ac->ac_prefetch_ios;
-+        if (nr > EXT4_SB(ac->ac_sb)->s_mb_prefetch)
-+                nr = EXT4_SB(ac->ac_sb)->s_mb_prefetch;
-+        group = ac->ac_prefetch;
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(ac->ac_sb, group);
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        rc = ext4_mb_init_group(ac->ac_sb, group);
-+                        if (rc)
-+                                break;
-+                }
-+                if (group-- == 0)
-+                        group = ext4_get_groups_count(ac->ac_sb) - 1;
-+        }
-+}
-+
- static noinline_for_stack int
- ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
- {
-@@ -2176,6 +2264,7 @@ repeat:
-                  * from the goal value specified
-                  */
-                 group = ac->ac_g_ex.fe_group;
-+                ac->ac_prefetch = group;
- 
-                 for (i = 0; i < ngroups; group++, i++) {
-                         int ret = 0;
-@@ -2188,6 +2277,8 @@ repeat:
-                         if (group >= ngroups)
-                                 group = 0;
- 
-+                        ext4_mb_prefetch(ac, group);
-+
-                         /* This now checks without needing the buddy page */
-                         ret = ext4_mb_good_group(ac, group, cr);
-                         if (ret <= 0) {
-@@ -2260,6 +2351,8 @@ repeat:
-                 }
-         }
- out:
-+        /* use prefetched bitmaps to init buddy so that read info is not lost */
-+        ext4_mb_prefetch_fini(ac);
-         return err;
- }
- 
-@@ -2832,6 +2925,22 @@ int ext4_mb_init(struct super_block *
-                 sbi->s_mb_large_req = sbi->s_stripe * 8;
-                 sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
-         }
-+        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-+                /* a single flex group is supposed to be read by a single IO */
-+                sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
-+                sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
-+        } else {
-+                sbi->s_mb_prefetch = 32;
-+        }
-+        if (sbi->s_mb_prefetch >= ext4_get_groups_count(sb))
-+                sbi->s_mb_prefetch = ext4_get_groups_count(sb);
-+        /* now many real IOs to prefetch within a single allocation at cr=0
-+         * given cr=0 is an CPU-related optimization we shouldn't try to
-+         * load too many groups, at some point we should start to use what
-+         * we've got in memory.
-+         * with an average random access time 5ms, it'd take a second to get
-+         * 200 groups (* N with flex_bg), so let's make this limit 32 */
-+        sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 32;
- 
-         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
-         if (sbi->s_locality_groups == NULL) {
---- linux-4.18/fs/ext4/mballoc.h	2019-11-28 14:55:26.471545362 +0300
-+++ linux-4.18/fs/ext4/mballoc.h	2019-12-02 11:21:57.028104886 +0300
-@@ -177,6 +177,8 @@ struct ext4_allocation_context {
-         struct page *ac_buddy_page;
-         struct ext4_prealloc_space *ac_pa;
-         struct ext4_locality_group *ac_lg;
-+        ext4_group_t ac_prefetch;
-+        int ac_prefetch_ios; /* number of initialied prefetch IO */
- };
- 
- #define AC_STATUS_CONTINUE 1
---- linux-4.18/fs/ext4/super.c	2019-11-28 14:55:26.502545959 +0300
-+++ linux-4.18/fs/ext4/super.c	2019-11-28 20:07:48.104558177 +0300
-@@ -190,6 +190,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_bur
- EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
- EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
- EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
- EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
- EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
- EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
-@@ -223,6 +224,8 @@ static struct attribute *ext4_attrs[]
-         ATTR_LIST(errors_count),
-         ATTR_LIST(first_error_time),
-         ATTR_LIST(last_error_time),
-+        ATTR_LIST(mb_prefetch),
-+        ATTR_LIST(mb_prefetch_limit),
-         NULL,
- };
- 
diff --git a/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch
deleted file mode 100644
index e17c10d..0000000
--- a/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-prefetch.patch
+++ /dev/null
@@ -1,236 +0,0 @@
---- linux-4.18/fs/ext4/balloc.c	2019-11-28 14:55:26.506546036 +0300
-+++ linux-4.18/fs/ext4/balloc.c	2019-12-02 11:21:50.565975537 +0300
-@@ -404,7 +404,8 @@ verified:
-  * Return buffer_head on success or NULL in case of failure.
-  */
- struct buffer_head *
--ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
-+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
-+                              int ignore_locked)
- {
-         struct ext4_group_desc *desc;
-         struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -435,6 +436,13 @@ ext4_read_block_bitmap_nowait(struct
-         if (bitmap_uptodate(bh))
-                 goto verify;
- 
-+        if (ignore_locked && buffer_locked(bh)) {
-+                /* buffer under IO already, do not wait
-+                 * if called for prefetching */
-+                put_bh(bh);
-+                return NULL;
-+        }
-+
-         lock_buffer(bh);
-         if (bitmap_uptodate(bh)) {
-                 unlock_buffer(bh);
-@@ -524,7 +532,7 @@ ext4_read_block_bitmap(struct super_b
-         struct buffer_head *bh;
-         int err;
- 
--        bh = ext4_read_block_bitmap_nowait(sb, block_group);
-+        bh = ext4_read_block_bitmap_nowait(sb, block_group, 1);
-         if (IS_ERR(bh))
-                 return bh;
-         err = ext4_wait_block_bitmap(sb, block_group, bh);
---- linux-4.18/fs/ext4/ext4.h	2019-11-28 14:55:26.470545343 +0300
-+++ linux-4.18/fs/ext4/ext4.h	2019-12-02 11:21:40.795779972 +0300
-@@ -1446,6 +1446,8 @@ struct ext4_sb_info {
-         /* where last allocation was done - for stream allocation */
-         unsigned long s_mb_last_group;
-         unsigned long s_mb_last_start;
-+        unsigned int s_mb_prefetch;
-+        unsigned int s_mb_prefetch_limit;
- 
-         /* stats for buddy allocator */
-         atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-@@ -2401,7 +2403,8 @@ extern struct ext4_group_desc * ldisk
- extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
- 
- extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
--                                                ext4_group_t block_group);
-+                                                ext4_group_t block_group,
-+                                                int ignore_locked);
- extern int ext4_wait_block_bitmap(struct super_block *sb,
-                                   ext4_group_t block_group,
-                                   struct buffer_head *bh);
---- linux-4.18/fs/ext4/mballoc.c	2019-11-28 14:55:26.500545920 +0300
-+++ linux-4.18/fs/ext4/mballoc.c	2019-12-02 11:21:46.656897291 +0300
-@@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct
-                         bh[i] = NULL;
-                         continue;
-                 }
--                bh[i] = ext4_read_block_bitmap_nowait(sb, group);
-+                bh[i] = ext4_read_block_bitmap_nowait(sb, group, 0);
-                 if (IS_ERR(bh[i])) {
-                         err = PTR_ERR(bh[i]);
-                         bh[i] = NULL;
-@@ -2104,6 +2112,87 @@ static int ext4_mb_good_group(struct
-         return 0;
- }
- 
-+/*
-+ * each allocation context (i.e. a thread doing allocation) has own
-+ * sliding prefetch window of @s_mb_prefetch size which starts at the
-+ * very first goal and moves ahead of scaning.
-+ * a side effect is that subsequent allocations will likely find
-+ * the bitmaps in cache or at least in-flight.
-+ */
-+static void
-+ext4_mb_prefetch(struct ext4_allocation_context *ac,
-+                 ext4_group_t start)
-+{
-+        struct super_block *sb = ac->ac_sb;
-+        ext4_group_t ngroups = ext4_get_groups_count(sb);
-+        struct ext4_sb_info *sbi = EXT4_SB(sb);
-+        struct ext4_group_info *grp;
-+        ext4_group_t group = start;
-+        struct buffer_head *bh;
-+        int nr;
-+
-+        /* limit prefetching at cr=0, otherwise mballoc can
-+         * spend a lot of time loading imperfect groups */
-+        if (ac->ac_criteria < 2 && ac->ac_prefetch_ios >= sbi->s_mb_prefetch_limit)
-+                return;
-+
-+        /* batch prefetching to get few READs in flight */
-+        nr = ac->ac_prefetch - group;
-+        if (ac->ac_prefetch < group)
-+                /* wrapped to the first groups */
-+                nr += ngroups;
-+        if (nr > 0)
-+                return;
-+        BUG_ON(nr < 0);
-+
-+        nr = sbi->s_mb_prefetch;
-+        if (ext4_has_feature_flex_bg(sb)) {
-+                /* align to flex_bg to get more bitmas with a single IO */
-+                nr = (group / sbi->s_mb_prefetch) * sbi->s_mb_prefetch;
-+                nr = nr + sbi->s_mb_prefetch - group;
-+        }
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(sb, group);
-+                /* ignore empty groups - those will be skipped
-+                 * during the scanning as well */
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        bh = ext4_read_block_bitmap_nowait(sb, group, 1);
-+                        if (bh && !IS_ERR(bh)) {
-+                                if (!buffer_uptodate(bh))
-+                                        ac->ac_prefetch_ios++;
-+                                brelse(bh);
-+                        }
-+                }
-+                if (++group >= ngroups)
-+                        group = 0;
-+        }
-+        ac->ac_prefetch = group;
-+}
-+
-+static void
-+ext4_mb_prefetch_fini(struct ext4_allocation_context *ac)
-+{
-+        struct ext4_group_info *grp;
-+        ext4_group_t group;
-+        int nr, rc;
-+
-+        /* initialize last window of prefetched groups */
-+        nr = ac->ac_prefetch_ios;
-+        if (nr > EXT4_SB(ac->ac_sb)->s_mb_prefetch)
-+                nr = EXT4_SB(ac->ac_sb)->s_mb_prefetch;
-+        group = ac->ac_prefetch;
-+        while (nr-- > 0) {
-+                grp = ext4_get_group_info(ac->ac_sb, group);
-+                if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) {
-+                        rc = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
-+                        if (rc)
-+                                break;
-+                }
-+                if (group-- == 0)
-+                        group = ext4_get_groups_count(ac->ac_sb) - 1;
-+        }
-+}
-+
- static noinline_for_stack int
- ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
- {
-@@ -2176,6 +2264,7 @@ repeat:
-                  * searching for the right group start
-                  * from the goal value specified
-                  */
-                 group = ac->ac_g_ex.fe_group + 1;
-+                ac->ac_prefetch = group;
- 
-                 for (i = 0; i < ngroups; group++, i++) {
-                         int ret = 0;
-@@ -2188,6 +2277,8 @@ repeat:
-                         if (group >= ngroups)
-                                 group = 0;
- 
-+                        ext4_mb_prefetch(ac, group);
-+
-                         /* This now checks without needing the buddy page */
-                         ret = ext4_mb_good_group(ac, group, cr);
-                         if (ret <= 0) {
-@@ -2260,6 +2351,8 @@ repeat:
- out:
-         if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
-                 err = first_err;
-+        /* use prefetched bitmaps to init buddy so that read info is not lost */
-+        ext4_mb_prefetch_fini(ac);
-         return err;
- }
- 
-@@ -2832,6 +2925,22 @@ int ext4_mb_init(struct super_block *
-                 sbi->s_mb_large_req = sbi->s_stripe * 8;
-                 sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
-         }
-+        if (ext4_has_feature_flex_bg(sb)) {
-+                /* a single flex group is supposed to be read by a single IO */
-+                sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex;
-+                sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
-+        } else {
-+                sbi->s_mb_prefetch = 32;
-+        }
-+        if (sbi->s_mb_prefetch >= ext4_get_groups_count(sb))
-+                sbi->s_mb_prefetch = ext4_get_groups_count(sb);
-+        /* now many real IOs to prefetch within a single allocation at cr=0
-+         * given cr=0 is an CPU-related optimization we shouldn't try to
-+         * load too many groups, at some point we should start to use what
-+         * we've got in memory.
-+         * with an average random access time 5ms, it'd take a second to get
-+         * 200 groups (* N with flex_bg), so let's make this limit 32 */
-+        sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 32;
- 
-         sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
-         if (sbi->s_locality_groups == NULL) {
---- linux-4.18/fs/ext4/mballoc.h	2019-11-28 14:55:26.471545362 +0300
-+++ linux-4.18/fs/ext4/mballoc.h	2019-12-02 11:21:57.028104886 +0300
-@@ -177,6 +177,8 @@ struct ext4_allocation_context {
-         struct page *ac_buddy_page;
-         struct ext4_prealloc_space *ac_pa;
-         struct ext4_locality_group *ac_lg;
-+        ext4_group_t ac_prefetch;
-+        int ac_prefetch_ios; /* number of initialied prefetch IO */
- };
- 
- #define AC_STATUS_CONTINUE 1
---- linux-4.18/fs/ext4/sysfs.c	2019-11-28 14:55:26.502545959 +0300
-+++ linux-4.18/fs/ext4/sysfs.c	2019-11-28 20:07:48.104558177 +0300
-@@ -190,6 +190,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_bur
- EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
- EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
- EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
-+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
- 
- static unsigned int old_bump_val = 128;
- EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
-@@ -223,6 +224,8 @@ static struct attribute *ext4_attrs[]
-         ATTR_LIST(errors_count),
-         ATTR_LIST(first_error_time),
-         ATTR_LIST(last_error_time),
-+        ATTR_LIST(mb_prefetch),
-+        ATTR_LIST(mb_prefetch_limit),
-         NULL,
- };
- 
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
index 9019133..3948c95 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
@@ -38,4 +38,3 @@ rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
index 0322656..c0d41d9 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
@@ -38,4 +38,3 @@ rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.4.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.4.series
index adf98a8..906c6f5 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.4.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.4.series
@@ -38,4 +38,3 @@ rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.5.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.5.series
index 1615249..5be93d7 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.5.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.5.series
@@ -38,4 +38,3 @@ rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inode
 rhel7.2/ext4-export-mb-stream-allocator-variables.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series
index 45cd224..4602afa 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series
@@ -39,4 +39,3 @@ rhel7.2/ext4-export-mb-stream-allocator-variables.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7.2/ext4-simple-blockalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series
index 0dc2498..db81695 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series
@@ -39,4 +39,3 @@ rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7.7/ext4-fix-project-with-unpatched-kernel.patch
 rhel7.2/ext4-simple-blockalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7.7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
index 5912b55..cf7f31d 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series
@@ -33,4 +33,3 @@ rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel7/ext4-mballoc-prefetch.patch
diff --git a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.series b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.series
index 8cb32e7..adcca20 100644
--- a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.series
+++ b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.series
@@ -23,4 +23,3 @@ ubuntu18/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-in
 rhel8/ext4-export-mb-stream-allocator-variables.patch
 rhel8/ext4-simple-blockalloc.patch
 rhel8/ext4-mballoc-skip-uninit-groups-cr0.patch
-rhel8/ext4-mballoc-prefetch.patch
-- 
1.8.3.1