Whamcloud - gitweb
LU-12988 ldiskfs: skip non-loaded groups at cr=0/1 39/37539/5
authorAlex Zhuravlev <bzzz@whamcloud.com>
Thu, 28 Nov 2019 12:04:25 +0000 (15:04 +0300)
committerOleg Drokin <green@whamcloud.com>
Tue, 14 Apr 2020 17:56:03 +0000 (17:56 +0000)
cr=0 is supposed to be an optimization to save CPU cycles,
but if buddy data (in memory) is not initialized then all
this makes no sense as we have to do sync IO taking a lot
of cycles.  also, at cr=0 mballoc doesn't store any available
chunk. cr=1 also skips groups using a heuristic based on avg.
fragment size.
it's more useful to skip such groups and switch to cr=2 where
groups will be scanned for available chunks.

a virtual device of 120TB was simulated using a sparse image
and dm-slow. then the image was formatted as an OST and filled
using debugfs to mark ~85% of available space as busy.
mounting it as an OST w/o the patch couldn't complete in half
an hour (according to vmstat it would take ~10-11 hours). with
the patch applied the mount took ~20 seconds.

Lustre-change: https://review.whamcloud.com/36891
Lustre-commit: 6a7a700a1490dfde6b60c2fb36df92a052059866

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I88c8c1b01b386af0fa438bfeb97acb6110bd00ec
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Artem Blagodarenko <c17828@cray.com>
Signed-off-by: Minh Diep <mdiep@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/37539
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch [new file with mode: 0644]
ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-skip-uninit-groups-cr0.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.2.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.3.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.4.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.5.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series
ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.series

diff --git a/ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch b/ldiskfs/kernel_patches/patches/rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
new file mode 100644 (file)
index 0000000..1bb8398
--- /dev/null
@@ -0,0 +1,19 @@
+--- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
++++ linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:53:18.600086008 +0300
+@@ -2060,7 +2060,15 @@ static int ext4_mb_good_group(struct
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+-              int ret = ext4_mb_init_group(ac->ac_sb, group);
++              int ret;
++
++              /* cr=0/1 is a very optimistic search to find large
++               * good chunks almost for free. if buddy data is
++               * not ready, then this optimization makes no sense */
++
++              if (cr < 2)
++                      return 0;
++              ret = ext4_mb_init_group(ac->ac_sb, group);
+               if (ret)
+                       return ret;
+       }
diff --git a/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-skip-uninit-groups-cr0.patch b/ldiskfs/kernel_patches/patches/rhel8/ext4-mballoc-skip-uninit-groups-cr0.patch
new file mode 100644 (file)
index 0000000..38925ac
--- /dev/null
@@ -0,0 +1,19 @@
+--- linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:55:26.500545920 +0300
++++ linux-4.18/fs/ext4/mballoc.c       2019-11-28 14:53:18.600086008 +0300
+@@ -2060,7 +2060,15 @@ static int ext4_mb_good_group(struct
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+-              int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
++              int ret;
++
++              /* cr=0/1 is a very optimistic search to find large
++               * good chunks almost for free. if buddy data is
++               * not ready, then this optimization makes no sense */
++
++              if (cr < 2)
++                      return 0;
++              ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
+               if (ret)
+                       return ret;
+       }
index ac83b2f..dfbd2e4 100644 (file)
@@ -38,3 +38,4 @@ rhel7/ext4-export-orphan-add.patch
 rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index 3dfecb7..3b04032 100644 (file)
@@ -38,3 +38,4 @@ rhel7/ext4-export-orphan-add.patch
 rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index 66035ef..fd3e90d 100644 (file)
@@ -38,3 +38,4 @@ rhel7/ext4-export-orphan-add.patch
 rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index 6c4e7dd..84307c9 100644 (file)
@@ -38,3 +38,4 @@ rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7.2/ext4-export-mb-stream-allocator-variables.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index d10b17f..94a4c07 100644 (file)
@@ -39,3 +39,4 @@ rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inode
 rhel7.2/ext4-export-mb-stream-allocator-variables.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7.2/ext4-simple-blockalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index ca67087..47592d7 100644 (file)
@@ -39,3 +39,4 @@ rhel7.2/ext4-export-mb-stream-allocator-variables.patch
 rhel7.7/ext4-fix-project-with-unpatched-kernel.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
 rhel7.2/ext4-simple-blockalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch
index 2f67631..52228ba 100644 (file)
@@ -33,3 +33,4 @@ rhel7/ext4-export-orphan-add.patch
 rhel7/ext4-mmp-dont-mark-bh-dirty.patch
 rhel7/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
 rhel7/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+rhel7/ext4-mballoc-skip-uninit-groups-cr0.patch