Whamcloud - gitweb
LU-14305 ldiskfs: add parameters for mb_c123_threshold
author: Artem Blagodarenko <artem.blagodarenko@hpe.com>
Thu, 8 Sep 2022 03:13:07 +0000 (23:13 -0400)
committer: Andreas Dilger <adilger@whamcloud.com>
Fri, 23 Sep 2022 16:30:59 +0000 (16:30 +0000)
Add mount options for /sys/fs/ldiskfs/*/mb_c[123]_threshold values
so that they can be set persistently via mount options.

The /sys/fs/ldiskfs/*/mb_c[123]_threshold values are always shown
rounded down to the next lower percentage value due to integer
division, since internal values are stored as blocks for efficiency.

Round up the values shown to the next percent to match what was
used to originally set these parameters.

Lustre-change: https://review.whamcloud.com/41193
Lustre-commit: c2fd5297b46c4973aeda4d4d02cbc7ca2faa0d50

Fixes: 95f8ae567749 ("LU-12103 ldiskfs: don't search large block range if disk full")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Artem Blagodarenko <ablagodarenko@whamcloud.com>
Change-Id: Ie36a6667f8bca7481aa8179ab5b97c85d449d619
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@hpe.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/41955
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/48499

ldiskfs/kernel_patches/patches/rhel7.6/ext4-simple-blockalloc.patch

index b44d58b..b47d3a0 100644 (file)
@@ -1,31 +1,7 @@
-diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index caf007ba..5a61cec7 100644
---- a/fs/ext4/ext4.h
-+++ b/fs/ext4/ext4.h
-@@ -1407,6 +1407,9 @@ struct ext4_sb_info {
-       unsigned int s_mb_min_to_scan;
-       unsigned int s_mb_stats;
-       unsigned int s_mb_order2_reqs;
-+      ext4_fsblk_t s_mb_c1_blocks;
-+      ext4_fsblk_t s_mb_c2_blocks;
-+      ext4_fsblk_t s_mb_c3_blocks;
-       unsigned long *s_mb_prealloc_table;
-       unsigned long s_mb_prealloc_table_size;
-       unsigned int s_mb_group_prealloc;
-@@ -1424,6 +1427,9 @@ struct ext4_sb_info {
-       atomic_t s_bal_goals;   /* goal hits */
-       atomic_t s_bal_breaks;  /* too long searches */
-       atomic_t s_bal_2orders; /* 2^order hits */
-+      /* cX loop didn't find blocks */
-+      atomic64_t s_bal_cX_failed[3];
-+      atomic64_t s_bal_cX_skipped[3];
-       spinlock_t s_bal_lock;
-       unsigned long s_mb_buddies_generated;
-       unsigned long long s_mb_generation_time;
-diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
-index e4a00c78..14a4fb25 100644
---- a/fs/ext4/mballoc.c
-+++ b/fs/ext4/mballoc.c
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
 @@ -2098,6 +2098,21 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
  }
@@ -48,7 +24,7 @@ index e4a00c78..14a4fb25 100644
  static noinline_for_stack int
  ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
  {
-@@ -2107,6 +2122,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+@@ -2087,6 +2102,7 @@ ext4_mb_regular_allocator(struct ext4_al
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        struct ext4_buddy e4b;
@@ -56,7 +32,7 @@ index e4a00c78..14a4fb25 100644
  
        sb = ac->ac_sb;
        sbi = EXT4_SB(sb);
-@@ -2156,6 +2172,21 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+@@ -2136,6 +2152,21 @@ ext4_mb_regular_allocator(struct ext4_al
  
        /* Let's just scan groups to find more-less suitable blocks */
        cr = ac->ac_2order ? 0 : 1;
@@ -78,7 +54,7 @@ index e4a00c78..14a4fb25 100644
        /*
         * cr == 0 try to get exact allocation,
         * cr == 3  try to get anything
-@@ -2213,6 +2244,9 @@ repeat:
+@@ -2193,6 +2224,9 @@ repeat:
                        if (ac->ac_status != AC_STATUS_CONTINUE)
                                break;
                }
@@ -88,7 +64,7 @@ index e4a00c78..14a4fb25 100644
        }
  
        if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
-@@ -2336,6 +2370,93 @@ static const struct seq_operations ext4_mb_seq_groups_ops = {
+@@ -2316,6 +2350,93 @@ static const struct seq_operations ext4_
        .show   = ext4_mb_seq_groups_show,
  };
  
@@ -181,12 +157,13 @@ index e4a00c78..14a4fb25 100644
 +
  #define EXT4_MB_PREALLOC_TABLE          "prealloc_table"
  
- static ssize_t ext4_mb_prealloc_table_proc_write(struct file *file,
-@@ -2738,6 +2859,7 @@ static int ext4_groupinfo_create_slab(size_t size)
+ static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi,
+@@ -2730,6 +2851,8 @@ static int ext4_groupinfo_create_slab(si
        return 0;
  }
  
-+#define THRESHOLD_BLOCKS(ts) (ext4_blocks_count(sbi->s_es) / 100 * ts)
++#define THRESHOLD_BLOCKS(sbi, percent)                                        \
++      (ext4_blocks_count((sbi)->s_es) / 100 * (percent))
  int ext4_mb_init(struct super_block *sb)
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -194,19 +171,19 @@ index e4a00c78..14a4fb25 100644
        sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
        sbi->s_mb_stats = MB_DEFAULT_STATS;
        sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
-+      sbi->s_mb_c1_blocks =
-+              min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD),
-+                    MB_DEFAULT_MAX_C1_BYTES >> sb->s_blocksize_bits);
-+      sbi->s_mb_c2_blocks =
-+              min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD),
-+                    MB_DEFAULT_MAX_C2_BYTES >> sb->s_blocksize_bits);
-+      sbi->s_mb_c3_blocks =
-+              min_t(u64, THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD),
-+                    MB_DEFAULT_MAX_C3_BYTES >> sb->s_blocksize_bits);
++      if (!sbi->s_mb_c1_blocks)
++              sbi->s_mb_c1_blocks =
++                      THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C1_THRESHOLD);
++      if (!sbi->s_mb_c2_blocks)
++              sbi->s_mb_c2_blocks =
++                      THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C2_THRESHOLD);
++      if (!sbi->s_mb_c3_blocks)
++              sbi->s_mb_c3_blocks =
++                      THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C3_THRESHOLD);
        /*
         * The default group preallocation is 512, which for 4k block
         * sizes translates to 2 megabytes.  However for bigalloc file
-@@ -2879,6 +3010,8 @@ int ext4_mb_init(struct super_block *sb)
+@@ -2853,6 +2978,8 @@ int ext4_mb_init(struct super_block *sb)
                proc_create_data(EXT4_MB_PREALLOC_TABLE, S_IFREG | S_IRUGO |
                                 S_IWUSR, sbi->s_proc,
                                 &ext4_mb_prealloc_seq_fops, sb);
@@ -215,7 +192,7 @@ index e4a00c78..14a4fb25 100644
                proc_create_data("mb_last_group", S_IFREG | S_IRUGO |
                                 S_IWUSR, sbi->s_proc,
                                 &ext4_mb_seq_last_group_fops, sb);
-@@ -2932,6 +3065,7 @@ int ext4_mb_release(struct super_block *sb)
+@@ -2906,6 +3033,7 @@ int ext4_mb_release(struct super_block *
                remove_proc_entry("mb_last_group", sbi->s_proc);
                remove_proc_entry("mb_last_start", sbi->s_proc);
                remove_proc_entry(EXT4_MB_PREALLOC_TABLE, sbi->s_proc);
@@ -223,11 +200,10 @@ index e4a00c78..14a4fb25 100644
        }
  
        if (sbi->s_group_info) {
-@@ -2961,6 +3095,16 @@ int ext4_mb_release(struct super_block *sb)
-                               atomic_read(&sbi->s_bal_allocated),
+@@ -2936,6 +3064,16 @@ int ext4_mb_release(struct super_block *
                                atomic_read(&sbi->s_bal_reqs),
                                atomic_read(&sbi->s_bal_success));
-+              ext4_msg(sb, KERN_INFO,
+               ext4_msg(sb, KERN_INFO,
 +                      "mballoc: (%lu, %lu, %lu) useless c(0,1,2) loops",
 +                              atomic64_read(&sbi->s_bal_cX_failed[0]),
 +                              atomic64_read(&sbi->s_bal_cX_failed[1]),
@@ -237,39 +213,97 @@ index e4a00c78..14a4fb25 100644
 +                              atomic64_read(&sbi->s_bal_cX_skipped[0]),
 +                              atomic64_read(&sbi->s_bal_cX_skipped[1]),
 +                              atomic64_read(&sbi->s_bal_cX_skipped[2]));
-               ext4_msg(sb, KERN_INFO,
++              ext4_msg(sb, KERN_INFO,
                      "mballoc: %u extents scanned, %u goal hits, "
                                "%u 2^N hits, %u breaks, %u lost",
-diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
-index 81d56d0b..3ee74eac 100644
---- a/fs/ext4/mballoc.h
-+++ b/fs/ext4/mballoc.h
-@@ -84,6 +84,12 @@ extern ushort ext4_mballoc_debug;
-  * for which requests use 2^N search using buddies
-  */
- #define MB_DEFAULT_ORDER2_REQS                8
-+#define MB_DEFAULT_C1_THRESHOLD               15
-+#define MB_DEFAULT_C2_THRESHOLD               10
-+#define MB_DEFAULT_C3_THRESHOLD               5
-+#define MB_DEFAULT_MAX_C1_BYTES               (15ULL << 40) /* 15TB */
-+#define MB_DEFAULT_MAX_C2_BYTES               (10ULL << 40)
-+#define MB_DEFAULT_MAX_C3_BYTES               (5ULL << 40)
+                               atomic_read(&sbi->s_bal_ex_scanned),
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -1409,6 +1409,9 @@ struct ext4_sb_info {
+       unsigned int s_mb_min_to_scan;
+       unsigned int s_mb_stats;
+       unsigned int s_mb_order2_reqs;
++      ext4_fsblk_t s_mb_c1_blocks;
++      ext4_fsblk_t s_mb_c2_blocks;
++      ext4_fsblk_t s_mb_c3_blocks;
+       unsigned long *s_mb_prealloc_table;
+       unsigned int s_mb_group_prealloc;
+       unsigned int s_max_dir_size_kb;
+@@ -1425,6 +1428,9 @@ struct ext4_sb_info {
+       atomic_t s_bal_goals;   /* goal hits */
+       atomic_t s_bal_breaks;  /* too long searches */
+       atomic_t s_bal_2orders; /* 2^order hits */
++      /* cX loop didn't find blocks */
++      atomic64_t s_bal_cX_failed[3];
++      atomic64_t s_bal_cX_skipped[3];
+       spinlock_t s_bal_lock;
+       unsigned long s_mb_buddies_generated;
+       unsigned long long s_mb_generation_time;
+@@ -2115,6 +2121,8 @@ struct ext4_sb_info {
+ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
  
- /*
-  * default group prealloc size 512 blocks
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index ca8b50c8..b492245b 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -2746,6 +2746,73 @@ static ssize_t sbi_deprecated_show(struct ext4_attr *a,
+ /* mballoc.c */
++extern int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
++                                ext4_fsblk_t *blocks);
+ extern long ext4_mb_stats;
+ extern long ext4_mb_max_to_scan;
+ extern int ext4_mb_init(struct super_block *);
+Index: linux-stage/fs/ext4/super.c
+===================================================================
+--- linux-stage.orig/fs/ext4/super.c
++++ linux-stage/fs/ext4/super.c
+@@ -1208,6 +1208,7 @@ enum {
+       Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
+       Opt_inode_readahead_blks, Opt_journal_ioprio,
+       Opt_dioread_nolock, Opt_dioread_lock,
++      Opt_mb_c1_threshold, Opt_mb_c2_threshold, Opt_mb_c3_threshold,
+       Opt_no_mbcache,
+       Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+       Opt_max_dir_size_kb, Opt_nojournal_checksum,
+@@ -1287,6 +1288,9 @@ static const match_table_t tokens = {
+       {Opt_nodiscard, "nodiscard"},
+       {Opt_init_itable, "init_itable=%u"},
+       {Opt_no_mbcache, "no_mbcache"},
++      {Opt_mb_c1_threshold, "mb_c1_threshold=%s"},
++      {Opt_mb_c2_threshold, "mb_c2_threshold=%s"},
++      {Opt_mb_c3_threshold, "mb_c3_threshold=%s"},
+       {Opt_init_itable, "init_itable"},
+       {Opt_noinit_itable, "noinit_itable"},
+       {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+@@ -1449,6 +1453,9 @@ static const struct mount_opts {
+       {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
+       {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
+       {Opt_no_mbcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
++      {Opt_mb_c1_threshold, 0, MOPT_STRING},
++      {Opt_mb_c2_threshold, 0, MOPT_STRING},
++      {Opt_mb_c3_threshold, 0, MOPT_STRING},
+       {Opt_commit, 0, MOPT_GTE0},
+       {Opt_max_batch_time, 0, MOPT_GTE0},
+       {Opt_min_batch_time, 0, MOPT_GTE0},
+@@ -1571,6 +1578,12 @@ static const struct mount_opts {
+               sbi->s_max_dir_size_kb = arg;
+               /* reset s_warning_dir_size and make it re-calculated */
+               sbi->s_warning_dir_size = 0;
++      } else if (token == Opt_mb_c1_threshold) {
++              save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c1_blocks);
++      } else if (token == Opt_mb_c2_threshold) {
++              save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c2_blocks);
++      } else if (token == Opt_mb_c3_threshold) {
++              save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c3_blocks);
+       } else if (token == Opt_stripe) {
+               sbi->s_stripe = arg;
+       } else if (token == Opt_resuid) {
+@@ -2734,6 +2747,74 @@ static ssize_t sbi_deprecated_show(struc
        return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
  }
  
-+static int save_threshold(struct ext4_sb_info *sbi, const char *buf,
-+                       ext4_fsblk_t *blocks) {
++int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
++                         ext4_fsblk_t *blocks) {
 +      unsigned long long val;
 +
-+      if (!parse_strtoull(buf, 100, &val)) {
++      if (!parse_strtoull(buf, 100, &val) && val <= 100) {
 +              *blocks = val * ext4_blocks_count(sbi->s_es) / 100;
 +              return 0;
 +      }
@@ -277,14 +311,15 @@ index ca8b50c8..b492245b 100644
 +      return -EINVAL;
 +}
 +
-+#define THRESHOLD_PERCENT(ts) (ts * 100 / ext4_blocks_count(sbi->s_es))
++#define THRESHOLD_PERCENT(sbi, blocks)                                        \
++      (((blocks) - 1) * 100 / ext4_blocks_count((sbi)->s_es) + 1)
 +static ssize_t mb_c1_threshold_store(struct ext4_attr *a,
 +                                  struct ext4_sb_info *sbi,
 +                                  const char *buf, size_t count)
 +{
 +      int ret;
 +
-+      ret = save_threshold(sbi, buf, &sbi->s_mb_c1_blocks);
++      ret = save_threshold_percent(sbi, buf, &sbi->s_mb_c1_blocks);
 +
 +      return ret ? ret : count;
 +}
@@ -293,7 +328,7 @@ index ca8b50c8..b492245b 100644
 +                                 struct ext4_sb_info *sbi, char *buf)
 +{
 +      return snprintf(buf, PAGE_SIZE, "%llu\n",
-+                      THRESHOLD_PERCENT(sbi->s_mb_c1_blocks));
++                      THRESHOLD_PERCENT(sbi, sbi->s_mb_c1_blocks));
 +}
 +
 +static ssize_t mb_c2_threshold_store(struct ext4_attr *a,
@@ -302,7 +337,7 @@ index ca8b50c8..b492245b 100644
 +{
 +      int ret;
 +
-+      ret = save_threshold(sbi, buf, &sbi->s_mb_c2_blocks);
++      ret = save_threshold_percent(sbi, buf, &sbi->s_mb_c2_blocks);
 +      return ret ? ret : count;
 +}
 +
@@ -310,7 +345,7 @@ index ca8b50c8..b492245b 100644
 +                                 struct ext4_sb_info *sbi, char *buf)
 +{
 +              return snprintf(buf, PAGE_SIZE, "%llu\n",
-+                              THRESHOLD_PERCENT(sbi->s_mb_c2_blocks));
++                              THRESHOLD_PERCENT(sbi, sbi->s_mb_c2_blocks));
 +}
 +
 +static ssize_t mb_c3_threshold_store(struct ext4_attr *a,
@@ -319,7 +354,7 @@ index ca8b50c8..b492245b 100644
 +{
 +      int ret;
 +
-+      ret = save_threshold(sbi, buf, &sbi->s_mb_c3_blocks);
++      ret = save_threshold_percent(sbi, buf, &sbi->s_mb_c3_blocks);
 +
 +      return ret ? ret : count;
 +}
@@ -328,14 +363,14 @@ index ca8b50c8..b492245b 100644
 +                                 struct ext4_sb_info *sbi, char *buf)
 +{
 +              return snprintf(buf, PAGE_SIZE, "%llu\n",
-+                              THRESHOLD_PERCENT(sbi->s_mb_c3_blocks));
++                              THRESHOLD_PERCENT(sbi, sbi->s_mb_c3_blocks));
 +}
 +
 +
  #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
  static struct ext4_attr ext4_attr_##_name = {                 \
        .attr = {.name = __stringify(_name), .mode = _mode },   \
-@@ -2802,6 +2869,9 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+@@ -2790,6 +2857,9 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats
  EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
  EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
  EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
@@ -345,7 +380,7 @@ index ca8b50c8..b492245b 100644
  EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
  EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
  EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-@@ -2833,6 +2903,9 @@ static struct attribute *ext4_attrs[] = {
+@@ -2820,6 +2890,9 @@ static struct attribute *ext4_attrs[] =
        ATTR_LIST(mb_max_to_scan),
        ATTR_LIST(mb_min_to_scan),
        ATTR_LIST(mb_order2_req),
@@ -355,3 +390,20 @@ index ca8b50c8..b492245b 100644
        ATTR_LIST(mb_small_req),
        ATTR_LIST(mb_large_req),
        ATTR_LIST(mb_group_prealloc),
+Index: linux-stage/fs/ext4/mballoc.h
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.h
++++ linux-stage/fs/ext4/mballoc.h
+@@ -84,6 +84,12 @@ extern ushort ext4_mballoc_debug;
+  * for which requests use 2^N search using buddies
+  */
+ #define MB_DEFAULT_ORDER2_REQS                8
++#define MB_DEFAULT_C1_THRESHOLD               15
++#define MB_DEFAULT_C2_THRESHOLD               10
++#define MB_DEFAULT_C3_THRESHOLD               5
++#define MB_DEFAULT_MAX_C1_BYTES               (15ULL << 40) /* 15TB */
++#define MB_DEFAULT_MAX_C2_BYTES               (10ULL << 40)
++#define MB_DEFAULT_MAX_C3_BYTES               (5ULL << 40)
+ /*
+  * default group prealloc size 512 blocks