Whamcloud - gitweb
LU-16750 tune2fs: add "-E iops" to set/clear IOPS groups 91/52091/4
authorBobi Jam <bobijam@whamcloud.com>
Fri, 25 Aug 2023 04:00:52 +0000 (12:00 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Thu, 31 Aug 2023 17:36:22 +0000 (17:36 +0000)
Add an option to set/clear the IOPS regions of the storage, for example:

-E iops=0-1024G:^4096-8192G

sets the EXT2_BG_IOPS flag for block groups in 0 to 1024GiB and clears
the flag for those in 4096 to 8192GiB.

Change-Id: I52b773f9ad877a01b07f2e63d4d7ef4931499446
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/52091
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lib/ext2fs/ext2fs.h
lib/ext2fs/initialize.c
misc/mke2fs.c
misc/tune2fs.8.in
misc/tune2fs.c

index cdc9d98..91794bb 100644 (file)
@@ -1556,6 +1556,8 @@ extern errcode_t ext2fs_initialize(const char *name, int flags,
                                   struct ext2_super_block *param,
                                   io_manager manager, ext2_filsys *ret_fs);
 extern errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs, int super_only);
+extern void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count);
+extern void ext2fs_clear_iops_group(ext2_filsys fs, blk64_t *array, int count);
 
 /* icount.c */
 extern void ext2fs_free_icount(ext2_icount_t icount);
index e96f3ca..b62c6bb 100644 (file)
@@ -671,3 +671,40 @@ errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs, int super_only)
        ext2fs_mark_super_dirty(fs);
        return 0;
 }
+
+/*
+ * Set (set != 0) or clear (set == 0) EXT2_BG_IOPS on every block group
+ * selected by the ranges in array, refreshing each touched group
+ * descriptor's checksum.
+ *
+ * array holds count values forming (start, end) pairs.  Each value is
+ * divided, rounding up, by fs->blocksize and then by the number of blocks
+ * per group to obtain a group number; groups in [start, end) are updated.
+ * A trailing unpaired value is ignored, and ranges are clamped to the
+ * groups that actually exist in fs.
+ */
+static void ext2fs_set_clear_iops_group(ext2_filsys fs, blk64_t *array,
+                                       int count, int set)
+{
+       int i;
+       dgrp_t j;
+       blk64_t start, end;
+
+       if (!array || count < 2)
+               return;
+
+       /* i + 1 < count: never read past the array on an odd count */
+       for (i = 0; i + 1 < count; i += 2) {
+               /* keep the 64-bit quotients until they have been clamped */
+               start = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i],
+                                                           fs->blocksize),
+                                         EXT2_BLOCKS_PER_GROUP(fs->super));
+               end = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i + 1],
+                                                         fs->blocksize),
+                                       EXT2_BLOCKS_PER_GROUP(fs->super));
+
+               /* a range may extend beyond the end of the file system */
+               if (end > fs->group_desc_count)
+                       end = fs->group_desc_count;
+               if (start >= end)
+                       continue;
+
+               for (j = (dgrp_t)start; j < (dgrp_t)end; j++) {
+                       if (set)
+                               ext2fs_bg_flags_set(fs, j, EXT2_BG_IOPS);
+                       else
+                               ext2fs_bg_flags_clear(fs, j, EXT2_BG_IOPS);
+                       ext2fs_group_desc_csum_set(fs, j);
+               }
+       }
+}
+
+/*
+ * Public entry point: apply EXT2_BG_IOPS to the block groups described by
+ * the count values in array by delegating to the shared set/clear helper
+ * in "set" mode.
+ */
+void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count)
+{
+       const int set_flag = 1;
+
+       ext2fs_set_clear_iops_group(fs, array, count, set_flag);
+}
+
+/*
+ * Public entry point: remove EXT2_BG_IOPS from the block groups described
+ * by the count values in array by delegating to the shared set/clear
+ * helper in "clear" mode.
+ */
+void ext2fs_clear_iops_group(ext2_filsys fs, blk64_t *array, int count)
+{
+       const int set_flag = 0;
+
+       ext2fs_set_clear_iops_group(fs, array, count, set_flag);
+}
index 230f6f9..b62252a 100644 (file)
@@ -3084,29 +3084,6 @@ try_user:
        return 0;
 }
 
-static void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count)
-{
-       int i;
-       dgrp_t j, start, end;
-
-       if (!array || !count)
-               return;
-
-       for (i = 0; i < count; i += 2) {
-               start = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i],
-                                                           fs->blocksize),
-                                         EXT2_BLOCKS_PER_GROUP(fs->super));
-               end = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i + 1],
-                                                         fs->blocksize),
-                                       EXT2_BLOCKS_PER_GROUP(fs->super));
-
-               for (j = start; j < end; j++) {
-                       ext2fs_bg_flags_set(fs, j, EXT2_BG_IOPS);
-                       ext2fs_group_desc_csum_set(fs, j);
-               }
-       }
-}
-
 int main (int argc, char *argv[])
 {
        errcode_t       retval = 0;
index 2eb7e88..42ba8dd 100644 (file)
@@ -266,6 +266,15 @@ a few blocks per cpu is ideal.
 Set a flag in the file system superblock indicating that errors have been found.
 This will force fsck to run at the next mount.
 .TP
+.BI iops= [^]<size_range>[:[^]<size_range>][...]
+Set or clear the IOPS flag on the block groups in each \fIsize_range\fR
+(a range prefixed with ^ is cleared), for example:
+.B iops=0-1024G:^4096-8192G
+This tells the file system that the block groups in 0 to 1024GiB are on
+relatively faster storage and that those in 4096 to 8192GiB are not,
+which allows the kernel block allocator to place metadata allocations on
+the high-IOPS storage of a hybrid flash/HDD device for better
+performance.
+.TP
 .B test_fs
 Set a flag in the file system superblock indicating that it may be
 mounted using experimental kernel code, such as the ext4dev file system.
index ce4b90f..948ddc6 100644 (file)
@@ -52,6 +52,7 @@ extern int optind;
 #include <sys/types.h>
 #include <libgen.h>
 #include <limits.h>
+#include <ctype.h>
 #ifdef HAVE_SYS_IOCTL_H
 #include <sys/ioctl.h>
 #endif
@@ -129,6 +130,14 @@ static int feature_64bit;
 static int fsck_requested;
 static char *undo_file;
 int enabling_casefold;
+/*
+ * Range lists collected from "-E iops=...": iops_* holds the ranges to
+ * flag, n_iops_* the ones given with a '^' prefix.  Each list is a flat
+ * array of (start, end) pairs that starts out in a static array.
+ * *_size is the capacity and *_count the fill level, both counted in
+ * array elements (not bytes), because they are compared with each other.
+ */
+blk64_t iops_array[64];
+blk64_t *iops_range = iops_array;
+unsigned int iops_size = sizeof(iops_array) / sizeof(iops_array[0]);
+unsigned int iops_count = 0;
+blk64_t n_iops_array[64];
+blk64_t *n_iops_range = n_iops_array;
+unsigned int n_iops_size = sizeof(n_iops_array) / sizeof(n_iops_array[0]);
+unsigned int n_iops_count = 0;
 
 int journal_size, journal_fc_size, journal_flags;
 char *journal_device;
@@ -2264,6 +2273,88 @@ void do_findfs(int argc, char **argv)
 }
 #endif
 
+/*
+ * Ensure the IOPS range list *range can take one more (start, end) pair.
+ *
+ * fixed/fixed_nr describe the static array the list starts out in; while
+ * *range still points at it the capacity is fixed_nr elements, whatever
+ * *size holds.  When the list is full its capacity is doubled: the static
+ * array is copied into a fresh heap buffer the first time, later growth
+ * uses realloc().  *range and *size are only updated on success.
+ *
+ * Returns 0 on success, -E2BIG if the capacity cannot be doubled without
+ * overflowing, -ENOMEM if the allocation fails.
+ */
+static int iops_range_reserve(blk64_t **range, unsigned int *size,
+                             unsigned int count, blk64_t *fixed,
+                             unsigned int fixed_nr)
+{
+       unsigned int cap, new_cap;
+       blk64_t *new_array;
+
+       cap = (*range == fixed) ? fixed_nr : *size;
+       if (count + 2 <= cap)
+               return 0;
+
+       new_cap = cap << 1;
+       if (new_cap <= cap || new_cap > (size_t)-1 / sizeof(blk64_t))
+               return -E2BIG;
+
+       if (*range == fixed) {
+               /* malloc() does not carry over the entries stored so far */
+               new_array = malloc(new_cap * sizeof(blk64_t));
+               if (new_array)
+                       memcpy(new_array, fixed, count * sizeof(blk64_t));
+       } else {
+               new_array = realloc(*range, new_cap * sizeof(blk64_t));
+       }
+       if (!new_array)
+               return -ENOMEM;
+
+       *range = new_array;
+       *size = new_cap;
+       return 0;
+}
+
+/*
+ * Parse one "[^]<start>-<end>" range of the "iops=" extended option and
+ * append it to iops_range, or to n_iops_range when it starts with '^'.
+ *
+ * e.g. ^0-1024G
+ *      ^ ^     ^
+ *      | |     |
+ *      | |     p_end (the NUL terminating this range)
+ *      | p_hyphen (the '-' between the two values)
+ *      p_start
+ *
+ * The text is modified in place: the hyphen is overwritten so that the
+ * start value becomes its own string, and when only the end value has a
+ * unit suffix that suffix is copied to the start value as well.
+ *
+ * Returns 0 on success, or -E2BIG / -ENOMEM when the list cannot grow.
+ */
+static int parse_range(char *p_start, char *p_end, char *p_hyphen)
+{
+       blk64_t start, end;
+       int negative = 0;
+       int ret;
+
+       if (*p_start == '^') {
+               negative = 1;
+               p_start++;
+       }
+       /* the end value has to be read before the hyphen is overwritten */
+       end = parse_num_blocks(p_hyphen + 1, -1);
+
+       /* <ctype.h> wants unsigned char values; plain char may be negative */
+       if (isalpha((unsigned char)*(p_end - 1)) &&
+           isdigit((unsigned char)*(p_hyphen - 1))) {
+               /* copy G/M/K unit to start value */
+               *p_hyphen = *(p_end - 1);
+               p_hyphen++;
+       }
+       *p_hyphen = 0;
+
+       start = parse_num_blocks(p_start, -1);
+
+       /* add to iops_range/n_iops_range */
+       if (negative) {
+               ret = iops_range_reserve(&n_iops_range, &n_iops_size,
+                                        n_iops_count, n_iops_array,
+                                        sizeof(n_iops_array) /
+                                        sizeof(n_iops_array[0]));
+               if (ret)
+                       return ret;
+               n_iops_range[n_iops_count++] = start;
+               n_iops_range[n_iops_count++] = end;
+       } else {
+               ret = iops_range_reserve(&iops_range, &iops_size,
+                                        iops_count, iops_array,
+                                        sizeof(iops_array) /
+                                        sizeof(iops_array[0]));
+               if (ret)
+                       return ret;
+               iops_range[iops_count++] = start;
+               iops_range[iops_count++] = end;
+       }
+
+       return 0;
+}
+
+
 static int parse_extended_opts(ext2_filsys fs, const char *opts)
 {
        struct ext2_super_block *sb = fs->super;
@@ -2272,6 +2363,7 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
        int     r_usage = 0;
        int encoding = 0;
        char    *encoding_flags = NULL;
+       int ret;
 
        len = strlen(opts);
        buf = malloc(len+1);
@@ -2453,6 +2545,57 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
                                r_usage++;
                                continue;
                        }
+               } else if (!strcmp(token, "iops")) {
+                       char *p_colon, *p_hyphen;
+                       blk64_t start, end;
+
+                       /* example: iops=0-1024G:4096-8192G */
+
+                       if (!arg) {
+                               r_usage++;
+                               continue;
+                       }
+                       p_colon = strchr(arg, ':');
+                       while (p_colon != NULL) {
+                               *p_colon = 0;
+
+                               p_hyphen = strchr(arg, '-');
+                               if (p_hyphen == NULL) {
+                                       fprintf(stderr,
+                                               _("error: parse iops %s\n"),
+                                               arg);
+                                       r_usage++;
+                                       break;
+                               }
+
+                               ret = parse_range(arg, p_colon, p_hyphen);
+                               if (ret < 0) {
+                                       fprintf(stderr,
+                                               _("error: parse iops %s:%d\n"),
+                                               arg, ret);
+                                       r_usage++;
+                                       break;
+                               }
+
+                               arg = p_colon + 1;
+                               p_colon = strchr(arg, ':');
+                       }
+                       p_hyphen = strchr(arg, '-');
+                       if (p_hyphen == NULL) {
+                               fprintf(stderr,
+                                       _("error: parse iops %s\n"), arg);
+                               r_usage++;
+                               continue;
+                       }
+
+                       ret = parse_range(arg, arg + strlen(arg), p_hyphen);
+                       if (ret < 0) {
+                               fprintf(stderr,
+                                       _("error: parse iops %s:%d\n"),
+                                       arg, ret);
+                               r_usage++;
+                               continue;
+                       }
                } else
                        r_usage++;
        }
@@ -2490,6 +2633,7 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
                        "\tstride=<RAID per-disk chunk size in blocks>\n"
                        "\tstripe_width=<RAID stride*data disks in blocks>\n"
                        "\tforce_fsck\n"
+                       "\tiops=[^]<iops storage size range>\n"
                        "\ttest_fs\n"
                        "\t^test_fs\n"
                        "\ttrack_trim\n"
@@ -2497,6 +2641,10 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
                        "\tencoding=<encoding>\n"
                        "\tencoding_flags=<flags>\n"));
                free(buf);
+               if (iops_range != iops_array)
+                       free(iops_range);
+               if (n_iops_range != n_iops_array)
+                       free(n_iops_range);
                return 1;
        }
        free(buf);
@@ -3550,6 +3698,22 @@ _("Warning: The journal is dirty. You may wish to replay the journal like:\n\n"
                }
        }
 
+       if (iops_range && iops_count || n_iops_range && n_iops_count) {
+               if (iops_count) {
+                       ext2fs_set_iops_group(fs, iops_range, iops_count);
+                       sb->s_flags |= EXT2_FLAGS_HAS_IOPS;
+               }
+               if (n_iops_count)
+                       ext2fs_clear_iops_group(fs, n_iops_range, n_iops_count);
+               fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+               ext2fs_mark_super_dirty(fs);
+
+               if (iops_range != iops_array)
+                       free(iops_range);
+               if (n_iops_range != n_iops_array)
+                       free(n_iops_range);
+       }
+
        if (Q_flag) {
                if (mount_flags & EXT2_MF_MOUNTED) {
                        fputs(_("The quota feature may only be changed when "