From a59ac3441448d61d66880e2e5329585191c98716 Mon Sep 17 00:00:00 2001
From: Bobi Jam
Date: Fri, 25 Aug 2023 12:00:52 +0800
Subject: [PATCH] LU-16750 tune2fs: add "-E iops" to set/clear IOPS groups

Add an option to set/clear IOPS region of storage, for example:

  -E iops=0-1024G:^4096-8192G

to set EXT4_BG_IOPS flag for block groups in 0 to 1024GiB and
clear the flag for those in 4096 to 8192GiB.

Change-Id: I52b773f9ad877a01b07f2e63d4d7ef4931499446
Signed-off-by: Bobi Jam
Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/52091
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
---
 lib/ext2fs/ext2fs.h     |   2 +
 lib/ext2fs/initialize.c |  53 ++++++++++++++
 misc/mke2fs.c           |  23 ------
 misc/tune2fs.8.in       |   9 +++
 misc/tune2fs.c          | 181 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 245 insertions(+), 23 deletions(-)

diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index cdc9d98..91794bb 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1556,6 +1556,8 @@ extern errcode_t ext2fs_initialize(const char *name, int flags,
 				   struct ext2_super_block *param,
 				   io_manager manager, ext2_filsys *ret_fs);
 extern errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs, int super_only);
+extern void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count);
+extern void ext2fs_clear_iops_group(ext2_filsys fs, blk64_t *array, int count);
 
 /* icount.c */
 extern void ext2fs_free_icount(ext2_icount_t icount);
diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c
index e96f3ca..b62c6bb 100644
--- a/lib/ext2fs/initialize.c
+++ b/lib/ext2fs/initialize.c
@@ -671,3 +671,56 @@ errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs, int super_only)
 	ext2fs_mark_super_dirty(fs);
 	return 0;
 }
+
+/*
+ * Set or clear EXT2_BG_IOPS on the block groups covered by the byte ranges
+ * in @array, which holds @count values laid out as (start, end) pairs.
+ * Both offsets are rounded up to a block group index, so the end group is
+ * exclusive.  Groups past the end of the filesystem are skipped and an
+ * unpaired trailing value is ignored.
+ */
+static void ext2fs_set_clear_iops_group(ext2_filsys fs, blk64_t *array,
+					int count, int set)
+{
+	blk64_t start, end;
+	dgrp_t j;
+	int i;
+
+	if (!array || count < 2)
+		return;
+
+	for (i = 0; i + 1 < count; i += 2) {
+		start = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i],
+							    fs->blocksize),
+					  EXT2_BLOCKS_PER_GROUP(fs->super));
+		end = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i + 1],
+							  fs->blocksize),
+					EXT2_BLOCKS_PER_GROUP(fs->super));
+
+		/* user-supplied ranges may extend past the last group */
+		if (end > fs->group_desc_count)
+			end = fs->group_desc_count;
+		if (start >= end)
+			continue;
+
+		for (j = start; j < end; j++) {
+			if (set)
+				ext2fs_bg_flags_set(fs, j, EXT2_BG_IOPS);
+			else
+				ext2fs_bg_flags_clear(fs, j, EXT2_BG_IOPS);
+			ext2fs_group_desc_csum_set(fs, j);
+		}
+	}
+}
+
+/* Set EXT2_BG_IOPS on the block groups covered by the ranges in @array. */
+void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count)
+{
+	ext2fs_set_clear_iops_group(fs, array, count, 1);
+}
+
+/* Clear EXT2_BG_IOPS on the block groups covered by the ranges in @array. */
+void ext2fs_clear_iops_group(ext2_filsys fs, blk64_t *array, int count)
+{
+	ext2fs_set_clear_iops_group(fs, array, count, 0);
+}
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 230f6f9..b62252a 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -3084,29 +3084,6 @@ try_user:
 	return 0;
 }
 
-static void ext2fs_set_iops_group(ext2_filsys fs, blk64_t *array, int count)
-{
-	int i;
-	dgrp_t j, start, end;
-
-	if (!array || !count)
-		return;
-
-	for (i = 0; i < count; i += 2) {
-		start = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i],
-							    fs->blocksize),
-					  EXT2_BLOCKS_PER_GROUP(fs->super));
-		end = ext2fs_div64_ceil(ext2fs_div64_ceil(array[i + 1],
-							  fs->blocksize),
-					EXT2_BLOCKS_PER_GROUP(fs->super));
-
-		for (j = start; j < end; j++) {
-			ext2fs_bg_flags_set(fs, j, EXT2_BG_IOPS);
-			ext2fs_group_desc_csum_set(fs, j);
-		}
-	}
-}
-
 int main (int argc, char *argv[])
 {
 	errcode_t retval = 0;
diff --git a/misc/tune2fs.8.in b/misc/tune2fs.8.in
index 2eb7e88..42ba8dd 100644
--- a/misc/tune2fs.8.in
+++ b/misc/tune2fs.8.in
@@ -266,6 +266,15 @@ a few blocks per cpu is ideal.
 Set a flag in the file system superblock indicating that errors have been found.
 This will force fsck to run at the next mount.
 .TP
+.BI iops= [^]<size_range>[:[^]<size_range>][...]
+Set or clear the IOPS flag on the block groups in each \fIsize_range\fR, e.g.
+.B iops=0-1024G:^4096-8192G
+tells the file system that the block groups in 0 to 1024GiB are on relatively
+faster storage and that those in 4096 to 8192GiB are not.
+This allows the kernel block allocator to optimize metadata allocations onto
+high-IOPS storage on hybrid flash/HDD devices for better performance.
+A range prefixed with \fB^\fR clears the flag.
+.TP
 .B test_fs
 Set a flag in the file system superblock indicating that it may be
 mounted using experimental kernel code, such as the ext4dev file system.
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index ce4b90f..948ddc6 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -52,6 +52,7 @@ extern int optind;
 #include <sys/types.h>
 #include <libgen.h>
 #include <limits.h>
+#include <ctype.h>
 #ifdef HAVE_SYS_IOCTL_H
 #include <sys/ioctl.h>
 #endif
@@ -129,6 +130,19 @@ static int feature_64bit;
 static int fsck_requested;
 static char *undo_file;
 int enabling_casefold;
+/*
+ * "-E iops=" byte ranges stored as (start, end) pairs: iops_range holds the
+ * ranges to flag as IOPS, n_iops_range the "^" ranges to clear.  The *_size
+ * values are capacities in array elements, not bytes.
+ */
+blk64_t iops_array[64];
+blk64_t *iops_range = iops_array;
+unsigned int iops_size = sizeof(iops_array) / sizeof(iops_array[0]);
+unsigned int iops_count = 0;
+blk64_t n_iops_array[64];
+blk64_t *n_iops_range = n_iops_array;
+unsigned int n_iops_size = sizeof(n_iops_array) / sizeof(n_iops_array[0]);
+unsigned int n_iops_count = 0;
 
 int journal_size, journal_fc_size, journal_flags;
 char *journal_device;
@@ -2264,6 +2278,119 @@ void do_findfs(int argc, char **argv)
 }
 #endif
 
+/* Release any heap-allocated iops range lists and reset them to empty. */
+static void free_iops_ranges(void)
+{
+	if (iops_range != iops_array)
+		free(iops_range);
+	if (n_iops_range != n_iops_array)
+		free(n_iops_range);
+	iops_range = iops_array;
+	iops_size = sizeof(iops_array) / sizeof(iops_array[0]);
+	iops_count = 0;
+	n_iops_range = n_iops_array;
+	n_iops_size = sizeof(n_iops_array) / sizeof(n_iops_array[0]);
+	n_iops_count = 0;
+}
+
+/*
+ * Append the pair (start, end) to the list *range, doubling its capacity
+ * when it is full.  @builtin is the static array the list starts out in;
+ * it cannot be realloc()ed, so the first growth copies it to the heap.
+ * Returns 0 on success, -E2BIG or -ENOMEM on failure (list left unchanged).
+ */
+static int iops_range_append(blk64_t **range, blk64_t *builtin,
+			     unsigned int *size, unsigned int *count,
+			     blk64_t start, blk64_t end)
+{
+	blk64_t *new_range;
+	unsigned int new_size;
+
+	if (*count + 2 > *size) {
+		new_size = *size << 1;
+		if (new_size <= *size)
+			return -E2BIG;
+
+		if (*range == builtin) {
+			new_range = malloc(new_size * sizeof(blk64_t));
+			if (new_range)
+				memcpy(new_range, builtin,
+				       *count * sizeof(blk64_t));
+		} else {
+			new_range = realloc(*range,
+					    new_size * sizeof(blk64_t));
+		}
+		if (!new_range)
+			return -ENOMEM;
+
+		*range = new_range;
+		*size = new_size;
+	}
+
+	(*range)[(*count)++] = start;
+	(*range)[(*count)++] = end;
+
+	return 0;
+}
+
+/*
+ * Parse one "[^]start-end" range and append it to iops_range, or to
+ * n_iops_range when it is prefixed with '^'.
+ *
+ * e.g. ^0-1024G
+ *      ^       ^
+ *      |       |
+ *   p_start   p_end
+ *
+ * @p_hyphen points at the '-' separator and @p_end at the terminating NUL.
+ * When only the end value carries a unit suffix, the start value uses the
+ * same unit.  The string is left unmodified on return.
+ * Returns 0 on success or a negative errno value.
+ */
+static int parse_range(char *p_start, char *p_end, char *p_hyphen)
+{
+	blk64_t start, end;
+	char saved[2];
+	int negative = 0;
+
+	if (*p_start == '^') {
+		negative = 1;
+		p_start++;
+	}
+	/* both the start and the end value must be present */
+	if (p_hyphen <= p_start || p_hyphen + 1 >= p_end)
+		return -EINVAL;
+
+	end = parse_num_blocks(p_hyphen + 1, -1);
+
+	/*
+	 * Temporarily terminate the start value at the hyphen, first
+	 * copying the unit suffix of the end value to it if it has none.
+	 */
+	saved[0] = p_hyphen[0];
+	saved[1] = p_hyphen[1];
+	if (isalpha((unsigned char)*(p_end - 1)) &&
+	    isdigit((unsigned char)*(p_hyphen - 1))) {
+		p_hyphen[0] = *(p_end - 1);
+		p_hyphen[1] = 0;
+	} else {
+		p_hyphen[0] = 0;
+	}
+	start = parse_num_blocks(p_start, -1);
+	p_hyphen[0] = saved[0];
+	p_hyphen[1] = saved[1];
+
+	if (end <= start)
+		return -EINVAL;
+
+	if (negative)
+		return iops_range_append(&n_iops_range, n_iops_array,
+					 &n_iops_size, &n_iops_count,
+					 start, end);
+	return iops_range_append(&iops_range, iops_array, &iops_size,
+				 &iops_count, start, end);
+}
+
 static int parse_extended_opts(ext2_filsys fs, const char *opts)
 {
 	struct ext2_super_block *sb = fs->super;
@@ -2272,6 +2399,7 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
 	int r_usage = 0;
 	int encoding = 0;
 	char *encoding_flags = NULL;
+	int ret;
 
 	len = strlen(opts);
 	buf = malloc(len+1);
@@ -2453,6 +2581,42 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
 				r_usage++;
 				continue;
 			}
+		} else if (!strcmp(token, "iops")) {
+			char *p_colon, *p_hyphen;
+
+			/* example: iops=0-1024G:^4096-8192G */
+
+			if (!arg) {
+				r_usage++;
+				continue;
+			}
+			/* parse each ':'-separated range, stop at first error */
+			while (arg != NULL) {
+				p_colon = strchr(arg, ':');
+				if (p_colon != NULL)
+					*p_colon = 0;
+
+				p_hyphen = strchr(arg, '-');
+				if (p_hyphen == NULL) {
+					fprintf(stderr,
+						_("error: parse iops %s\n"),
+						arg);
+					r_usage++;
+					break;
+				}
+
+				ret = parse_range(arg, arg + strlen(arg),
+						  p_hyphen);
+				if (ret < 0) {
+					fprintf(stderr,
+						_("error: parse iops %s:%d\n"),
+						arg, ret);
+					r_usage++;
+					break;
+				}
+
+				arg = p_colon ? p_colon + 1 : NULL;
+			}
 		} else
 			r_usage++;
 	}
@@ -2490,6 +2654,7 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
 			"\tstride=<RAID per-disk chunk size in blocks>\n"
 			"\tstripe_width=<RAID stride*data disks in blocks>\n"
 			"\tforce_fsck\n"
+			"\tiops=[^]<size_range>[:[^]<size_range>]...\n"
 			"\ttest_fs\n"
 			"\t^test_fs\n"
 			"\ttrack_trim\n"
@@ -2497,6 +2662,7 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
 			"\tencoding=<encoding>\n"
 			"\tencoding_flags=<flags>\n"));
 		free(buf);
+		free_iops_ranges();
 		return 1;
 	}
 	free(buf);
@@ -3550,6 +3716,21 @@ _("Warning: The journal is dirty. You may wish to replay the journal like:\n\n"
 		}
 	}
 
+	if (iops_count || n_iops_count) {
+		/* set first, so "^" ranges win where the two overlap */
+		if (iops_count) {
+			ext2fs_set_iops_group(fs, iops_range, iops_count);
+			sb->s_flags |= EXT2_FLAGS_HAS_IOPS;
+		}
+		if (n_iops_count)
+			ext2fs_clear_iops_group(fs, n_iops_range, n_iops_count);
+		/* group descriptors were modified, not just the superblock */
+		fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+		ext2fs_mark_super_dirty(fs);
+
+		free_iops_ranges();
+	}
+
 	if (Q_flag) {
 		if (mount_flags & EXT2_MF_MOUNTED) {
 			fputs(_("The quota feature may only be changed when "
-- 
1.8.3.1