From 0c17cb25f24730dca138a976a390f105d2191736 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 18 Feb 2008 22:56:25 -0500 Subject: [PATCH] Add support for setting RAID stride and stripe-width via mke2fs and tune2fs This is useful for mballoc to align block allocation on the RAID stripe boundaries. Signed-off-by: Rupesh Thakare Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- lib/ext2fs/initialize.c | 2 ++ misc/mke2fs.8.in | 19 ++++++++++++++++--- misc/mke2fs.c | 45 ++++++++++++++++++++++++++++++++++++--------- misc/tune2fs.8.in | 22 ++++++++++++++++++++-- misc/tune2fs.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 118 insertions(+), 15 deletions(-) diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c index 08822dc..7d06860 100644 --- a/lib/ext2fs/initialize.c +++ b/lib/ext2fs/initialize.c @@ -156,6 +156,8 @@ errcode_t ext2fs_initialize(const char *name, int flags, set_field(s_feature_incompat, 0); set_field(s_feature_ro_compat, 0); set_field(s_first_meta_bg, 0); + set_field(s_raid_stride, 0); /* default stride size: 0 */ + set_field(s_raid_stripe_width, 0); /* default stripe width: 0 */ set_field(s_flags, 0); if (super->s_feature_incompat & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) { retval = EXT2_ET_UNSUPP_FEATURE; diff --git a/misc/mke2fs.8.in b/misc/mke2fs.8.in index 9afd7f4..65c42bf 100644 --- a/misc/mke2fs.8.in +++ b/misc/mke2fs.8.in @@ -179,10 +179,23 @@ option is still accepted for backwards compatibility. The following extended options are supported: .RS 1.2i .TP -.BI stride= stripe-size +.BI stride= stride-size Configure the filesystem for a RAID array with -.I stripe-size -filesystem blocks per stripe. +.I stride-size +filesystem blocks. This is the number of blocks read or written to disk +before moving to next disk. This mostly affects placement of filesystem +metadata like bitmaps at +.BR mke2fs (8) +time to avoid placing them on a single disk, which can hurt performance. 
+It may also be used by the block allocator. +.TP +.BI stripe-width= stripe-width +Configure the filesystem for a RAID array with +.I stripe-width +filesystem blocks per stripe. This is typically stride-size * N, where +N is the number of data disks in the RAID (e.g. RAID 5 N+1, RAID 6 N+2). +This allows the block allocator to prevent read-modify-write of the +parity in a RAID stripe if possible when the data is written. .TP .BI resize= max-online-resize Reserve enough space so that the block group descriptor table can grow diff --git a/misc/mke2fs.c b/misc/mke2fs.c index ede6722..cfc8116 100644 --- a/misc/mke2fs.c +++ b/misc/mke2fs.c @@ -756,7 +756,7 @@ static int set_os(struct ext2_super_block *sb, char *os) static void parse_extended_opts(struct ext2_super_block *param, const char *opts) { - char *buf, *token, *next, *p, *arg; + char *buf, *token, *next, *p, *arg, *badopt = ""; int len; int r_usage = 0; @@ -783,16 +783,32 @@ static void parse_extended_opts(struct ext2_super_block *param, if (strcmp(token, "stride") == 0) { if (!arg) { r_usage++; + badopt = token; continue; } - fs_stride = strtoul(arg, &p, 0); - if (*p || (fs_stride == 0)) { + param->s_raid_stride = strtoul(arg, &p, 0); + if (*p || (param->s_raid_stride == 0)) { fprintf(stderr, _("Invalid stride parameter: %s\n"), arg); r_usage++; continue; } + } else if (strcmp(token, "stripe-width") == 0 || + strcmp(token, "stripe_width") == 0) { + if (!arg) { + r_usage++; + badopt = token; + continue; + } + param->s_raid_stripe_width = strtoul(arg, &p, 0); + if (*p || (param->s_raid_stripe_width == 0)) { + fprintf(stderr, + _("Invalid stripe-width parameter: %s\n"), + arg); + r_usage++; + continue; + } } else if (!strcmp(token, "resize")) { unsigned long resize, bpg, rsv_groups; unsigned long group_desc_count, desc_blocks; @@ -801,6 +817,7 @@ static void parse_extended_opts(struct ext2_super_block *param, if (!arg) { r_usage++; + badopt = token; continue; } @@ -851,21 +868,31 @@ static void 
parse_extended_opts(struct ext2_super_block *param, } } else if (!strcmp(token, "test_fs")) { param->s_flags |= EXT2_FLAGS_TEST_FILESYS; - } else + } else { r_usage++; + badopt = token; + } } if (r_usage) { - fprintf(stderr, _("\nBad options specified.\n\n" + fprintf(stderr, _("\nBad option(s) specified: %s\n\n" "Extended options are separated by commas, " "and may take an argument which\n" "\tis set off by an equals ('=') sign.\n\n" "Valid extended options are:\n" - "\tstride=\n" - "\tresize=\n" - "\ttest_fs\n")); + "\tstride=\n" + "\tstripe-width=\n" + "\tresize=\n\n" + "\ttest_fs\n"), + badopt); free(buf); exit(1); } + if (param->s_raid_stride && + (param->s_raid_stripe_width % param->s_raid_stride) != 0) + fprintf(stderr, _("\nWarning: RAID stripe-width %u not an even " + "multiple of stride %u.\n\n"), + param->s_raid_stripe_width, param->s_raid_stride); + free(buf); } @@ -1643,7 +1670,7 @@ int main (int argc, char *argv[]) test_disk(fs, &bb_list); handle_bad_blocks(fs, bb_list); - fs->stride = fs->super->s_raid_stride = fs_stride; + fs->stride = fs_stride = fs->super->s_raid_stride; retval = ext2fs_allocate_tables(fs); if (retval) { com_err(program_name, retval, diff --git a/misc/tune2fs.8.in b/misc/tune2fs.8.in index d1edbd9..435556f 100644 --- a/misc/tune2fs.8.in +++ b/misc/tune2fs.8.in @@ -150,10 +150,28 @@ Cause a kernel panic. .TP .BI \-E " extended-options" Set extended options for the filesystem. Extended options are comma -separated, and may take an argument using the equals ('=') sign. The -following extended options are supported: +separated, and may take an argument using the equals ('=') sign. +The following extended options are supported: .RS 1.2i .TP +.BI stride= stride-size +Configure the filesystem for a RAID array with +.I stride-size +filesystem blocks. This is the number of blocks read or written to disk +before moving to next disk. 
This mostly affects placement of filesystem +metadata like bitmaps at +.BR mke2fs (8) +time to avoid placing them on a single disk, which can hurt performance. +It may also be used by the block allocator. +.TP +.BI stripe-width= stripe-width +Configure the filesystem for a RAID array with +.I stripe-width +filesystem blocks per stripe. This is typically stride-size * N, where +N is the number of data disks in the RAID (e.g. RAID 5 N+1, RAID 6 N+2). +This allows the block allocator to prevent read-modify-write of the +parity in a RAID stripe if possible when the data is written. +.TP .B test_fs Set a flag in the filesystem superblock indicating that it may be mounted using experimental kernel code, such as the ext4dev filesystem. diff --git a/misc/tune2fs.c b/misc/tune2fs.c index d6999e5..083d6f5 100644 --- a/misc/tune2fs.c +++ b/misc/tune2fs.c @@ -81,6 +81,8 @@ static unsigned short errors; static int open_flag; static char *features_cmd; static char *mntopts_cmd; +static int stride, stripe_width; +static int stride_set, stripe_width_set; static char *extended_cmd; int journal_size, journal_flags; @@ -797,7 +799,36 @@ static void parse_extended_opts(ext2_filsys fs, const char *opts) fs->super->s_flags &= ~EXT2_FLAGS_TEST_FILESYS; printf("Clearing test filesystem flag\n"); ext2fs_mark_super_dirty(fs); - } else + } else if (strcmp(token, "stride") == 0) { + if (!arg) { + r_usage++; + continue; + } + stride = strtoul(arg, &p, 0); + if (*p || (stride == 0)) { + fprintf(stderr, + _("Invalid RAID stride: %s\n"), + arg); + r_usage++; + continue; + } + stride_set = 1; + } else if (strcmp(token, "stripe-width") == 0 || + strcmp(token, "stripe_width") == 0) { + if (!arg) { + r_usage++; + continue; + } + stripe_width = strtoul(arg, &p, 0); + if (*p || (stripe_width == 0)) { + fprintf(stderr, + _("Invalid RAID stripe-width: %s\n"), + arg); + r_usage++; + continue; + } + stripe_width_set = 1; + } else r_usage++; } if (r_usage) { @@ -806,6 +837,8 @@ static void 
parse_extended_opts(ext2_filsys fs, const char *opts) "and may take an argument which\n" "\tis set off by an equals ('=') sign.\n\n" "Valid extended options are:\n" + "\tstride=\n" + "\tstripe-width=\n" "\ttest_fs\n" "\t^test_fs\n")); free(buf); @@ -1006,6 +1039,16 @@ int main (int argc, char ** argv) if (l_flag) list_super (sb); + if (stride_set) { + sb->s_raid_stride = stride; + ext2fs_mark_super_dirty(fs); + printf(_("Setting stride size to %d\n"), stride); + } + if (stripe_width_set) { + sb->s_raid_stripe_width = stripe_width; + ext2fs_mark_super_dirty(fs); + printf(_("Setting stripe width to %d\n"), stripe_width); + } remove_error_table(&et_ext2_error_table); return (ext2fs_close (fs) ? 1 : 0); } -- 1.8.3.1