From 8e7439f9d8646cf916248173fdd841b456b26eac Mon Sep 17 00:00:00 2001 From: scjody Date: Wed, 10 Jun 2009 13:08:37 +0000 Subject: [PATCH] Branch HEAD b=17986 i=adilger i=scjody Allow tuning of the RAID stripe cache size in mount.lustre; automatically tune it to 2048 by default. Original patch from James Simmons --- lustre/doc/mount.lustre.8 | 4 ++ lustre/utils/mount_lustre.c | 125 +++++++++++++++++++++++++------------------- 2 files changed, 75 insertions(+), 54 deletions(-) diff --git a/lustre/doc/mount.lustre.8 b/lustre/doc/mount.lustre.8 index 40085b3..5e67b56 100644 --- a/lustre/doc/mount.lustre.8 +++ b/lustre/doc/mount.lustre.8 @@ -103,6 +103,10 @@ Start a client or MDT with a (colon-separated) list of known inactive OSTs. .TP .BI abort_recov Abort client recovery and start the target service immediately. +.TP +.BI md_stripe_cache_size +Sets the stripe cache size for server side disk with a striped raid +configuration. .SH EXAMPLES .TP .B mount -t lustre cfs21@tcp0:/testfs /mnt/myfilesystem diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 04cbcae..bbbe9ef 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -60,6 +60,7 @@ #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" +#define STRIPE_CACHE_SIZE "md/stripe_cache_size" #define MAX_RETRIES 99 int verbose = 0; @@ -67,6 +68,7 @@ int nomtab = 0; int fake = 0; int force = 0; int retry = 0; +int md_stripe_cache_size = 2048; char *progname = NULL; void usage(FILE *out) @@ -94,6 +96,8 @@ void usage(FILE *out) "\t\texclude=[:] : colon-separated list of " "inactive OSTs (e.g. lustre-OST0001)\n" "\t\tretry=: number of times mount is retried by client\n" + "\t\tmd_stripe_cache_size=: set the raid stripe cache " + "size for the underlying raid if present\n" ); exit((out != stdout) ? EINVAL : 0); } @@ -280,12 +284,16 @@ int parse_options(char *orig_options, int *flagp) * manner */ arg = opt; val = strchr(opt, '='); - if (val != NULL && strncmp(arg, "retry", 5) == 0) { - retry = atoi(val + 1); - if (retry > MAX_RETRIES) - retry = MAX_RETRIES; - else if (retry < 0) - retry = 0; + if (val != NULL) { + if (strncmp(arg, "md_stripe_cache_size", 20) == 0) { + md_stripe_cache_size = atoi(val + 1); + } else if (strncmp(arg, "retry", 5) == 0) { + retry = atoi(val + 1); + if (retry > MAX_RETRIES) + retry = MAX_RETRIES; + else if (retry < 0) + retry = 0; + } } else if (strncmp(opt, "force", 5) == 0) { //XXX special check for 'force' option ++force; @@ -332,7 +340,7 @@ int write_file(char *path, char *buf) /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_tunables(char *source, int src_len) +int set_blockdev_tunables(char *source) { glob_t glob_info; struct stat stat_buf; @@ -355,52 +363,32 @@ int set_tunables(char *source, int src_len) return -EINVAL; } - src_len = sizeof(real_path); - if (strncmp(real_path, "/dev/loop", 9) == 0) return 0; if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL)) return 0; - dev = real_path + src_len - 1; - while (dev > real_path && (*dev != '/')) { - if (isdigit(*dev)) - *dev = 0; - dev--; - } - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); - if (rc == 0 && (strlen(buf) - 1) > 0) { - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); - if (rc && verbose) - fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - return rc; - } - - if (rc != ENOENT) - return rc; + snprintf(path, sizeof(path), "/sys/block%s", real_path + 4); + if (access(path, X_OK) == 0) + goto set_params; /* The name of the device say 'X' specified in /dev/X may not * match any entry under /sys/block/. In that case we need to * match the major/minor number to find the entry under * sys/block corresponding to /dev/X */ - dev = real_path + src_len - 1; - while (dev > real_path) { - if (isdigit(*dev)) - *dev = 0; - dev--; - } + dev = real_path + strlen(real_path); + while (--dev > real_path && isdigit(*dev)) + *dev = 0; - rc = stat(dev, &stat_buf); + if (strncmp(real_path, "/dev/md_", 8) == 0) + *dev = 0; + + rc = stat(real_path, &stat_buf); if (rc) { if (verbose) fprintf(stderr, "warning: %s, device %s stat failed\n", - strerror(errno), dev); + strerror(errno), real_path); return rc; } @@ -434,31 +422,59 @@ int set_tunables(char *source, int src_len) if (verbose) fprintf(stderr,"warning: device %s does not match any " "entry under /sys/block\n", real_path); - rc = -EINVAL; - goto out; + globfree(&glob_info); + return -EINVAL; } - snprintf(path, sizeof(path), "%s/%s", glob_info.gl_pathv[i], + /* Chop off "/dev" from path we found */ + path[strlen(glob_info.gl_pathv[i])] = '\0'; + globfree(&glob_info); + +set_params: + if (strncmp(real_path, "/dev/md", 7) == 0) { + snprintf(real_path, sizeof(real_path), "%s/%s", path, + STRIPE_CACHE_SIZE); + + rc = read_file(real_path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + return rc; + } + + if (atoi(buf) >= md_stripe_cache_size) + return 0; + + if (strlen(buf) - 1 > 0) { + snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size); + rc = write_file(real_path, buf); + if (rc && verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + } + /* Return since raid and disk tunables are different */ + return rc; + } + + snprintf(real_path, sizeof(real_path), "%s/%s", path, MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); + rc = read_file(real_path, buf, sizeof(buf)); if (rc) { if (verbose) fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - goto out; + real_path, strerror(errno)); + return rc; } if (strlen(buf) - 1 > 0) { - snprintf(path, sizeof(path), "%s/%s", - glob_info.gl_pathv[i], MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); + snprintf(real_path, sizeof(real_path), "%s/%s", path, + MAX_SECTORS_KB_PATH); + rc = write_file(real_path, buf); if (rc && verbose) fprintf(stderr, "warning: writing to %s: %s\n", - path, strerror(errno)); + real_path, strerror(errno)); } - -out: - globfree(&glob_info); return rc; } @@ -605,11 +621,12 @@ int main(int argc, char *const argv[]) printf("mounting device %s at %s, flags=%#x options=%s\n", source, target, flags, optcopy); - if (!strstr(usource, ":/") && set_tunables(source, strlen(source)) && - verbose) - fprintf(stderr, "%s: unable to set tunables for %s" + if (!strstr(usource, ":/") && set_blockdev_tunables(source)) { + if (verbose) + fprintf(stderr, "%s: unable to set tunables for %s" " (may cause reduced IO performance)\n", argv[0], source); + } register_service_tags(usource, source, target); -- 1.8.3.1