From: Alex Zhuravlev
Date: Wed, 27 Jun 2012 07:15:40 +0000 (+0400)
Subject: LU-1581 utils: introduce osd_tune()
X-Git-Tag: 2.2.90~6
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=9e7b2d9cb8563c30152c6a4cbebbc568e042e9c8;hp=5b9f3ee5887bc4effc0d3ca753f4dd5ae8f4f39e

LU-1581 utils: introduce osd_tune()

mount_lustre.c to use that

Signed-off-by: Alex Zhuravlev
Change-Id: If3e576c69c30625a079007cb93e5c447721118ab
Reviewed-on: http://review.whamcloud.com/3234
Reviewed-by: Andreas Dilger
Tested-by: Hudson
Reviewed-by: Li Wei
Tested-by: Maloo
---

diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c
index ebb88d4..09e1bdd 100644
--- a/lustre/utils/mount_lustre.c
+++ b/lustre/utils/mount_lustre.c
@@ -50,19 +50,14 @@
 #include
 #include "obdctl.h"
 #include
-#include
 #include
 #include
 #include "mount_utils.h"
 
-#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
-#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
-#define STRIPE_CACHE_SIZE "md/stripe_cache_size"
 #define MAXOPT 4096
 #define MAX_RETRIES 99
 
 int verbose = 0;
-int md_stripe_cache_size = 16384;
 char *progname = NULL;
 
 void usage(FILE *out)
@@ -237,7 +232,7 @@ int parse_options(struct mount_opts *mop, char *orig_options, int *flagp)
                  * of param=value. We should pay attention not to remove those
                  * mount options, see bug 22097. */
                 if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) {
-                        md_stripe_cache_size = atoi(val + 1);
+			mop->mo_md_stripe_cache_size = atoi(val + 1);
                 } else if (val && strncmp(arg, "retry", 5) == 0) {
                         mop->mo_retry = atoi(val + 1);
                         if (mop->mo_retry > MAX_RETRIES)
@@ -267,224 +262,6 @@ int parse_options(struct mount_opts *mop, char *orig_options, int *flagp)
 }
 
 
-int read_file(char *path, char *buf, int size)
-{
-        FILE *fd;
-
-        fd = fopen(path, "r");
-        if (fd == NULL)
-                return errno;
-
-        /* should not ignore fgets(3)'s return value */
-        if (!fgets(buf, size, fd)) {
-                fprintf(stderr, "reading from %s: %s", path, strerror(errno));
-                fclose(fd);
-                return 1;
-        }
-        fclose(fd);
-        return 0;
-}
-
-int write_file(char *path, char *buf)
-{
-        FILE *fd;
-
-        fd = fopen(path, "w");
-        if (fd == NULL)
-                return errno;
-
-        fputs(buf, fd);
-        fclose(fd);
-        return 0;
-}
-
-/* This is to tune the kernel for good SCSI performance.
- * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
- * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */
-int set_blockdev_tunables(char *source, int fan_out)
-{
-        glob_t glob_info = { 0 };
-        struct stat stat_buf;
-        char *chk_major, *chk_minor;
-        char *savept = NULL, *dev;
-        char *ret_path;
-        char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
-        char real_path[PATH_MAX] = {'\0'};
-        int i, rc = 0;
-        int major, minor;
-
-        if (!source)
-                return -EINVAL;
-
-        ret_path = realpath(source, real_path);
-        if (ret_path == NULL) {
-                if (verbose)
-                        fprintf(stderr, "warning: %s: cannot resolve: %s\n",
-                                source, strerror(errno));
-                return -EINVAL;
-        }
-
-        if (strncmp(real_path, "/dev/loop", 9) == 0)
-                return 0;
-
-        if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
-                return 0;
-
-        snprintf(path, sizeof(path), "/sys/block%s", real_path + 4);
-        if (access(path, X_OK) == 0)
-                goto set_params;
-
-        /* The name of the device say 'X' specified in /dev/X may not
-         * match any entry under /sys/block/. In that case we need to
-         * match the major/minor number to find the entry under
-         * sys/block corresponding to /dev/X */
-
-        /* Don't chop tail digit on /dev/mapper/xxx, LU-478 */
-        if (strncmp(real_path, "/dev/mapper", 11) != 0) {
-                dev = real_path + strlen(real_path);
-                while (--dev > real_path && isdigit(*dev))
-                        *dev = 0;
-
-                if (strncmp(real_path, "/dev/md_", 8) == 0)
-                        *dev = 0;
-        }
-
-        rc = stat(real_path, &stat_buf);
-        if (rc) {
-                if (verbose)
-                        fprintf(stderr, "warning: %s, device %s stat failed\n",
-                                strerror(errno), real_path);
-                return rc;
-        }
-
-        major = major(stat_buf.st_rdev);
-        minor = minor(stat_buf.st_rdev);
-        rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
-        if (rc) {
-                if (verbose)
-                        fprintf(stderr, "warning: failed to read entries under "
-                                "/sys/block\n");
-                globfree(&glob_info);
-                return rc;
-        }
-
-        for (i = 0; i < glob_info.gl_pathc; i++){
-                snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]);
-
-                rc = read_file(path, buf, sizeof(buf));
-                if (rc)
-                        continue;
-
-                if (buf[strlen(buf) - 1] == '\n')
-                        buf[strlen(buf) - 1] = '\0';
-
-                chk_major = strtok_r(buf, ":", &savept);
-                chk_minor = savept;
-                if (major == atoi(chk_major) &&minor == atoi(chk_minor))
-                        break;
-        }
-
-        if (i == glob_info.gl_pathc) {
-                if (verbose)
-                        fprintf(stderr,"warning: device %s does not match any "
-                                "entry under /sys/block\n", real_path);
-                globfree(&glob_info);
-                return -EINVAL;
-        }
-
-        /* Chop off "/dev" from path we found */
-        path[strlen(glob_info.gl_pathv[i])] = '\0';
-        globfree(&glob_info);
-
-set_params:
-        if (strncmp(real_path, "/dev/md", 7) == 0) {
-                snprintf(real_path, sizeof(real_path), "%s/%s", path,
-                         STRIPE_CACHE_SIZE);
-
-                rc = read_file(real_path, buf, sizeof(buf));
-                if (rc) {
-                        if (verbose)
-                                fprintf(stderr, "warning: opening %s: %s\n",
-                                        real_path, strerror(errno));
-                        return 0;
-                }
-
-                if (atoi(buf) >= md_stripe_cache_size)
-                        return 0;
-
-                if (strlen(buf) - 1 > 0) {
-                        snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size);
-                        rc = write_file(real_path, buf);
-                        if (rc && verbose)
-                                fprintf(stderr, "warning: opening %s: %s\n",
-                                        real_path, strerror(errno));
-                }
-                /* Return since raid and disk tunables are different */
-                return rc;
-        }
-
-        snprintf(real_path, sizeof(real_path), "%s/%s", path,
-                 MAX_HW_SECTORS_KB_PATH);
-        rc = read_file(real_path, buf, sizeof(buf));
-        if (rc) {
-                if (verbose)
-                        fprintf(stderr, "warning: opening %s: %s\n",
-                                real_path, strerror(errno));
-                /* No MAX_HW_SECTORS_KB_PATH isn't necessary an
-                 * error for some device. */
-                rc = 0;
-        }
-
-        if (strlen(buf) - 1 > 0) {
-                snprintf(real_path, sizeof(real_path), "%s/%s", path,
-                         MAX_SECTORS_KB_PATH);
-                rc = write_file(real_path, buf);
-                if (rc) {
-                        if (verbose)
-                                fprintf(stderr, "warning: writing to %s: %s\n",
-                                        real_path, strerror(errno));
-                        /* No MAX_SECTORS_KB_PATH isn't necessary an
-                         * error for some device. */
-                        rc = 0;
-                }
-        }
-
-        if (fan_out) {
-                char *slave = NULL;
-                glob_info.gl_pathc = 0;
-                glob_info.gl_offs = 0;
-                /* if device is multipath device, tune its slave devices */
-                snprintf(real_path, sizeof(real_path), "%s/slaves/*", path);
-                rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info);
-
-                for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){
-                        slave = basename(glob_info.gl_pathv[i]);
-                        snprintf(real_path, sizeof(real_path), "/dev/%s", slave);
-                        rc = set_blockdev_tunables(real_path, 0);
-                }
-
-                if (rc == GLOB_NOMATCH) {
-                        /* no slave device is not an error */
-                        rc = 0;
-                } else if (rc && verbose) {
-                        if (slave == NULL) {
-                                fprintf(stderr, "warning: %s, failed to read"
-                                        " entries under %s/slaves\n",
-                                        strerror(errno), path);
-                        } else {
-                                fprintf(stderr, "unable to set tunables for"
-                                        " slave device %s (slave would be"
-                                        " unable to handle IO request from"
-                                        " master %s)\n",
-                                        real_path, source);
-                        }
-                }
-                globfree(&glob_info);
-        }
-
-        return rc;
-}
-
 static int parse_ldd(char *source, struct mount_opts *mop, char *options)
 {
         struct lustre_disk_data *ldd = &mop->mo_ldd;
@@ -693,12 +470,13 @@ int main(int argc, char *const argv[])
                 printf("mounting device %s at %s, flags=%#x options=%s\n",
                        mop.mo_source, mop.mo_target, flags, options);
 
-        if (!strstr(mop.mo_usource, ":/") && set_blockdev_tunables(mop.mo_source, 1)) {
-                if (verbose)
-                        fprintf(stderr, "%s: unable to set tunables for %s"
-                                " (may cause reduced IO performance)\n",
-                                argv[0], mop.mo_source);
-        }
+	if (!strstr(mop.mo_usource, ":/") &&
+	    osd_tune_lustre(mop.mo_source, &mop)) {
+		if (verbose)
+			fprintf(stderr, "%s: unable to set tunables for %s"
+				" (may cause reduced IO performance)\n",
+				argv[0], mop.mo_source);
+	}
 
         if (!mop.mo_fake) {
                 /* flags and target get to lustre_get_sb, but not
diff --git a/lustre/utils/mount_utils.c b/lustre/utils/mount_utils.c
index 4c570a4..08e4b40 100644
--- a/lustre/utils/mount_utils.c
+++ b/lustre/utils/mount_utils.c
@@ -508,6 +508,32 @@ int osd_prepare_lustre(struct mkfs_opts *mop,
 	return ret;
 }
 
+int osd_tune_lustre(char *dev, struct mount_opts *mop)
+{
+	struct lustre_disk_data *ldd = &mop->mo_ldd;
+	int ret;
+
+	switch (ldd->ldd_mount_type) {
+	case LDD_MT_LDISKFS:
+	case LDD_MT_LDISKFS2:
+		ret = ldiskfs_tune_lustre(dev, mop);
+		break;
+#ifdef HAVE_ZFS_OSD
+	case LDD_MT_ZFS:
+		ret = zfs_tune_lustre(dev, mop);
+		break;
+#endif /* HAVE_ZFS_OSD */
+	default:
+		fatal();
+		fprintf(stderr, "unknown fs type %d '%s'\n",
+			ldd->ldd_mount_type, MT_STR(ldd));
+		ret = EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
 int osd_init(void)
 {
 	int ret = 0;
diff --git a/lustre/utils/mount_utils.h b/lustre/utils/mount_utils.h
index 287b2d8..acd0955 100644
--- a/lustre/utils/mount_utils.h
+++ b/lustre/utils/mount_utils.h
@@ -122,6 +122,7 @@ int osd_make_lustre(struct mkfs_opts *mop);
 int osd_prepare_lustre(struct mkfs_opts *mop,
 		       char *default_mountopts, int default_len,
 		       char *always_mountopts, int always_len);
+int osd_tune_lustre(char *dev, struct mount_opts *mop);
 int osd_init(void);
 void osd_fini(void);
 
@@ -132,6 +133,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop);
 int ldiskfs_prepare_lustre(struct mkfs_opts *mop,
 			   char *default_mountopts, int default_len,
 			   char *always_mountopts, int always_len);
+int ldiskfs_tune_lustre(char *dev, struct mount_opts *mop);
 int ldiskfs_init(void);
 void ldiskfs_fini(void);
 
@@ -143,6 +145,7 @@ int zfs_make_lustre(struct mkfs_opts *mop);
 int zfs_prepare_lustre(struct mkfs_opts *mop,
 		       char *default_mountopts, int default_len,
 		       char *always_mountopts, int always_len);
+int zfs_tune_lustre(char *dev, struct mount_opts *mop);
 int zfs_init(void);
 void zfs_fini(void);
 #endif
diff --git a/lustre/utils/mount_utils_ldiskfs.c b/lustre/utils/mount_utils_ldiskfs.c
index 0159328..a8419e9 100644
--- a/lustre/utils/mount_utils_ldiskfs.c
+++ b/lustre/utils/mount_utils_ldiskfs.c
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -79,6 +80,10 @@
 #include
 #include "mount_utils.h"
 
+#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
+#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
+#define STRIPE_CACHE_SIZE "md/stripe_cache_size"
+
 extern char *progname;
 
 #define L_BLOCK_SIZE 4096
@@ -745,6 +750,230 @@ int ldiskfs_prepare_lustre(struct mkfs_opts *mop,
 	return 0;
 }
 
+int read_file(char *path, char *buf, int size)
+{
+	FILE *fd;
+
+	fd = fopen(path, "r");
+	if (fd == NULL)
+		return errno;
+
+	/* should not ignore fgets(3)'s return value */
+	if (!fgets(buf, size, fd)) {
+		fprintf(stderr, "reading from %s: %s", path, strerror(errno));
+		fclose(fd);
+		return 1;
+	}
+	fclose(fd);
+	return 0;
+}
+
+int write_file(char *path, char *buf)
+{
+	FILE *fd;
+
+	fd = fopen(path, "w");
+	if (fd == NULL)
+		return errno;
+
+	fputs(buf, fd);
+	fclose(fd);
+	return 0;
+}
+
+/* This is to tune the kernel for good SCSI performance.
+ * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
+ * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */
+int set_blockdev_tunables(char *source, struct mount_opts *mop, int fan_out)
+{
+	glob_t glob_info = { 0 };
+	struct stat stat_buf;
+	char *chk_major, *chk_minor;
+	char *savept = NULL, *dev;
+	char *ret_path;
+	char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
+	char real_path[PATH_MAX] = {'\0'};
+	int i, rc = 0;
+	int major, minor;
+
+	if (!source)
+		return -EINVAL;
+
+	ret_path = realpath(source, real_path);
+	if (ret_path == NULL) {
+		if (verbose)
+			fprintf(stderr, "warning: %s: cannot resolve: %s\n",
+				source, strerror(errno));
+		return -EINVAL;
+	}
+
+	if (strncmp(real_path, "/dev/loop", 9) == 0)
+		return 0;
+
+	if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
+		return 0;
+
+	snprintf(path, sizeof(path), "/sys/block%s", real_path + 4);
+	if (access(path, X_OK) == 0)
+		goto set_params;
+
+	/* The name of the device say 'X' specified in /dev/X may not
+	 * match any entry under /sys/block/. In that case we need to
+	 * match the major/minor number to find the entry under
+	 * sys/block corresponding to /dev/X */
+
+	/* Don't chop tail digit on /dev/mapper/xxx, LU-478 */
+	if (strncmp(real_path, "/dev/mapper", 11) != 0) {
+		dev = real_path + strlen(real_path);
+		while (--dev > real_path && isdigit(*dev))
+			*dev = 0;
+
+		if (strncmp(real_path, "/dev/md_", 8) == 0)
+			*dev = 0;
+	}
+
+	rc = stat(real_path, &stat_buf);
+	if (rc) {
+		if (verbose)
+			fprintf(stderr, "warning: %s, device %s stat failed\n",
+				strerror(errno), real_path);
+		return rc;
+	}
+
+	major = major(stat_buf.st_rdev);
+	minor = minor(stat_buf.st_rdev);
+	rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
+	if (rc) {
+		if (verbose)
+			fprintf(stderr, "warning: failed to read entries under "
+				"/sys/block\n");
+		globfree(&glob_info);
+		return rc;
+	}
+
+	for (i = 0; i < glob_info.gl_pathc; i++){
+		snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]);
+
+		rc = read_file(path, buf, sizeof(buf));
+		if (rc)
+			continue;
+
+		if (buf[strlen(buf) - 1] == '\n')
+			buf[strlen(buf) - 1] = '\0';
+
+		chk_major = strtok_r(buf, ":", &savept);
+		chk_minor = savept;
+		if (major == atoi(chk_major) &&minor == atoi(chk_minor))
+			break;
+	}
+
+	if (i == glob_info.gl_pathc) {
+		if (verbose)
+			fprintf(stderr,"warning: device %s does not match any "
+				"entry under /sys/block\n", real_path);
+		globfree(&glob_info);
+		return -EINVAL;
+	}
+
+	/* Chop off "/dev" from path we found */
+	path[strlen(glob_info.gl_pathv[i])] = '\0';
+	globfree(&glob_info);
+
+set_params:
+	if (strncmp(real_path, "/dev/md", 7) == 0) {
+		snprintf(real_path, sizeof(real_path), "%s/%s", path,
+			 STRIPE_CACHE_SIZE);
+
+		rc = read_file(real_path, buf, sizeof(buf));
+		if (rc) {
+			if (verbose)
+				fprintf(stderr, "warning: opening %s: %s\n",
+					real_path, strerror(errno));
+			return 0;
+		}
+
+		if (atoi(buf) >= mop->mo_md_stripe_cache_size)
+			return 0;
+
+		if (strlen(buf) - 1 > 0) {
+			snprintf(buf, sizeof(buf), "%d",
+				 mop->mo_md_stripe_cache_size);
+			rc = write_file(real_path, buf);
+			if (rc && verbose)
+				fprintf(stderr, "warning: opening %s: %s\n",
+					real_path, strerror(errno));
+		}
+		/* Return since raid and disk tunables are different */
+		return rc;
+	}
+
+	snprintf(real_path, sizeof(real_path), "%s/%s", path,
+		 MAX_HW_SECTORS_KB_PATH);
+	rc = read_file(real_path, buf, sizeof(buf));
+	if (rc) {
+		if (verbose)
+			fprintf(stderr, "warning: opening %s: %s\n",
+				real_path, strerror(errno));
+		/* No MAX_HW_SECTORS_KB_PATH isn't necessary an
+		 * error for some device. */
+		rc = 0;
+	}
+
+	if (strlen(buf) - 1 > 0) {
+		snprintf(real_path, sizeof(real_path), "%s/%s", path,
+			 MAX_SECTORS_KB_PATH);
+		rc = write_file(real_path, buf);
+		if (rc) {
+			if (verbose)
+				fprintf(stderr, "warning: writing to %s: %s\n",
+					real_path, strerror(errno));
+			/* No MAX_SECTORS_KB_PATH isn't necessary an
+			 * error for some device. */
+			rc = 0;
+		}
+	}
+
+	if (fan_out) {
+		char *slave = NULL;
+		glob_info.gl_pathc = 0;
+		glob_info.gl_offs = 0;
+		/* if device is multipath device, tune its slave devices */
+		snprintf(real_path, sizeof(real_path), "%s/slaves/*", path);
+		rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info);
+
+		for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){
+			slave = basename(glob_info.gl_pathv[i]);
+			snprintf(real_path, sizeof(real_path), "/dev/%s", slave);
+			rc = set_blockdev_tunables(real_path, mop, 0);
+		}
+
+		if (rc == GLOB_NOMATCH) {
+			/* no slave device is not an error */
+			rc = 0;
+		} else if (rc && verbose) {
+			if (slave == NULL) {
+				fprintf(stderr, "warning: %s, failed to read"
+					" entries under %s/slaves\n",
+					strerror(errno), path);
+			} else {
+				fprintf(stderr, "unable to set tunables for"
+					" slave device %s (slave would be"
+					" unable to handle IO request from"
+					" master %s)\n",
+					real_path, source);
+			}
+		}
+		globfree(&glob_info);
+	}
+
+	return rc;
+}
+
+int ldiskfs_tune_lustre(char *dev, struct mount_opts *mop)
+{
+	return set_blockdev_tunables(dev, mop, 1);
+}
+
 /* return canonicalized absolute pathname, even if the target file does not
  * exist, unlike realpath */
 static char *absolute_path(char *devname)
diff --git a/lustre/utils/mount_utils_zfs.c b/lustre/utils/mount_utils_zfs.c
index d824306..8271135 100644
--- a/lustre/utils/mount_utils_zfs.c
+++ b/lustre/utils/mount_utils_zfs.c
@@ -616,6 +616,14 @@ int zfs_prepare_lustre(struct mkfs_opts *mop,
 	return 0;
 }
 
+int zfs_tune_lustre(char *dev, struct mount_opts *mop)
+{
+	if (osd_check_zfs_setup() == 0)
+		return EINVAL;
+
+	return 0;
+}
+
 int zfs_init(void)
 {
 	int ret = 0;