X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmount_lustre.c;h=f622728265f7a6464dc757e462760181f9c16f5f;hp=8bfddf2b45e8172a3e8f6583eb9bcf2cc322cf99;hb=536981b0f297a2fa4ae53d4ab81b38e183ee43c0;hpb=207bee13fb5a832207aeb608cb0e439015edf06c diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 8bfddf2..f622728 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include "obdctl.h" @@ -60,6 +61,7 @@ #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" +#define STRIPE_CACHE_SIZE "md/stripe_cache_size" #define MAX_RETRIES 99 int verbose = 0; @@ -67,6 +69,7 @@ int nomtab = 0; int fake = 0; int force = 0; int retry = 0; +int md_stripe_cache_size = 16384; char *progname = NULL; void usage(FILE *out) @@ -82,7 +85,7 @@ void usage(FILE *out) "\t: name of the Lustre filesystem (e.g. lustre1)\n" "\t: filesystem mountpoint (e.g. /mnt/lustre)\n" "\t-f|--fake: fake mount (updates /etc/mtab)\n" - "\t--force: force mount even if already in /etc/mtab\n" + "\t-o force|--force: force mount even if already in /etc/mtab\n" "\t-h|--help: print this usage message\n" "\t-n|--nomtab: do not update /etc/mtab after mount\n" "\t-v|--verbose: print verbose config settings\n" @@ -94,6 +97,8 @@ void usage(FILE *out) "\t\texclude=[:] : colon-separated list of " "inactive OSTs (e.g. lustre-OST0001)\n" "\t\tretry=: number of times mount is retried by client\n" + "\t\tmd_stripe_cache_size=: set the raid stripe cache " + "size for the underlying raid if present\n" ); exit((out != stdout) ? EINVAL : 0); } @@ -163,6 +168,11 @@ static char *convert_hostnames(char *s1) lnet_nid_t nid; converted = malloc(left); + if (converted == NULL) { + fprintf(stderr, "out of memory: needed %d bytes\n", + MAXNIDSTR); + return NULL; + } c = converted; while ((left > 0) && (*s1 != '/')) { s2 = strpbrk(s1, ",:"); @@ -224,6 +234,9 @@ static const struct opt_map opt_map[] = { { "relatime", 0, MS_RELATIME }, /* set file access time on read */ { "norelatime",1,MS_RELATIME }, /* do not set file access time on read */ #endif +#ifdef MS_STRICTATIME + { "strictatime",0,MS_STRICTATIME }, /* update access time strictly */ +#endif { "auto", 0, 0 }, /* Can be mounted using -a */ { "noauto", 0, 0 }, /* Can only be mounted explicitly */ { "nousers", 1, 0 }, /* Forbid ordinary user to mount */ @@ -257,6 +270,13 @@ static int parse_one_option(const char *check, int *flagp) return 0; } +static void append_option(char *options, const char *one) +{ + if (*options) + strcat(options, ","); + strcat(options, one); +} + /* Replace options with subset of Lustre-specific options, and fill in mount flags */ int parse_options(char *orig_options, int *flagp) @@ -275,20 +295,34 @@ int parse_options(char *orig_options, int *flagp) * manner */ arg = opt; val = strchr(opt, '='); - if (val != NULL && strncmp(arg, "retry", 5) == 0) { + /* please note that some ldiskfs mount options are also in the form + * of param=value. We should pay attention not to remove those + * mount options, see bug 22097. */ + if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) { + md_stripe_cache_size = atoi(val + 1); + } else if (val && strncmp(arg, "retry", 5) == 0) { retry = atoi(val + 1); if (retry > MAX_RETRIES) retry = MAX_RETRIES; else if (retry < 0) retry = 0; - } - else if (parse_one_option(opt, flagp) == 0) { + } else if (val && strncmp(arg, "mgssec", 6) == 0) { + append_option(options, opt); + } else if (strcmp(opt, "force") == 0) { + //XXX special check for 'force' option + ++force; + printf("force: %d\n", force); + } else if (parse_one_option(opt, flagp) == 0) { /* pass this on as an option */ - if (*options) - strcat(options, ","); - strcat(options, opt); + append_option(options, opt); } } +#ifdef MS_STRICTATIME + /* set strictatime to default if NOATIME or RELATIME + not given explicit */ + if (!(*flagp & (MS_NOATIME | MS_RELATIME))) + *flagp |= MS_STRICTATIME; +#endif strcpy(orig_options, options); free(options); return 0; @@ -303,7 +337,12 @@ int read_file(char *path, char *buf, int size) if (fd == NULL) return errno; - fgets(buf, size, fd); + /* should not ignore fgets(3)'s return value */ + if (!fgets(buf, size, fd)) { + fprintf(stderr, "reading from %s: %s", path, strerror(errno)); + fclose(fd); + return 1; + } fclose(fd); return 0; } @@ -324,12 +363,12 @@ int write_file(char *path, char *buf) /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_tunables(char *source, int src_len) +int set_blockdev_tunables(char *source, int fan_out) { - glob_t glob_info; + glob_t glob_info = { 0 }; struct stat stat_buf; char *chk_major, *chk_minor; - char *savept, *dev, *s2 = 0; + char *savept, *dev; char *ret_path; char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'}; char real_path[PATH_MAX] = {'\0'}; @@ -347,52 +386,36 @@ int set_tunables(char *source, int src_len) return -EINVAL; } - src_len = sizeof(real_path); - if (strncmp(real_path, "/dev/loop", 9) == 0) return 0; - if ((real_path[0] != '/') && ((s2 = strpbrk(real_path, ",:")) != NULL)) + if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL)) return 0; - dev = real_path + src_len - 1; - while (dev > real_path && (*dev != '/')) { - if (isdigit(*dev)) - *dev = 0; - dev--; - } - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); - if (rc == 0 && (strlen(buf) - 1) > 0) { - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); - if (rc && verbose) - fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - return rc; - } - - if (rc != ENOENT) - return rc; + snprintf(path, sizeof(path), "/sys/block%s", real_path + 4); + if (access(path, X_OK) == 0) + goto set_params; /* The name of the device say 'X' specified in /dev/X may not * match any entry under /sys/block/. In that case we need to * match the major/minor number to find the entry under * sys/block corresponding to /dev/X */ - dev = real_path + src_len - 1; - while (dev > real_path) { - if (isdigit(*dev)) + + /* Don't chop tail digit on /dev/mapper/xxx, LU-478 */ + if (strncmp(real_path, "/dev/mapper", 11) != 0) { + dev = real_path + strlen(real_path); + while (--dev > real_path && isdigit(*dev)) + *dev = 0; + + if (strncmp(real_path, "/dev/md_", 8) == 0) *dev = 0; - dev--; } - rc = stat(dev, &stat_buf); + rc = stat(real_path, &stat_buf); if (rc) { if (verbose) fprintf(stderr, "warning: %s, device %s stat failed\n", - strerror(errno), dev); + strerror(errno), real_path); return rc; } @@ -403,6 +426,7 @@ int set_tunables(char *source, int src_len) if (verbose) fprintf(stderr, "warning: failed to read entries under " "/sys/block\n"); + globfree(&glob_info); return rc; } @@ -426,38 +450,112 @@ int set_tunables(char *source, int src_len) if (verbose) fprintf(stderr,"warning: device %s does not match any " "entry under /sys/block\n", real_path); - rc = -EINVAL; - goto out; + globfree(&glob_info); + return -EINVAL; } - snprintf(path, sizeof(path), "%s/%s", glob_info.gl_pathv[i], + /* Chop off "/dev" from path we found */ + path[strlen(glob_info.gl_pathv[i])] = '\0'; + globfree(&glob_info); + +set_params: + if (strncmp(real_path, "/dev/md", 7) == 0) { + snprintf(real_path, sizeof(real_path), "%s/%s", path, + STRIPE_CACHE_SIZE); + + rc = read_file(real_path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + return 0; + } + + if (atoi(buf) >= md_stripe_cache_size) + return 0; + + if (strlen(buf) - 1 > 0) { + snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size); + rc = write_file(real_path, buf); + if (rc && verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + } + /* Return since raid and disk tunables are different */ + return rc; + } + + snprintf(real_path, sizeof(real_path), "%s/%s", path, MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); + rc = read_file(real_path, buf, sizeof(buf)); if (rc) { if (verbose) fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - goto out; + real_path, strerror(errno)); + /* No MAX_HW_SECTORS_KB_PATH isn't necessary an + * error for some device. */ + rc = 0; } if (strlen(buf) - 1 > 0) { - snprintf(path, sizeof(path), "%s/%s", - glob_info.gl_pathv[i], MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); - if (rc && verbose) - fprintf(stderr, "warning: writing to %s: %s\n", - path, strerror(errno)); + snprintf(real_path, sizeof(real_path), "%s/%s", path, + MAX_SECTORS_KB_PATH); + rc = write_file(real_path, buf); + if (rc) { + if (verbose) + fprintf(stderr, "warning: writing to %s: %s\n", + real_path, strerror(errno)); + /* No MAX_SECTORS_KB_PATH isn't necessary an + * error for some device. */ + rc = 0; + } + } + + if (fan_out) { + char *slave = NULL; + glob_info.gl_pathc = 0; + glob_info.gl_offs = 0; + /* if device is multipath device, tune its slave devices */ + snprintf(real_path, sizeof(real_path), "%s/slaves/*", path); + rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info); + + for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){ + slave = basename(glob_info.gl_pathv[i]); + snprintf(real_path, sizeof(real_path), "/dev/%s", slave); + rc = set_blockdev_tunables(real_path, 0); + } + + if (rc == GLOB_NOMATCH) { + /* no slave device is not an error */ + rc = 0; + } else if (rc && verbose) { + if (slave == NULL) { + fprintf(stderr, "warning: %s, failed to read" + " entries under %s/slaves\n", + strerror(errno), path); + } else { + fprintf(stderr, "unable to set tunables for" + " slave device %s (slave would be" + " unable to handle IO request from" + " master %s)\n", + real_path, source); + } + } + globfree(&glob_info); } -out: - globfree(&glob_info); return rc; } int main(int argc, char *const argv[]) { char default_options[] = ""; - char *usource, *source, *target, *ptr; + char *usource, *source, *ptr; + char target[PATH_MAX] = {'\0'}; + char real_path[PATH_MAX] = {'\0'}; + char path[256], name[256]; + FILE *f; + size_t sz; char *options, *optcopy, *orig_options = default_options; int i, nargs = 3, opt, rc, flags, optlen; static struct option long_opt[] = { @@ -516,18 +614,43 @@ int main(int argc, char *const argv[]) } usource = argv[optind]; - source = convert_hostnames(usource); - target = argv[optind + 1]; - ptr = target + strlen(target) - 1; - while ((ptr > target) && (*ptr == '/')) { - *ptr = 0; - ptr--; + if (!usource) { + usage(stderr); } - if (!usource || !source) { + /** + * Try to get the real path to the device, in case it is a + * symbolic link for instance + */ + if (realpath(usource, real_path) != NULL) { + usource = real_path; + + ptr = strrchr(real_path, '/'); + if (ptr && strncmp(ptr, "/dm-", 4) == 0 && isdigit(*(ptr + 4))) { + snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptr+1); + if ((f = fopen(path, "r"))) { + /* read "\n" from sysfs */ + if (fgets(name, sizeof(name), f) && (sz = strlen(name)) > 1) { + name[sz - 1] = '\0'; + snprintf(real_path, sizeof(real_path), "/dev/mapper/%s", name); + } + fclose(f); + } + } + } + + source = convert_hostnames(usource); + if (!source) { usage(stderr); } + if (realpath(argv[optind + 1], target) == NULL) { + rc = errno; + fprintf(stderr, "warning: %s: cannot resolve: %s\n", + argv[optind + 1], strerror(errno)); + return rc; + } + if (verbose) { for (i = 0; i < argc; i++) printf("arg[%d] = %s\n", i, argv[i]); @@ -537,6 +660,10 @@ int main(int argc, char *const argv[]) } options = malloc(strlen(orig_options) + 1); + if (options == NULL) { + fprintf(stderr, "can't allocate memory for options\n"); + return -1; + } strcpy(options, orig_options); rc = parse_options(options, &flags); if (rc) { @@ -575,6 +702,10 @@ int main(int argc, char *const argv[]) functions. So we'll stick it on the end of the options. */ optlen = strlen(options) + strlen(",device=") + strlen(source) + 1; optcopy = malloc(optlen); + if (optcopy == NULL) { + fprintf(stderr, "can't allocate memory to optcopy\n"); + return -1; + } strcpy(optcopy, options); if (*optcopy) strcat(optcopy, ","); @@ -585,11 +716,12 @@ int main(int argc, char *const argv[]) printf("mounting device %s at %s, flags=%#x options=%s\n", source, target, flags, optcopy); - if (!strstr(usource, ":/") && set_tunables(source, strlen(source)) && - verbose) - fprintf(stderr, "%s: unable to set tunables for %s" + if (!strstr(usource, ":/") && set_blockdev_tunables(source, 1)) { + if (verbose) + fprintf(stderr, "%s: unable to set tunables for %s" " (may cause reduced IO performance)\n", argv[0], source); + } register_service_tags(usource, source, target); @@ -634,9 +766,8 @@ int main(int argc, char *const argv[]) usource, target, strerror(errno)); if (errno == ENODEV) fprintf(stderr, "Are the lustre modules loaded?\n" - "Check /etc/modprobe.conf and /proc/filesystems" - "\nNote 'alias lustre llite' should be removed" - " from modprobe.conf\n"); + "Check /etc/modprobe.conf and " + "/proc/filesystems\n"); if (errno == ENOTBLK) fprintf(stderr, "Do you need -o loop?\n"); if (errno == ENOMEDIUM) @@ -672,8 +803,12 @@ int main(int argc, char *const argv[]) /* May as well try to clean up loop devs */ if (strncmp(usource, "/dev/loop", 9) == 0) { char cmd[256]; + int ret; sprintf(cmd, "/sbin/losetup -d %s", usource); - system(cmd); + if ((ret = system(cmd)) < 0) + rc = errno; + else if (ret > 0) + rc = WEXITSTATUS(ret); } } else if (!nomtab) {