X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Futils%2Fmount_lustre.c;h=28e6114de8f5dff98219a5f449c778d1d4a0b5fe;hb=398397bef91e8faac15e3a527fe2974f73c63bd6;hp=385cd6c6dcd44beca0dedfef2d79ba8ec6d4a2f5;hpb=6869932b552ac705f411de3362f01bd50c1f6f7d;p=fs%2Flustre-release.git diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 385cd6c..28e6114 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -60,11 +60,15 @@ #define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" #define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" +#define STRIPE_CACHE_SIZE "md/stripe_cache_size" +#define MAX_RETRIES 99 int verbose = 0; int nomtab = 0; int fake = 0; int force = 0; +int retry = 0; +int md_stripe_cache_size = 2048; char *progname = NULL; void usage(FILE *out) @@ -80,21 +84,25 @@ void usage(FILE *out) "\t: name of the Lustre filesystem (e.g. lustre1)\n" "\t: filesystem mountpoint (e.g. /mnt/lustre)\n" "\t-f|--fake: fake mount (updates /etc/mtab)\n" - "\t--force: force mount even if already in /etc/mtab\n" + "\t-o force|--force: force mount even if already in /etc/mtab\n" "\t-h|--help: print this usage message\n" "\t-n|--nomtab: do not update /etc/mtab after mount\n" "\t-v|--verbose: print verbose config settings\n" "\t: one or more comma separated of:\n" "\t\t(no)flock,(no)user_xattr,(no)acl\n" + "\t\tabort_recov: abort server recovery handling\n" "\t\tnosvc: only start MGC/MGS obds\n" "\t\tnomgs: only start target obds, using existing MGS\n" "\t\texclude=[:] : colon-separated list of " "inactive OSTs (e.g. lustre-OST0001)\n" + "\t\tretry=: number of times mount is retried by client\n" + "\t\tmd_stripe_cache_size=: set the raid stripe cache " + "size for the underlying raid if present\n" ); exit((out != stdout) ? EINVAL : 0); } -static int check_mtab_entry(char *spec, char *mtpt, char *type) +static int check_mtab_entry(char *spec1, char *spec2, char *mtpt, char *type) { FILE *fp; struct mntent *mnt; @@ -104,7 +112,8 @@ static int check_mtab_entry(char *spec, char *mtpt, char *type) return(0); while ((mnt = getmntent(fp)) != NULL) { - if (strcmp(mnt->mnt_fsname, spec) == 0 && + if ((strcmp(mnt->mnt_fsname, spec1) == 0 || + strcmp(mnt->mnt_fsname, spec2) == 0) && strcmp(mnt->mnt_dir, mtpt) == 0 && strcmp(mnt->mnt_type, type) == 0) { endmntent(fp); @@ -158,6 +167,11 @@ static char *convert_hostnames(char *s1) lnet_nid_t nid; converted = malloc(left); + if (converted == NULL) { + fprintf(stderr, "out of memory: needed %d bytes\n", + MAXNIDSTR); + return NULL; + } c = converted; while ((left > 0) && (*s1 != '/')) { s2 = strpbrk(s1, ",:"); @@ -252,11 +266,18 @@ static int parse_one_option(const char *check, int *flagp) return 0; } +static void append_option(char *options, const char *one) +{ + if (*options) + strcat(options, ","); + strcat(options, one); +} + /* Replace options with subset of Lustre-specific options, and fill in mount flags */ int parse_options(char *orig_options, int *flagp) { - char *options, *opt, *nextopt; + char *options, *opt, *nextopt, *arg, *val; options = calloc(strlen(orig_options) + 1, 1); *flagp = 0; @@ -265,11 +286,31 @@ int parse_options(char *orig_options, int *flagp) if (!*opt) /* empty option */ continue; - if (parse_one_option(opt, flagp) == 0) { + + /* Handle retries in a slightly different + * manner */ + arg = opt; + val = strchr(opt, '='); + /* please note that some ldiskfs mount options are also in the form + * of param=value. We should pay attention not to remove those + * mount options, see bug 22097. */ + if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) { + md_stripe_cache_size = atoi(val + 1); + } else if (val && strncmp(arg, "retry", 5) == 0) { + retry = atoi(val + 1); + if (retry > MAX_RETRIES) + retry = MAX_RETRIES; + else if (retry < 0) + retry = 0; + } else if (val && strncmp(arg, "mgssec", 6) == 0) { + append_option(options, opt); + } else if (strncmp(opt, "force", 5) == 0) { + //XXX special check for 'force' option + ++force; + printf("force: %d\n", force); + } else if (parse_one_option(opt, flagp) == 0) { /* pass this on as an option */ - if (*options) - strcat(options, ","); - strcat(options, opt); + append_option(options, opt); } } strcpy(orig_options, options); @@ -286,7 +327,12 @@ int read_file(char *path, char *buf, int size) if (fd == NULL) return errno; - fgets(buf, size, fd); + /* should not ignore fgets(3)'s return value */ + if (!fgets(buf, size, fd)) { + fprintf(stderr, "reading from %s: %s", path, strerror(errno)); + fclose(fd); + return 1; + } fclose(fd); return 0; } @@ -307,12 +353,12 @@ int write_file(char *path, char *buf) /* This is to tune the kernel for good SCSI performance. * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_tunables(char *source, int src_len) +int set_blockdev_tunables(char *source) { glob_t glob_info; struct stat stat_buf; char *chk_major, *chk_minor; - char *savept, *dev, *s2 = 0; + char *savept, *dev; char *ret_path; char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'}; char real_path[PATH_MAX] = {'\0'}; @@ -325,57 +371,37 @@ int set_tunables(char *source, int src_len) ret_path = realpath(source, real_path); if (ret_path == NULL) { if (verbose) - fprintf(stderr, "warning: %s: cannot resolve: %s", + fprintf(stderr, "warning: %s: cannot resolve: %s\n", source, strerror(errno)); return -EINVAL; } - src_len = sizeof(real_path); - if (strncmp(real_path, "/dev/loop", 9) == 0) return 0; - if ((real_path[0] != '/') && ((s2 = strpbrk(real_path, ",:")) != NULL)) + if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL)) return 0; - dev = real_path + src_len - 1; - while (dev > real_path && (*dev != '/')) { - if (isdigit(*dev)) - *dev = 0; - dev--; - } - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); - if (rc == 0 && (strlen(buf) - 1) > 0) { - snprintf(path, sizeof(path), "/sys/block%s/%s", dev, - MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); - if (rc && verbose) - fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - return rc; - } - - if (rc != ENOENT) - return rc; + snprintf(path, sizeof(path), "/sys/block%s", real_path + 4); + if (access(path, X_OK) == 0) + goto set_params; /* The name of the device say 'X' specified in /dev/X may not * match any entry under /sys/block/. In that case we need to * match the major/minor number to find the entry under * sys/block corresponding to /dev/X */ - dev = real_path + src_len - 1; - while (dev > real_path) { - if (isdigit(*dev)) - *dev = 0; - dev--; - } + dev = real_path + strlen(real_path); + while (--dev > real_path && isdigit(*dev)) + *dev = 0; - rc = stat(dev, &stat_buf); + if (strncmp(real_path, "/dev/md_", 8) == 0) + *dev = 0; + + rc = stat(real_path, &stat_buf); if (rc) { if (verbose) fprintf(stderr, "warning: %s, device %s stat failed\n", - strerror(errno), dev); + strerror(errno), real_path); return rc; } @@ -409,31 +435,59 @@ int set_tunables(char *source, int src_len) if (verbose) fprintf(stderr,"warning: device %s does not match any " "entry under /sys/block\n", real_path); - rc = -EINVAL; - goto out; + globfree(&glob_info); + return -EINVAL; + } + + /* Chop off "/dev" from path we found */ + path[strlen(glob_info.gl_pathv[i])] = '\0'; + globfree(&glob_info); + +set_params: + if (strncmp(real_path, "/dev/md", 7) == 0) { + snprintf(real_path, sizeof(real_path), "%s/%s", path, + STRIPE_CACHE_SIZE); + + rc = read_file(real_path, buf, sizeof(buf)); + if (rc) { + if (verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + return rc; + } + + if (atoi(buf) >= md_stripe_cache_size) + return 0; + + if (strlen(buf) - 1 > 0) { + snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size); + rc = write_file(real_path, buf); + if (rc && verbose) + fprintf(stderr, "warning: opening %s: %s\n", + real_path, strerror(errno)); + } + /* Return since raid and disk tunables are different */ + return rc; } - snprintf(path, sizeof(path), "%s/%s", glob_info.gl_pathv[i], + snprintf(real_path, sizeof(real_path), "%s/%s", path, MAX_HW_SECTORS_KB_PATH); - rc = read_file(path, buf, sizeof(buf)); + rc = read_file(real_path, buf, sizeof(buf)); if (rc) { if (verbose) fprintf(stderr, "warning: opening %s: %s\n", - path, strerror(errno)); - goto out; + real_path, strerror(errno)); + return rc; } if (strlen(buf) - 1 > 0) { - snprintf(path, sizeof(path), "%s/%s", - glob_info.gl_pathv[i], MAX_SECTORS_KB_PATH); - rc = write_file(path, buf); + snprintf(real_path, sizeof(real_path), "%s/%s", path, + MAX_SECTORS_KB_PATH); + rc = write_file(real_path, buf); if (rc && verbose) fprintf(stderr, "warning: writing to %s: %s\n", - path, strerror(errno)); + real_path, strerror(errno)); } - -out: - globfree(&glob_info); return rc; } @@ -499,7 +553,15 @@ int main(int argc, char *const argv[]) } usource = argv[optind]; + if (!usource) { + usage(stderr); + } + source = convert_hostnames(usource); + if (!source) { + usage(stderr); + } + target = argv[optind + 1]; ptr = target + strlen(target) - 1; while ((ptr > target) && (*ptr == '/')) { @@ -507,10 +569,6 @@ int main(int argc, char *const argv[]) ptr--; } - if (!usource || !source) { - usage(stderr); - } - if (verbose) { for (i = 0; i < argc; i++) printf("arg[%d] = %s\n", i, argv[i]); @@ -520,6 +578,10 @@ int main(int argc, char *const argv[]) } options = malloc(strlen(orig_options) + 1); + if (options == NULL) { + fprintf(stderr, "can't allocate memory for options\n"); + return -1; + } strcpy(options, orig_options); rc = parse_options(options, &flags); if (rc) { @@ -529,7 +591,7 @@ int main(int argc, char *const argv[]) } if (!force) { - rc = check_mtab_entry(usource, target, "lustre"); + rc = check_mtab_entry(usource, source, target, "lustre"); if (rc && !(flags & MS_REMOUNT)) { fprintf(stderr, "%s: according to %s %s is " "already mounted on %s\n", @@ -558,6 +620,10 @@ int main(int argc, char *const argv[]) functions. So we'll stick it on the end of the options. */ optlen = strlen(options) + strlen(",device=") + strlen(source) + 1; optcopy = malloc(optlen); + if (optcopy == NULL) { + fprintf(stderr, "can't allocate memory to optcopy\n"); + return -1; + } strcpy(optcopy, options); if (*optcopy) strcat(optcopy, ","); @@ -568,18 +634,40 @@ int main(int argc, char *const argv[]) printf("mounting device %s at %s, flags=%#x options=%s\n", source, target, flags, optcopy); - if (set_tunables(source, strlen(source)) && verbose) - fprintf(stderr, "%s: unable to set tunables for %s" + if (!strstr(usource, ":/") && set_blockdev_tunables(source)) { + if (verbose) + fprintf(stderr, "%s: unable to set tunables for %s" " (may cause reduced IO performance)\n", argv[0], source); + } register_service_tags(usource, source, target); - if (!fake) + if (!fake) { /* flags and target get to lustre_get_sb, but not lustre_fill_super. Lustre ignores the flags, but mount does not. */ - rc = mount(source, target, "lustre", flags, (void *)optcopy); + for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) { + rc = mount(source, target, "lustre", flags, + (void *)optcopy); + if (rc) { + if (verbose) { + fprintf(stderr, "%s: mount %s at %s " + "failed: %s retries left: " + "%d\n", basename(progname), + usource, target, + strerror(errno), retry-i); + } + + if (retry) { + sleep(1 << max((i/2), 5)); + } + else { + rc = errno; + } + } + } + } if (rc) { char *cli; @@ -634,8 +722,12 @@ int main(int argc, char *const argv[]) /* May as well try to clean up loop devs */ if (strncmp(usource, "/dev/loop", 9) == 0) { char cmd[256]; + int ret; sprintf(cmd, "/sbin/losetup -d %s", usource); - system(cmd); + if ((ret = system(cmd)) < 0) + rc = errno; + else if (ret > 0) + rc = WEXITSTATUS(ret); } } else if (!nomtab) {