X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Futils%2Fmount_lustre.c;h=57348a5562baf46a58890d09c8c17766d01bf4ff;hp=e6b38ef51e544cc64dfcf4953b250e2d1fee6285;hb=7bf1d7c6cb7d0a7231b3fdcb9e3d3ec3129fb427;hpb=b43367e05211ec565287563e8bc55a5c7e024106 diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index e6b38ef..57348a5 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,6 +42,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include "mount_utils.h" #include #include #include @@ -50,19 +51,26 @@ #include #include "obdctl.h" #include -#include #include #include -#include "mount_utils.h" +#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 10, 51, 0) +/* + * LU-1783 + * We only #include a kernel level include file here because + * important MS_ flag #defines are missing from the SLES version + * of sys/mount.h + * In the future if SLES updates sys/mount.h to have a more complete + * set of flag #defines we should stop including linux/fs.h + */ +#warn remove kernel include +#else +#include +#endif -#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb" -#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb" -#define STRIPE_CACHE_SIZE "md/stripe_cache_size" #define MAXOPT 4096 #define MAX_RETRIES 99 int verbose = 0; -int md_stripe_cache_size = 16384; char *progname = NULL; void usage(FILE *out) @@ -237,7 +245,7 @@ int parse_options(struct mount_opts *mop, char *orig_options, int *flagp) * of param=value. We should pay attention not to remove those * mount options, see bug 22097. */ if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) { - md_stripe_cache_size = atoi(val + 1); + mop->mo_md_stripe_cache_size = atoi(val + 1); } else if (val && strncmp(arg, "retry", 5) == 0) { mop->mo_retry = atoi(val + 1); if (mop->mo_retry > MAX_RETRIES) @@ -246,6 +254,9 @@ int parse_options(struct mount_opts *mop, char *orig_options, int *flagp) mop->mo_retry = 0; } else if (val && strncmp(arg, "mgssec", 6) == 0) { append_option(options, opt); + } else if (strncmp(arg, "nosvc", 5) == 0) { + mop->mo_nosvc = 1; + append_option(options, opt); } else if (strcmp(opt, "force") == 0) { //XXX special check for 'force' option ++mop->mo_force; @@ -256,238 +267,57 @@ int parse_options(struct mount_opts *mop, char *orig_options, int *flagp) } } #ifdef MS_STRICTATIME - /* set strictatime to default if NOATIME or RELATIME - not given explicit */ - if (!(*flagp & (MS_NOATIME | MS_RELATIME))) - *flagp |= MS_STRICTATIME; +#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 10, 51, 0) +/* + * LU-1783 + * In the future when upstream fixes land in all supported kernels + * we should stop forcing MS_STRICTATIME in lustre mounts. + * We override the kernel level default of MS_RELATIME for now + * due to a kernel vfs level bug in atime updates that fails + * to reset timestamps from the future. + */ +#warn remove MS_STRICTATIME override +#endif + /* set strictatime to default if NOATIME or RELATIME + not given explicit */ + if (!(*flagp & (MS_NOATIME | MS_RELATIME))) + *flagp |= MS_STRICTATIME; #endif strcpy(orig_options, options); free(options); return 0; } - -int read_file(char *path, char *buf, int size) +/* Add mgsnids from ldd params */ +static int add_mgsnids(struct mount_opts *mop, char *options, + const char *params) { - FILE *fd; - - fd = fopen(path, "r"); - if (fd == NULL) - return errno; - - /* should not ignore fgets(3)'s return value */ - if (!fgets(buf, size, fd)) { - fprintf(stderr, "reading from %s: %s", path, strerror(errno)); - fclose(fd); - return 1; - } - fclose(fd); - return 0; -} - -int write_file(char *path, char *buf) -{ - FILE *fd; - - fd = fopen(path, "w"); - if (fd == NULL) - return errno; - - fputs(buf, fd); - fclose(fd); - return 0; -} - -/* This is to tune the kernel for good SCSI performance. - * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb - * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */ -int set_blockdev_tunables(char *source, int fan_out) -{ - glob_t glob_info = { 0 }; - struct stat stat_buf; - char *chk_major, *chk_minor; - char *savept = NULL, *dev; - char *ret_path; - char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'}; - char real_path[PATH_MAX] = {'\0'}; - int i, rc = 0; - int major, minor; - - if (!source) - return -EINVAL; - - ret_path = realpath(source, real_path); - if (ret_path == NULL) { - if (verbose) - fprintf(stderr, "warning: %s: cannot resolve: %s\n", - source, strerror(errno)); - return -EINVAL; - } - - if (strncmp(real_path, "/dev/loop", 9) == 0) - return 0; - - if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL)) - return 0; - - snprintf(path, sizeof(path), "/sys/block%s", real_path + 4); - if (access(path, X_OK) == 0) - goto set_params; - - /* The name of the device say 'X' specified in /dev/X may not - * match any entry under /sys/block/. In that case we need to - * match the major/minor number to find the entry under - * sys/block corresponding to /dev/X */ - - /* Don't chop tail digit on /dev/mapper/xxx, LU-478 */ - if (strncmp(real_path, "/dev/mapper", 11) != 0) { - dev = real_path + strlen(real_path); - while (--dev > real_path && isdigit(*dev)) - *dev = 0; - - if (strncmp(real_path, "/dev/md_", 8) == 0) - *dev = 0; - } - - rc = stat(real_path, &stat_buf); - if (rc) { - if (verbose) - fprintf(stderr, "warning: %s, device %s stat failed\n", - strerror(errno), real_path); - return rc; - } - - major = major(stat_buf.st_rdev); - minor = minor(stat_buf.st_rdev); - rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info); - if (rc) { - if (verbose) - fprintf(stderr, "warning: failed to read entries under " - "/sys/block\n"); - globfree(&glob_info); - return rc; - } - - for (i = 0; i < glob_info.gl_pathc; i++){ - snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]); - - rc = read_file(path, buf, sizeof(buf)); - if (rc) - continue; - - if (buf[strlen(buf) - 1] == '\n') - buf[strlen(buf) - 1] = '\0'; - - chk_major = strtok_r(buf, ":", &savept); - chk_minor = savept; - if (major == atoi(chk_major) &&minor == atoi(chk_minor)) - break; - } - - if (i == glob_info.gl_pathc) { - if (verbose) - fprintf(stderr,"warning: device %s does not match any " - "entry under /sys/block\n", real_path); - globfree(&glob_info); - return -EINVAL; - } - - /* Chop off "/dev" from path we found */ - path[strlen(glob_info.gl_pathv[i])] = '\0'; - globfree(&glob_info); - -set_params: - if (strncmp(real_path, "/dev/md", 7) == 0) { - snprintf(real_path, sizeof(real_path), "%s/%s", path, - STRIPE_CACHE_SIZE); - - rc = read_file(real_path, buf, sizeof(buf)); - if (rc) { - if (verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - return 0; - } - - if (atoi(buf) >= md_stripe_cache_size) - return 0; - - if (strlen(buf) - 1 > 0) { - snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size); - rc = write_file(real_path, buf); - if (rc && verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - } - /* Return since raid and disk tunables are different */ - return rc; - } - - snprintf(real_path, sizeof(real_path), "%s/%s", path, - MAX_HW_SECTORS_KB_PATH); - rc = read_file(real_path, buf, sizeof(buf)); - if (rc) { - if (verbose) - fprintf(stderr, "warning: opening %s: %s\n", - real_path, strerror(errno)); - /* No MAX_HW_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - rc = 0; - } - - if (strlen(buf) - 1 > 0) { - snprintf(real_path, sizeof(real_path), "%s/%s", path, - MAX_SECTORS_KB_PATH); - rc = write_file(real_path, buf); - if (rc) { - if (verbose) - fprintf(stderr, "warning: writing to %s: %s\n", - real_path, strerror(errno)); - /* No MAX_SECTORS_KB_PATH isn't necessary an - * error for some device. */ - rc = 0; - } - } - - if (fan_out) { - char *slave = NULL; - glob_info.gl_pathc = 0; - glob_info.gl_offs = 0; - /* if device is multipath device, tune its slave devices */ - snprintf(real_path, sizeof(real_path), "%s/slaves/*", path); - rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info); - - for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){ - slave = basename(glob_info.gl_pathv[i]); - snprintf(real_path, sizeof(real_path), "/dev/%s", slave); - rc = set_blockdev_tunables(real_path, 0); - } - - if (rc == GLOB_NOMATCH) { - /* no slave device is not an error */ - rc = 0; - } else if (rc && verbose) { - if (slave == NULL) { - fprintf(stderr, "warning: %s, failed to read" - " entries under %s/slaves\n", - strerror(errno), path); - } else { - fprintf(stderr, "unable to set tunables for" - " slave device %s (slave would be" - " unable to handle IO request from" - " master %s)\n", - real_path, source); - } - } - globfree(&glob_info); - } + char *ptr = (char *)params; + char tmp, *sep; + + while ((ptr = strstr(ptr, PARAM_MGSNODE)) != NULL) { + sep = strchr(ptr, ' '); + if (sep != NULL) { + tmp = *sep; + *sep = '\0'; + } + append_option(options, ptr); + mop->mo_have_mgsnid++; + if (sep) { + *sep = tmp; + ptr = sep; + } else { + break; + } + } - return rc; + return 0; } static int parse_ldd(char *source, struct mount_opts *mop, char *options) { struct lustre_disk_data *ldd = &mop->mo_ldd; + char *cur, *start; int rc; rc = osd_is_lustre(source, &ldd->ldd_mount_type); @@ -498,10 +328,91 @@ static int parse_ldd(char *source, struct mount_opts *mop, char *options) return ENODEV; } + rc = osd_read_ldd(source, ldd); + if (rc) { + fprintf(stderr, "%s: %s failed to read permanent mount" + " data: %s\n", progname, source, + rc >= 0 ? strerror(rc) : ""); + return rc; + } + + if (ldd->ldd_flags & LDD_F_NEED_INDEX) { + fprintf(stderr, "%s: %s has no index assigned " + "(probably formatted with old mkfs)\n", + progname, source); + return EINVAL; + } + + if (ldd->ldd_flags & LDD_F_UPGRADE14) { + fprintf(stderr, "%s: we cannot upgrade %s from this (very old) " + "Lustre version\n", progname, source); + return EINVAL; + } + + /* Since we never rewrite ldd, ignore temp flags */ + ldd->ldd_flags &= ~(LDD_F_VIRGIN | LDD_F_UPDATE | LDD_F_WRITECONF); + + /* svname of the form lustre:OST1234 means never registered */ + rc = strlen(ldd->ldd_svname); + if (ldd->ldd_svname[rc - 8] == ':') { + ldd->ldd_svname[rc - 8] = '-'; + ldd->ldd_flags |= LDD_F_VIRGIN; + } else if (ldd->ldd_svname[rc - 8] == '=') { + ldd->ldd_svname[rc - 8] = '-'; + ldd->ldd_flags |= LDD_F_WRITECONF; + } + /* backend osd type */ append_option(options, "osd="); strcat(options, mt_type(ldd->ldd_mount_type)); + append_option(options, ldd->ldd_mount_opts); + + if (!mop->mo_have_mgsnid) { + /* Only use disk data if mount -o mgsnode=nid wasn't + * specified */ + if (ldd->ldd_flags & LDD_F_SV_TYPE_MGS) { + append_option(options, "mgs"); + mop->mo_have_mgsnid++; + } else { + add_mgsnids(mop, options, ldd->ldd_params); + } + } + /* Better have an mgsnid by now */ + if (!mop->mo_have_mgsnid) { + fprintf(stderr, "%s: missing option mgsnode=\n", + progname); + return EINVAL; + } + + if (ldd->ldd_flags & LDD_F_VIRGIN) + append_option(options, "virgin"); + if (ldd->ldd_flags & LDD_F_WRITECONF) + append_option(options, "writeconf"); + if (ldd->ldd_flags & LDD_F_NO_PRIMNODE) + append_option(options, "noprimnode"); + + /* prefix every lustre parameter with param= so that in-kernel + * mount can recognize them properly and send to MGS at registration */ + start = ldd->ldd_params; + while (start && *start != '\0') { + while (*start == ' ') start++; + if (*start == '\0') + break; + cur = start; + start = strchr(cur, ' '); + if (start) { + *start = '\0'; + start++; + } + append_option(options, "param="); + strcat(options, cur); + } + + /* svname must be last option */ + append_option(options, "svname="); + strcat(options, ldd->ldd_svname); + return 0; } @@ -517,6 +428,7 @@ static void set_defaults(struct mount_opts *mop) mop->mo_have_mgsnid = 0; mop->mo_md_stripe_cache_size = 16384; mop->mo_orig_options = ""; + mop->mo_nosvc = 0; } static int parse_opts(int argc, char *const argv[], struct mount_opts *mop) @@ -584,8 +496,6 @@ static int parse_opts(int argc, char *const argv[], struct mount_opts *mop) * symbolic link for instance */ if (realpath(mop->mo_usource, real_path) != NULL) { - mop->mo_usource = strdup(real_path); - ptr = strrchr(real_path, '/'); if (ptr && strncmp(ptr, "/dm-", 4) == 0 && isdigit(*(ptr + 4))) { snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptr+1); @@ -598,11 +508,16 @@ static int parse_opts(int argc, char *const argv[], struct mount_opts *mop) fclose(f); } } + mop->mo_usource = strdup(real_path); } - mop->mo_source = convert_hostnames(mop->mo_usource); - if (!mop->mo_source) { - usage(stderr); + ptr = strstr(mop->mo_usource, ":/"); + if (ptr != NULL) { + mop->mo_source = convert_hostnames(mop->mo_usource); + if (!mop->mo_source) + usage(stderr); + } else { + mop->mo_source = strdup(mop->mo_usource); } if (realpath(argv[optind + 1], mop->mo_target) == NULL) { @@ -697,12 +612,13 @@ int main(int argc, char *const argv[]) printf("mounting device %s at %s, flags=%#x options=%s\n", mop.mo_source, mop.mo_target, flags, options); - if (!strstr(mop.mo_usource, ":/") && set_blockdev_tunables(mop.mo_source, 1)) { - if (verbose) - fprintf(stderr, "%s: unable to set tunables for %s" - " (may cause reduced IO performance)\n", - argv[0], mop.mo_source); - } + if (!strstr(mop.mo_usource, ":/") && + osd_tune_lustre(mop.mo_source, &mop)) { + if (verbose) + fprintf(stderr, "%s: unable to set tunables for %s" + " (may cause reduced IO performance)\n", + argv[0], mop.mo_source); + } if (!mop.mo_fake) { /* flags and target get to lustre_get_sb, but not @@ -744,6 +660,9 @@ int main(int argc, char *const argv[]) fprintf(stderr, "%s: mount %s at %s failed: %s\n", progname, mop.mo_usource, mop.mo_target, strerror(errno)); + if (errno == EBUSY) + fprintf(stderr, "Is the backend filesystem mounted?\n" + "Check /etc/mtab and /proc/mounts\n"); if (errno == ENODEV) fprintf(stderr, "Are the lustre modules loaded?\n" "Check /etc/modprobe.conf and " @@ -794,6 +713,15 @@ int main(int argc, char *const argv[]) } else if (!mop.mo_nomtab) { rc = update_mtab_entry(mop.mo_usource, mop.mo_target, "lustre", mop.mo_orig_options, 0,0,0); + + /* change label from : to - + * to indicate the device has been registered. + * only if the label is supposed to be changed and + * target service is supposed to start */ + if (mop.mo_ldd.ldd_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) { + if (mop.mo_nosvc == 0 ) + (void) osd_label_lustre(&mop); + } } free(options);