* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
+#include "mount_utils.h"
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include "obdctl.h"
#include <lustre_ver.h>
-#include <glob.h>
#include <ctype.h>
#include <limits.h>
-#include "mount_utils.h"
+#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 10, 51, 0)
+/*
+ * LU-1783
+ * We only #include a kernel level include file here because
+ * important MS_ flag #defines are missing from the SLES version
+ * of sys/mount.h
+ * In the future if SLES updates sys/mount.h to have a more complete
+ * set of flag #defines we should stop including linux/fs.h
+ */
+/* Build-time reminder only: once the tree passes the version above, this
+ * branch is compiled and asks the maintainer to drop the kernel include. */
+#warning "remove kernel include"
+#else
+#include <linux/fs.h>
+#endif
-#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
-#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
-#define STRIPE_CACHE_SIZE "md/stripe_cache_size"
#define MAXOPT 4096
#define MAX_RETRIES 99
int verbose = 0;
-int md_stripe_cache_size = 16384;
char *progname = NULL;
void usage(FILE *out)
* of param=value. We should pay attention not to remove those
* mount options, see bug 22097. */
if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) {
- md_stripe_cache_size = atoi(val + 1);
+ mop->mo_md_stripe_cache_size = atoi(val + 1);
} else if (val && strncmp(arg, "retry", 5) == 0) {
mop->mo_retry = atoi(val + 1);
if (mop->mo_retry > MAX_RETRIES)
mop->mo_retry = 0;
} else if (val && strncmp(arg, "mgssec", 6) == 0) {
append_option(options, opt);
+ } else if (strncmp(arg, "nosvc", 5) == 0) {
+ mop->mo_nosvc = 1;
+ append_option(options, opt);
} else if (strcmp(opt, "force") == 0) {
//XXX special check for 'force' option
++mop->mo_force;
}
}
#ifdef MS_STRICTATIME
- /* set strictatime to default if NOATIME or RELATIME
- not given explicit */
- if (!(*flagp & (MS_NOATIME | MS_RELATIME)))
- *flagp |= MS_STRICTATIME;
+#if LUSTRE_VERSION_CODE > OBD_OCD_VERSION(2, 10, 51, 0)
+/*
+ * LU-1783
+ * In the future when upstream fixes land in all supported kernels
+ * we should stop forcing MS_STRICTATIME in lustre mounts.
+ * We override the kernel level default of MS_RELATIME for now
+ * due to a kernel vfs level bug in atime updates that fails
+ * to reset timestamps from the future.
+ */
+/* Build-time reminder only: emitted once the tree passes the version above. */
+#warning "remove MS_STRICTATIME override"
+#endif
+ /* default to strictatime unless NOATIME or RELATIME
+ was given explicitly */
+ if (!(*flagp & (MS_NOATIME | MS_RELATIME)))
+ *flagp |= MS_STRICTATIME;
#endif
strcpy(orig_options, options);
free(options);
return 0;
}
-
-int read_file(char *path, char *buf, int size)
-{
- FILE *fd;
-
- fd = fopen(path, "r");
- if (fd == NULL)
- return errno;
-
- /* should not ignore fgets(3)'s return value */
- if (!fgets(buf, size, fd)) {
- fprintf(stderr, "reading from %s: %s", path, strerror(errno));
- fclose(fd);
- return 1;
- }
- fclose(fd);
- return 0;
-}
-
-int write_file(char *path, char *buf)
+/*
+ * Add mgsnids from ldd params.
+ *
+ * Scans params for every occurrence of PARAM_MGSNODE and passes each
+ * occurrence (running up to the next space, or to the end of the string)
+ * to append_option() on options, incrementing mop->mo_have_mgsnid once
+ * per occurrence.
+ *
+ * The token is cut at the space in place while it is appended and the
+ * space is restored afterwards, so the buffer behind params is written
+ * to despite its const qualifier.
+ *
+ * Always returns 0.
+ */
+static int add_mgsnids(struct mount_opts *mop, char *options,
+ const char *params)
{
- FILE *fd;
-
- fd = fopen(path, "w");
- if (fd == NULL)
- return errno;
+ char *ptr = (char *)params;
+ char tmp, *sep;
+
+ while ((ptr = strstr(ptr, PARAM_MGSNODE)) != NULL) {
+ sep = strchr(ptr, ' ');
+ /* temporarily terminate this token at the separator */
+ if (sep != NULL) {
+ tmp = *sep;
+ *sep = '\0';
+ }
+ append_option(options, ptr);
+ mop->mo_have_mgsnid++;
+ /* put the separator back and resume the search after this token;
+ * with no separator this was the last token in the string */
+ if (sep) {
+ *sep = tmp;
+ ptr = sep;
+ } else {
+ break;
+ }
+ }
- fputs(buf, fd);
- fclose(fd);
- return 0;
+ return 0;
}
-/* This is to tune the kernel for good SCSI performance.
- * For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
- * to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */
-int set_blockdev_tunables(char *source, int fan_out)
+/*
+ * Hand a copy of the disk data with LDD_F_UPDATE cleared to osd_write_ldd()
+ * for the device named by source.
+ *
+ * The caller's ldd is not modified: the flag is cleared only in a local
+ * copy held in a zeroed struct mkfs_opts.  The device is prepared with
+ * osd_prepare_lustre(); when that sets MO_IS_LOOP the backing file must
+ * be accessible and loop_setup() is called before writing.
+ *
+ * Returns 0 on success, -E2BIG when source does not fit in mo_device,
+ * errno from access(), or the non-zero value returned by
+ * osd_prepare_lustre(), loop_setup() or osd_write_ldd().
+ */
+static int clear_update_ondisk(char *source, struct lustre_disk_data *ldd)
{
- glob_t glob_info = { 0 };
- struct stat stat_buf;
- char *chk_major, *chk_minor;
- char *savept = NULL, *dev;
- char *ret_path;
- char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
- char real_path[PATH_MAX] = {'\0'};
- int i, rc = 0;
- int major, minor;
-
- if (!source)
- return -EINVAL;
-
- ret_path = realpath(source, real_path);
- if (ret_path == NULL) {
- if (verbose)
- fprintf(stderr, "warning: %s: cannot resolve: %s\n",
- source, strerror(errno));
- return -EINVAL;
- }
-
- if (strncmp(real_path, "/dev/loop", 9) == 0)
- return 0;
-
- if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
- return 0;
-
- snprintf(path, sizeof(path), "/sys/block%s", real_path + 4);
- if (access(path, X_OK) == 0)
- goto set_params;
-
- /* The name of the device say 'X' specified in /dev/X may not
- * match any entry under /sys/block/. In that case we need to
- * match the major/minor number to find the entry under
- * sys/block corresponding to /dev/X */
-
- /* Don't chop tail digit on /dev/mapper/xxx, LU-478 */
- if (strncmp(real_path, "/dev/mapper", 11) != 0) {
- dev = real_path + strlen(real_path);
- while (--dev > real_path && isdigit(*dev))
- *dev = 0;
-
- if (strncmp(real_path, "/dev/md_", 8) == 0)
- *dev = 0;
- }
-
- rc = stat(real_path, &stat_buf);
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: %s, device %s stat failed\n",
- strerror(errno), real_path);
- return rc;
- }
-
- major = major(stat_buf.st_rdev);
- minor = minor(stat_buf.st_rdev);
- rc = glob("/sys/block/*", GLOB_NOSORT, NULL, &glob_info);
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: failed to read entries under "
- "/sys/block\n");
- globfree(&glob_info);
- return rc;
- }
-
- for (i = 0; i < glob_info.gl_pathc; i++){
- snprintf(path, sizeof(path), "%s/dev", glob_info.gl_pathv[i]);
-
- rc = read_file(path, buf, sizeof(buf));
- if (rc)
- continue;
-
- if (buf[strlen(buf) - 1] == '\n')
- buf[strlen(buf) - 1] = '\0';
-
- chk_major = strtok_r(buf, ":", &savept);
- chk_minor = savept;
- if (major == atoi(chk_major) &&minor == atoi(chk_minor))
- break;
- }
-
- if (i == glob_info.gl_pathc) {
- if (verbose)
- fprintf(stderr,"warning: device %s does not match any "
- "entry under /sys/block\n", real_path);
- globfree(&glob_info);
- return -EINVAL;
- }
-
- /* Chop off "/dev" from path we found */
- path[strlen(glob_info.gl_pathv[i])] = '\0';
- globfree(&glob_info);
-
-set_params:
- if (strncmp(real_path, "/dev/md", 7) == 0) {
- snprintf(real_path, sizeof(real_path), "%s/%s", path,
- STRIPE_CACHE_SIZE);
-
- rc = read_file(real_path, buf, sizeof(buf));
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- real_path, strerror(errno));
- return 0;
- }
-
- if (atoi(buf) >= md_stripe_cache_size)
- return 0;
-
- if (strlen(buf) - 1 > 0) {
- snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size);
- rc = write_file(real_path, buf);
- if (rc && verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- real_path, strerror(errno));
- }
- /* Return since raid and disk tunables are different */
- return rc;
- }
-
- snprintf(real_path, sizeof(real_path), "%s/%s", path,
- MAX_HW_SECTORS_KB_PATH);
- rc = read_file(real_path, buf, sizeof(buf));
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- real_path, strerror(errno));
- /* No MAX_HW_SECTORS_KB_PATH isn't necessary an
- * error for some device. */
- rc = 0;
- }
-
- if (strlen(buf) - 1 > 0) {
- snprintf(real_path, sizeof(real_path), "%s/%s", path,
- MAX_SECTORS_KB_PATH);
- rc = write_file(real_path, buf);
- if (rc) {
- if (verbose)
- fprintf(stderr, "warning: writing to %s: %s\n",
- real_path, strerror(errno));
- /* No MAX_SECTORS_KB_PATH isn't necessary an
- * error for some device. */
- rc = 0;
- }
- }
+ char always_mountopts[512] = "";
+ char default_mountopts[512] = "";
+ struct mkfs_opts mkop;
+ int ret;
+
+ /* work on a private copy so the caller's ldd keeps LDD_F_UPDATE set */
+ memset(&mkop, 0, sizeof(mkop));
+ mkop.mo_ldd = *ldd;
+ mkop.mo_ldd.ldd_flags &= ~LDD_F_UPDATE;
+ /* the length check plus the memset above keep mo_device terminated */
+ if (strlen(source) > sizeof(mkop.mo_device)-1) {
+ fatal();
+ fprintf(stderr, "Device name too long: %s\n", source);
+ return -E2BIG;
+ }
+ strncpy(mkop.mo_device, source, sizeof(mkop.mo_device));
+
+ ret = osd_prepare_lustre(&mkop,
+ default_mountopts, sizeof(default_mountopts),
+ always_mountopts, sizeof(always_mountopts));
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Can't prepare device %s: %s\n",
+ source, strerror(ret));
+ return ret;
+ }
- if (fan_out) {
- char *slave = NULL;
- glob_info.gl_pathc = 0;
- glob_info.gl_offs = 0;
- /* if device is multipath device, tune its slave devices */
- snprintf(real_path, sizeof(real_path), "%s/slaves/*", path);
- rc = glob(real_path, GLOB_NOSORT, NULL, &glob_info);
-
- for (i = 0; rc == 0 && i < glob_info.gl_pathc; i++){
- slave = basename(glob_info.gl_pathv[i]);
- snprintf(real_path, sizeof(real_path), "/dev/%s", slave);
- rc = set_blockdev_tunables(real_path, 0);
- }
+ /* Loop-backed target: the backing file must already exist, then
+ * loop_setup() is called on it */
+ if (mkop.mo_flags & MO_IS_LOOP) {
+ ret = access(mkop.mo_device, F_OK);
+ if (ret) {
+ ret = errno;
+ fatal();
+ fprintf(stderr, "Can't access device %s: %s\n",
+ source, strerror(ret));
+ return ret;
+ }
- if (rc == GLOB_NOMATCH) {
- /* no slave device is not an error */
- rc = 0;
- } else if (rc && verbose) {
- if (slave == NULL) {
- fprintf(stderr, "warning: %s, failed to read"
- " entries under %s/slaves\n",
- strerror(errno), path);
- } else {
- fprintf(stderr, "unable to set tunables for"
- " slave device %s (slave would be"
- " unable to handle IO request from"
- " master %s)\n",
- real_path, source);
- }
- }
- globfree(&glob_info);
- }
+ ret = loop_setup(&mkop);
+ if (ret) {
+ fatal();
+ fprintf(stderr, "Loop device setup for %s failed: %s\n",
+ mkop.mo_device, strerror(ret));
+ return ret;
+ }
+ }
+ /* a write failure is only reported here; loop_cleanup() below still
+ * runs and the error is returned to the caller */
+ ret = osd_write_ldd(&mkop);
+ if (ret != 0) {
+ fatal();
+ fprintf(stderr, "failed to write local files: %s\n",
+ strerror(ret));
+ }
+ /* called whether or not MO_IS_LOOP was set */
+ loop_cleanup(&mkop);
- return rc;
+ return ret;
}
+/*
+ * Read the on-disk Lustre data of source into mop->mo_ldd and translate
+ * it into mount options appended to options: backend type, stored mount
+ * options, MGS nids (unless already given on the command line), state
+ * flags, every stored parameter prefixed with "param=", and finally the
+ * service name.
+ *
+ * Returns 0 on success or a non-zero error value (ENODEV, EINVAL, or the
+ * value returned by osd_read_ldd()).
+ */
static int parse_ldd(char *source, struct mount_opts *mop, char *options)
{
struct lustre_disk_data *ldd = &mop->mo_ldd;
+ char *cur, *start;
int rc;
rc = osd_is_lustre(source, &ldd->ldd_mount_type);
return ENODEV;
}
+ rc = osd_read_ldd(source, ldd);
+ if (rc) {
+ fprintf(stderr, "%s: %s failed to read permanent mount"
+ " data: %s\n", progname, source,
+ rc >= 0 ? strerror(rc) : "");
+ return rc;
+ }
+
+ if ((IS_MDT(ldd) || IS_OST(ldd)) &&
+ (ldd->ldd_flags & LDD_F_NEED_INDEX)) {
+ fprintf(stderr, "%s: %s has no index assigned "
+ "(probably formatted with old mkfs)\n",
+ progname, source);
+ return EINVAL;
+ }
+
+ if (ldd->ldd_flags & LDD_F_UPGRADE14) {
+ fprintf(stderr, "%s: we cannot upgrade %s from this (very old) "
+ "Lustre version\n", progname, source);
+ return EINVAL;
+ }
+
+ /* best effort: clear_update_ondisk() reports its own failures and the
+ * in-memory flag stays set so "update" is still passed below */
+ if (ldd->ldd_flags & LDD_F_UPDATE)
+ clear_update_ondisk(source, ldd);
+
+ /* Since we never rewrite ldd, ignore temp flags */
+ ldd->ldd_flags &= ~(LDD_F_VIRGIN | LDD_F_WRITECONF);
+
+ /* svname of the form lustre:OST1234 means never registered */
+ rc = strlen(ldd->ldd_svname);
+ /* The separator sits 8 characters before the end of the name.  A
+ * shorter name cannot carry one, and indexing it with rc - 8 would
+ * read and write before the start of the buffer, so leave it alone. */
+ if (rc >= 8) {
+ if (ldd->ldd_svname[rc - 8] == ':') {
+ ldd->ldd_svname[rc - 8] = '-';
+ ldd->ldd_flags |= LDD_F_VIRGIN;
+ } else if (ldd->ldd_svname[rc - 8] == '=') {
+ ldd->ldd_svname[rc - 8] = '-';
+ ldd->ldd_flags |= LDD_F_WRITECONF;
+ }
+ }
+
/* backend osd type */
append_option(options, "osd=");
strcat(options, mt_type(ldd->ldd_mount_type));
+ append_option(options, ldd->ldd_mount_opts);
+
+ if (!mop->mo_have_mgsnid) {
+ /* Only use disk data if mount -o mgsnode=nid wasn't
+ * specified */
+ if (ldd->ldd_flags & LDD_F_SV_TYPE_MGS) {
+ append_option(options, "mgs");
+ mop->mo_have_mgsnid++;
+ } else {
+ add_mgsnids(mop, options, ldd->ldd_params);
+ }
+ }
+ /* Better have an mgsnid by now */
+ if (!mop->mo_have_mgsnid) {
+ fprintf(stderr, "%s: missing option mgsnode=<nid>\n",
+ progname);
+ return EINVAL;
+ }
+
+ if (ldd->ldd_flags & LDD_F_VIRGIN)
+ append_option(options, "virgin");
+ if (ldd->ldd_flags & LDD_F_UPDATE)
+ append_option(options, "update");
+ if (ldd->ldd_flags & LDD_F_WRITECONF)
+ append_option(options, "writeconf");
+ if (ldd->ldd_flags & LDD_F_NO_PRIMNODE)
+ append_option(options, "noprimnode");
+
+ /* prefix every lustre parameter with param= so that in-kernel
+ * mount can recognize them properly and send to MGS at registration */
+ /* note: ldd_params is split in place; each space that ends a
+ * parameter is overwritten with a terminator */
+ start = ldd->ldd_params;
+ while (start && *start != '\0') {
+ while (*start == ' ') start++;
+ if (*start == '\0')
+ break;
+ cur = start;
+ start = strchr(cur, ' ');
+ if (start) {
+ *start = '\0';
+ start++;
+ }
+ append_option(options, "param=");
+ strcat(options, cur);
+ }
+
+ /* svname must be last option */
+ append_option(options, "svname=");
+ strcat(options, ldd->ldd_svname);
+
return 0;
}
mop->mo_have_mgsnid = 0;
mop->mo_md_stripe_cache_size = 16384;
mop->mo_orig_options = "";
+ mop->mo_nosvc = 0;
}
static int parse_opts(int argc, char *const argv[], struct mount_opts *mop)
* symbolic link for instance
*/
if (realpath(mop->mo_usource, real_path) != NULL) {
- mop->mo_usource = strdup(real_path);
-
ptr = strrchr(real_path, '/');
if (ptr && strncmp(ptr, "/dm-", 4) == 0 && isdigit(*(ptr + 4))) {
snprintf(path, sizeof(path), "/sys/block/%s/dm/name", ptr+1);
fclose(f);
}
}
+ mop->mo_usource = strdup(real_path);
}
- mop->mo_source = convert_hostnames(mop->mo_usource);
- if (!mop->mo_source) {
- usage(stderr);
+ ptr = strstr(mop->mo_usource, ":/");
+ if (ptr != NULL) {
+ mop->mo_source = convert_hostnames(mop->mo_usource);
+ if (!mop->mo_source)
+ usage(stderr);
+ } else {
+ mop->mo_source = strdup(mop->mo_usource);
}
if (realpath(argv[optind + 1], mop->mo_target) == NULL) {
printf("mounting device %s at %s, flags=%#x options=%s\n",
mop.mo_source, mop.mo_target, flags, options);
- if (!strstr(mop.mo_usource, ":/") && set_blockdev_tunables(mop.mo_source, 1)) {
- if (verbose)
- fprintf(stderr, "%s: unable to set tunables for %s"
- " (may cause reduced IO performance)\n",
- argv[0], mop.mo_source);
- }
+ if (!strstr(mop.mo_usource, ":/") &&
+ osd_tune_lustre(mop.mo_source, &mop)) {
+ if (verbose)
+ fprintf(stderr, "%s: unable to set tunables for %s"
+ " (may cause reduced IO performance)\n",
+ argv[0], mop.mo_source);
+ }
if (!mop.mo_fake) {
/* flags and target get to lustre_get_sb, but not
for (i = 0, rc = -EAGAIN; i <= mop.mo_retry && rc != 0; i++) {
rc = mount(mop.mo_source, mop.mo_target, "lustre",
flags, (void *)options);
- if (rc) {
+ if (rc == 0) {
+ /* change label from <fsname>:<index> to
+ * <fsname>-<index> to indicate the device has
+ * been registered. only if the label is
+ * supposed to be changed and target service
+ * is supposed to start */
+ if (mop.mo_ldd.ldd_flags &
+ (LDD_F_VIRGIN | LDD_F_WRITECONF)) {
+ if (mop.mo_nosvc == 0)
+ (void)osd_label_lustre(&mop);
+ }
+ } else {
if (verbose) {
fprintf(stderr, "%s: mount %s at %s "
"failed: %s retries left: "
fprintf(stderr, "%s: mount %s at %s failed: %s\n", progname,
mop.mo_usource, mop.mo_target, strerror(errno));
+ if (errno == EBUSY)
+ fprintf(stderr, "Is the backend filesystem mounted?\n"
+ "Check /etc/mtab and /proc/mounts\n");
if (errno == ENODEV)
fprintf(stderr, "Are the lustre modules loaded?\n"
"Check /etc/modprobe.conf and "
} else if (!mop.mo_nomtab) {
rc = update_mtab_entry(mop.mo_usource, mop.mo_target, "lustre",
mop.mo_orig_options, 0,0,0);
- }
+ }
free(options);
/* mo_usource should be freed, but we can rely on the kernel */