/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Robert Read <rread@clusterfs.com>
- * Author: Nathan Rutman <nathan@clusterfs.com>
+ * GPL HEADER START
*
- * This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/utils/mount_lustre.c
+ *
+ * Author: Robert Read <rread@clusterfs.com>
+ * Author: Nathan Rutman <nathan@clusterfs.com>
*/
-
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mount.h>
#include <mntent.h>
#include <getopt.h>
-#include <sys/utsname.h>
#include "obdctl.h"
#include <lustre_ver.h>
#include <glob.h>
#include <ctype.h>
#include <limits.h>
+#include "mount_utils.h"
#define MAX_HW_SECTORS_KB_PATH "queue/max_hw_sectors_kb"
#define MAX_SECTORS_KB_PATH "queue/max_sectors_kb"
+#define STRIPE_CACHE_SIZE "md/stripe_cache_size"
+#define MAX_RETRIES 99
int verbose = 0;
int nomtab = 0;
int fake = 0;
int force = 0;
-static char *progname = NULL;
+int retry = 0;
+int md_stripe_cache_size = 2048;
+char *progname = NULL;
void usage(FILE *out)
{
"\t<filesystem>: name of the Lustre filesystem (e.g. lustre1)\n"
"\t<mountpt>: filesystem mountpoint (e.g. /mnt/lustre)\n"
"\t-f|--fake: fake mount (updates /etc/mtab)\n"
- "\t--force: force mount even if already in /etc/mtab\n"
+ "\t-o force|--force: force mount even if already in /etc/mtab\n"
"\t-h|--help: print this usage message\n"
"\t-n|--nomtab: do not update /etc/mtab after mount\n"
"\t-v|--verbose: print verbose config settings\n"
"\t<mntopt>: one or more comma separated of:\n"
"\t\t(no)flock,(no)user_xattr,(no)acl\n"
+ "\t\tabort_recov: abort server recovery handling\n"
"\t\tnosvc: only start MGC/MGS obds\n"
+ "\t\tnomgs: only start target obds, using existing MGS\n"
"\t\texclude=<ostname>[:<ostname>] : colon-separated list of "
"inactive OSTs (e.g. lustre-OST0001)\n"
+ "\t\tretry=<num>: number of times mount is retried by client\n"
+ "\t\tmd_stripe_cache_size=<num>: set the raid stripe cache "
+ "size for the underlying raid if present\n"
);
exit((out != stdout) ? EINVAL : 0);
}
-static int check_mtab_entry(char *spec, char *mtpt, char *type)
+static int check_mtab_entry(char *spec1, char *spec2, char *mtpt, char *type)
{
FILE *fp;
struct mntent *mnt;
return(0);
while ((mnt = getmntent(fp)) != NULL) {
- if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+ if ((strcmp(mnt->mnt_fsname, spec1) == 0 ||
+ strcmp(mnt->mnt_fsname, spec2) == 0) &&
strcmp(mnt->mnt_dir, mtpt) == 0 &&
strcmp(mnt->mnt_type, type) == 0) {
endmntent(fp);
lnet_nid_t nid;
converted = malloc(left);
+ if (converted == NULL) {
+ fprintf(stderr, "out of memory: needed %d bytes\n",
+ MAXNIDSTR);
+ return NULL;
+ }
c = converted;
while ((left > 0) && (*s1 != '/')) {
s2 = strpbrk(s1, ",:");
{ "nouser", 1, 0 }, /* Forbid ordinary user to mount */
{ "noowner", 1, 0 }, /* Device owner has no special privs */
{ "_netdev", 0, 0 }, /* Device accessible only via network */
+ { "loop", 0, 0 },
{ NULL, 0, 0 }
};
/****************************************************************************/
return 0;
}
+static void append_option(char *options, const char *one)
+{ /* Append "one" to the comma-separated option string "options" in place.
+   * A "," separator is written first only when "options" is already
+   * non-empty, so the list never starts with a comma.  strcat() does no
+   * bounds checking: the caller must guarantee that "options" has room
+   * for the separator, "one" and the terminating NUL. */
+ if (*options)
+ strcat(options, ",");
+ strcat(options, one);
+}
+
/* Replace options with subset of Lustre-specific options, and
fill in mount flags */
int parse_options(char *orig_options, int *flagp)
{
- char *options, *opt, *nextopt;
+ char *options, *opt, *nextopt, *arg, *val;
options = calloc(strlen(orig_options) + 1, 1);
*flagp = 0;
if (!*opt)
/* empty option */
continue;
- if (parse_one_option(opt, flagp) == 0) {
+
+ /* Handle retries in a slightly different
+ * manner */
+ arg = opt;
+ val = strchr(opt, '=');
+ /* please note that some ldiskfs mount options are also in the form
+ * of param=value. We should pay attention not to remove those
+ * mount options, see bug 22097. */
+ if (val && strncmp(arg, "md_stripe_cache_size", 20) == 0) {
+ md_stripe_cache_size = atoi(val + 1);
+ } else if (val && strncmp(arg, "retry", 5) == 0) {
+ retry = atoi(val + 1);
+ if (retry > MAX_RETRIES)
+ retry = MAX_RETRIES;
+ else if (retry < 0)
+ retry = 0;
+ } else if (val && strncmp(arg, "mgssec", 6) == 0) {
+ append_option(options, opt);
+ } else if (strncmp(opt, "force", 5) == 0) {
+ //XXX special check for 'force' option
+ ++force;
+ printf("force: %d\n", force);
+ } else if (parse_one_option(opt, flagp) == 0) {
/* pass this on as an option */
- if (*options)
- strcat(options, ",");
- strcat(options, opt);
+ append_option(options, opt);
}
}
strcpy(orig_options, options);
if (fd == NULL)
return errno;
- fgets(buf, size, fd);
+ /* should not ignore fgets(3)'s return value */
+ if (!fgets(buf, size, fd)) {
+ fprintf(stderr, "reading from %s: %s", path, strerror(errno));
+ fclose(fd);
+ return 1;
+ }
fclose(fd);
return 0;
}
/* This is to tune the kernel for good SCSI performance.
* For that we set the value of /sys/block/{dev}/queue/max_sectors_kb
* to the value of /sys/block/{dev}/queue/max_hw_sectors_kb */
-int set_tunables(char *source, int src_len)
+int set_blockdev_tunables(char *source)
{
glob_t glob_info;
struct stat stat_buf;
char *chk_major, *chk_minor;
- char *savept, *dev, *s2 = 0;
+ char *savept, *dev;
char *ret_path;
char buf[PATH_MAX] = {'\0'}, path[PATH_MAX] = {'\0'};
char real_path[PATH_MAX] = {'\0'};
ret_path = realpath(source, real_path);
if (ret_path == NULL) {
if (verbose)
- fprintf(stderr, "warning: %s: cannot resolve: %s",
+ fprintf(stderr, "warning: %s: cannot resolve: %s\n",
source, strerror(errno));
return -EINVAL;
}
- src_len = sizeof(real_path);
-
if (strncmp(real_path, "/dev/loop", 9) == 0)
return 0;
- if ((real_path[0] != '/') && ((s2 = strpbrk(real_path, ",:")) != NULL))
+ if ((real_path[0] != '/') && (strpbrk(real_path, ",:") != NULL))
return 0;
- dev = real_path + src_len - 1;
- while (dev > real_path && (*dev != '/')) {
- if (isdigit(*dev))
- *dev = 0;
- dev--;
- }
- snprintf(path, sizeof(path), "/sys/block%s/%s", dev,
- MAX_HW_SECTORS_KB_PATH);
- rc = read_file(path, buf, sizeof(buf));
- if (rc == 0 && (strlen(buf) - 1) > 0) {
- snprintf(path, sizeof(path), "/sys/block%s/%s", dev,
- MAX_SECTORS_KB_PATH);
- rc = write_file(path, buf);
- if (rc && verbose)
- fprintf(stderr, "warning: opening %s: %s\n",
- path, strerror(errno));
- return rc;
- }
-
- if (rc != ENOENT)
- return rc;
+ snprintf(path, sizeof(path), "/sys/block%s", real_path + 4);
+ if (access(path, X_OK) == 0)
+ goto set_params;
/* The name of the device say 'X' specified in /dev/X may not
* match any entry under /sys/block/. In that case we need to
* match the major/minor number to find the entry under
* sys/block corresponding to /dev/X */
- dev = real_path + src_len - 1;
- while (dev > real_path) {
- if (isdigit(*dev))
- *dev = 0;
- dev--;
- }
+ dev = real_path + strlen(real_path);
+ while (--dev > real_path && isdigit(*dev))
+ *dev = 0;
- rc = stat(dev, &stat_buf);
+ if (strncmp(real_path, "/dev/md_", 8) == 0)
+ *dev = 0;
+
+ rc = stat(real_path, &stat_buf);
if (rc) {
if (verbose)
fprintf(stderr, "warning: %s, device %s stat failed\n",
- strerror(errno), dev);
+ strerror(errno), real_path);
return rc;
}
if (verbose)
fprintf(stderr,"warning: device %s does not match any "
"entry under /sys/block\n", real_path);
- rc = -EINVAL;
- goto out;
+ globfree(&glob_info);
+ return -EINVAL;
+ }
+
+ /* Chop off "/dev" from path we found */
+ path[strlen(glob_info.gl_pathv[i])] = '\0';
+ globfree(&glob_info);
+
+set_params:
+ if (strncmp(real_path, "/dev/md", 7) == 0) {
+ snprintf(real_path, sizeof(real_path), "%s/%s", path,
+ STRIPE_CACHE_SIZE);
+
+ rc = read_file(real_path, buf, sizeof(buf));
+ if (rc) {
+ if (verbose)
+ fprintf(stderr, "warning: opening %s: %s\n",
+ real_path, strerror(errno));
+ return rc;
+ }
+
+ if (atoi(buf) >= md_stripe_cache_size)
+ return 0;
+
+ if (strlen(buf) - 1 > 0) {
+ snprintf(buf, sizeof(buf), "%d", md_stripe_cache_size);
+ rc = write_file(real_path, buf);
+ if (rc && verbose)
+ fprintf(stderr, "warning: opening %s: %s\n",
+ real_path, strerror(errno));
+ }
+ /* Return since raid and disk tunables are different */
+ return rc;
}
- snprintf(path, sizeof(path), "%s/%s", glob_info.gl_pathv[i],
+ snprintf(real_path, sizeof(real_path), "%s/%s", path,
MAX_HW_SECTORS_KB_PATH);
- rc = read_file(path, buf, sizeof(buf));
+ rc = read_file(real_path, buf, sizeof(buf));
if (rc) {
if (verbose)
fprintf(stderr, "warning: opening %s: %s\n",
- path, strerror(errno));
- goto out;
+ real_path, strerror(errno));
+ return rc;
}
if (strlen(buf) - 1 > 0) {
- snprintf(path, sizeof(path), "%s/%s",
- glob_info.gl_pathv[i], MAX_SECTORS_KB_PATH);
- rc = write_file(path, buf);
+ snprintf(real_path, sizeof(real_path), "%s/%s", path,
+ MAX_SECTORS_KB_PATH);
+ rc = write_file(real_path, buf);
if (rc && verbose)
fprintf(stderr, "warning: writing to %s: %s\n",
- path, strerror(errno));
+ real_path, strerror(errno));
}
-
-out:
- globfree(&glob_info);
return rc;
}
}
usource = argv[optind];
+ if (!usource) {
+ usage(stderr);
+ }
+
source = convert_hostnames(usource);
+ if (!source) {
+ usage(stderr);
+ }
+
target = argv[optind + 1];
ptr = target + strlen(target) - 1;
while ((ptr > target) && (*ptr == '/')) {
ptr--;
}
- if (!usource || !source) {
- usage(stderr);
- }
-
if (verbose) {
for (i = 0; i < argc; i++)
printf("arg[%d] = %s\n", i, argv[i]);
- printf("source = %s (%s), target = %s\n", usource, source, target);
+ printf("source = %s (%s), target = %s\n", usource, source,
+ target);
printf("options = %s\n", orig_options);
}
options = malloc(strlen(orig_options) + 1);
+ if (options == NULL) {
+ fprintf(stderr, "can't allocate memory for options\n");
+ return -1;
+ }
strcpy(options, orig_options);
rc = parse_options(options, &flags);
if (rc) {
}
if (!force) {
- rc = check_mtab_entry(usource, target, "lustre");
+ rc = check_mtab_entry(usource, source, target, "lustre");
if (rc && !(flags & MS_REMOUNT)) {
fprintf(stderr, "%s: according to %s %s is "
"already mounted on %s\n",
functions. So we'll stick it on the end of the options. */
optlen = strlen(options) + strlen(",device=") + strlen(source) + 1;
optcopy = malloc(optlen);
+ if (optcopy == NULL) {
+ fprintf(stderr, "can't allocate memory to optcopy\n");
+ return -1;
+ }
strcpy(optcopy, options);
if (*optcopy)
strcat(optcopy, ",");
printf("mounting device %s at %s, flags=%#x options=%s\n",
source, target, flags, optcopy);
- if (set_tunables(source, strlen(source)) && verbose)
- fprintf(stderr, "%s: unable to set tunables for %s"
- " (may cause reduced IO performance)",
+ if (!strstr(usource, ":/") && set_blockdev_tunables(source)) {
+ if (verbose)
+ fprintf(stderr, "%s: unable to set tunables for %s"
+ " (may cause reduced IO performance)\n",
argv[0], source);
+ }
+
+ register_service_tags(usource, source, target);
- if (!fake)
+ if (!fake) {
/* flags and target get to lustre_get_sb, but not
lustre_fill_super. Lustre ignores the flags, but mount
does not. */
- rc = mount(source, target, "lustre", flags, (void *)optcopy);
+ for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) {
+ rc = mount(source, target, "lustre", flags,
+ (void *)optcopy);
+ if (rc) {
+ if (verbose) {
+ fprintf(stderr, "%s: mount %s at %s "
+ "failed: %s retries left: "
+ "%d\n", basename(progname),
+ usource, target,
+ strerror(errno), retry-i);
+ }
+
+ if (retry) {
+ sleep(1 << max((i/2), 5));
+ }
+ else {
+ rc = errno;
+ }
+ }
+ }
+ }
if (rc) {
char *cli;
/* May as well try to clean up loop devs */
if (strncmp(usource, "/dev/loop", 9) == 0) {
char cmd[256];
+ int ret;
sprintf(cmd, "/sbin/losetup -d %s", usource);
- system(cmd);
+ if ((ret = system(cmd)) < 0)
+ rc = errno;
+ else if (ret > 0)
+ rc = WEXITSTATUS(ret);
}
} else if (!nomtab) {